@oh-my-pi/pi-coding-agent 16.0.4 → 16.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270) hide show
  1. package/CHANGELOG.md +94 -0
  2. package/dist/cli.js +2027 -1396
  3. package/dist/types/advisor/advise-tool.d.ts +31 -19
  4. package/dist/types/autoresearch/tools/init-experiment.d.ts +13 -17
  5. package/dist/types/autoresearch/tools/log-experiment.d.ts +17 -19
  6. package/dist/types/autoresearch/tools/run-experiment.d.ts +3 -4
  7. package/dist/types/autoresearch/tools/update-notes.d.ts +4 -5
  8. package/dist/types/cli/args.d.ts +1 -0
  9. package/dist/types/cli/bench-cli.d.ts +6 -0
  10. package/dist/types/cli/ttsr-cli.d.ts +39 -0
  11. package/dist/types/commands/launch.d.ts +3 -0
  12. package/dist/types/commands/ttsr.d.ts +57 -0
  13. package/dist/types/commit/agentic/tools/analyze-file.d.ts +4 -5
  14. package/dist/types/commit/agentic/tools/git-file-diff.d.ts +4 -5
  15. package/dist/types/commit/agentic/tools/git-hunk.d.ts +5 -6
  16. package/dist/types/commit/agentic/tools/git-overview.d.ts +4 -5
  17. package/dist/types/commit/agentic/tools/propose-changelog.d.ts +23 -24
  18. package/dist/types/commit/agentic/tools/propose-commit.d.ts +11 -32
  19. package/dist/types/commit/agentic/tools/recent-commits.d.ts +3 -4
  20. package/dist/types/commit/agentic/tools/schemas.d.ts +6 -27
  21. package/dist/types/commit/agentic/tools/split-commit.d.ts +28 -49
  22. package/dist/types/commit/changelog/generate.d.ts +12 -13
  23. package/dist/types/commit/shared-llm.d.ts +10 -37
  24. package/dist/types/config/config-file.d.ts +4 -4
  25. package/dist/types/config/keybindings.d.ts +5 -0
  26. package/dist/types/config/models-config-schema.d.ts +625 -990
  27. package/dist/types/config/models-config.d.ts +229 -217
  28. package/dist/types/config/settings-schema.d.ts +144 -25
  29. package/dist/types/edit/hashline/params.d.ts +7 -11
  30. package/dist/types/edit/index.d.ts +2 -1
  31. package/dist/types/edit/modes/apply-patch.d.ts +4 -5
  32. package/dist/types/edit/modes/patch.d.ts +15 -24
  33. package/dist/types/edit/modes/replace.d.ts +16 -17
  34. package/dist/types/eval/js/index.d.ts +1 -0
  35. package/dist/types/extensibility/custom-commands/types.d.ts +6 -3
  36. package/dist/types/extensibility/custom-tools/types.d.ts +8 -5
  37. package/dist/types/extensibility/extensions/runner.d.ts +5 -2
  38. package/dist/types/extensibility/extensions/types.d.ts +14 -10
  39. package/dist/types/extensibility/hooks/types.d.ts +7 -4
  40. package/dist/types/extensibility/legacy-pi-ai-shim.d.ts +13 -5
  41. package/dist/types/extensibility/legacy-pi-coding-agent-shim.d.ts +17 -0
  42. package/dist/types/extensibility/shared-events.d.ts +22 -1
  43. package/dist/types/extensibility/typebox.d.ts +80 -58
  44. package/dist/types/goals/tools/goal-tool.d.ts +11 -24
  45. package/dist/types/index.d.ts +2 -0
  46. package/dist/types/lsp/index.d.ts +11 -26
  47. package/dist/types/lsp/types.d.ts +12 -28
  48. package/dist/types/main.d.ts +1 -0
  49. package/dist/types/mcp/client.d.ts +8 -0
  50. package/dist/types/modes/components/btw-panel.d.ts +1 -0
  51. package/dist/types/modes/components/custom-editor.d.ts +3 -1
  52. package/dist/types/modes/components/status-line/component.d.ts +1 -1
  53. package/dist/types/modes/components/status-line/context-thresholds.d.ts +0 -1
  54. package/dist/types/modes/controllers/btw-controller.d.ts +2 -0
  55. package/dist/types/modes/controllers/input-controller.d.ts +1 -0
  56. package/dist/types/modes/interactive-mode.d.ts +3 -0
  57. package/dist/types/modes/rpc/rpc-types.d.ts +1 -1
  58. package/dist/types/modes/setup-wizard/index.d.ts +1 -0
  59. package/dist/types/modes/setup-wizard/startup-splash.d.ts +7 -0
  60. package/dist/types/modes/theme/theme.d.ts +1 -1
  61. package/dist/types/modes/types.d.ts +3 -0
  62. package/dist/types/modes/utils/context-usage.d.ts +12 -0
  63. package/dist/types/sdk.d.ts +8 -1
  64. package/dist/types/session/agent-session.d.ts +24 -0
  65. package/dist/types/session/session-persistence.d.ts +4 -0
  66. package/dist/types/startup-splash.d.ts +12 -0
  67. package/dist/types/task/types.d.ts +47 -48
  68. package/dist/types/tools/ask.d.ts +26 -27
  69. package/dist/types/tools/ast-edit.d.ts +17 -17
  70. package/dist/types/tools/ast-grep.d.ts +12 -13
  71. package/dist/types/tools/bash.d.ts +20 -17
  72. package/dist/types/tools/browser.d.ts +46 -71
  73. package/dist/types/tools/checkpoint.d.ts +14 -15
  74. package/dist/types/tools/debug.d.ts +82 -145
  75. package/dist/types/tools/eval.d.ts +30 -40
  76. package/dist/types/tools/find.d.ts +17 -18
  77. package/dist/types/tools/gh.d.ts +49 -78
  78. package/dist/types/tools/image-gen.d.ts +20 -36
  79. package/dist/types/tools/inspect-image.d.ts +10 -11
  80. package/dist/types/tools/irc.d.ts +22 -33
  81. package/dist/types/tools/job.d.ts +11 -12
  82. package/dist/types/tools/learn.d.ts +21 -28
  83. package/dist/types/tools/manage-skill.d.ts +13 -22
  84. package/dist/types/tools/memory-edit.d.ts +15 -24
  85. package/dist/types/tools/memory-recall.d.ts +7 -8
  86. package/dist/types/tools/memory-reflect.d.ts +9 -10
  87. package/dist/types/tools/memory-retain.d.ts +13 -14
  88. package/dist/types/tools/read.d.ts +8 -8
  89. package/dist/types/tools/resolve.d.ts +11 -18
  90. package/dist/types/tools/review.d.ts +9 -15
  91. package/dist/types/tools/search-tool-bm25.d.ts +9 -10
  92. package/dist/types/tools/search.d.ts +16 -17
  93. package/dist/types/tools/ssh.d.ts +14 -15
  94. package/dist/types/tools/todo.d.ts +27 -43
  95. package/dist/types/tools/tts.d.ts +8 -9
  96. package/dist/types/tools/write.d.ts +9 -10
  97. package/dist/types/tui/code-cell.d.ts +2 -0
  98. package/dist/types/tui/index.d.ts +1 -0
  99. package/dist/types/tui/width-aware-text.d.ts +23 -0
  100. package/dist/types/utils/image-vision-fallback.d.ts +28 -0
  101. package/dist/types/utils/markit.d.ts +10 -1
  102. package/dist/types/web/search/index.d.ts +17 -28
  103. package/dist/types/web/search/providers/base.d.ts +1 -0
  104. package/dist/types/web/search/providers/gemini.d.ts +1 -0
  105. package/dist/types/web/search/providers/perplexity.d.ts +0 -2
  106. package/dist/types/web/search/types.d.ts +32 -26
  107. package/package.json +14 -13
  108. package/scripts/omp +1 -1
  109. package/src/advisor/__tests__/advisor.test.ts +103 -1
  110. package/src/advisor/advise-tool.ts +47 -11
  111. package/src/autoresearch/tools/init-experiment.ts +13 -16
  112. package/src/autoresearch/tools/log-experiment.ts +15 -18
  113. package/src/autoresearch/tools/run-experiment.ts +3 -3
  114. package/src/autoresearch/tools/update-notes.ts +4 -4
  115. package/src/cli/args.ts +1 -0
  116. package/src/cli/bench-cli.ts +30 -7
  117. package/src/cli/flag-tables.ts +8 -0
  118. package/src/cli/ttsr-cli.ts +995 -0
  119. package/src/cli-commands.ts +1 -0
  120. package/src/cli.ts +7 -1
  121. package/src/collab/host.ts +2 -2
  122. package/src/commands/launch.ts +3 -0
  123. package/src/commands/ttsr.ts +125 -0
  124. package/src/commit/agentic/tools/analyze-file.ts +4 -4
  125. package/src/commit/agentic/tools/git-file-diff.ts +4 -4
  126. package/src/commit/agentic/tools/git-hunk.ts +7 -5
  127. package/src/commit/agentic/tools/git-overview.ts +4 -4
  128. package/src/commit/agentic/tools/propose-changelog.ts +18 -15
  129. package/src/commit/agentic/tools/propose-commit.ts +6 -6
  130. package/src/commit/agentic/tools/recent-commits.ts +3 -3
  131. package/src/commit/agentic/tools/schemas.ts +8 -20
  132. package/src/commit/agentic/tools/split-commit.ts +19 -23
  133. package/src/commit/analysis/summary.ts +7 -5
  134. package/src/commit/changelog/generate.ts +15 -11
  135. package/src/commit/shared-llm.ts +17 -24
  136. package/src/config/config-file.ts +13 -15
  137. package/src/config/keybindings.ts +6 -0
  138. package/src/config/models-config-schema.ts +206 -179
  139. package/src/config/settings-schema.ts +118 -2
  140. package/src/discovery/builtin-rules/index.ts +2 -0
  141. package/src/discovery/builtin-rules/ts-import-type.md +2 -2
  142. package/src/discovery/builtin-rules/ts-no-any.md +11 -2
  143. package/src/discovery/builtin-rules/ts-no-inline-cast-access.md +55 -0
  144. package/src/edit/hashline/params.ts +12 -11
  145. package/src/edit/index.ts +5 -4
  146. package/src/edit/modes/apply-patch.ts +4 -4
  147. package/src/edit/modes/patch.ts +15 -18
  148. package/src/edit/modes/replace.ts +13 -17
  149. package/src/edit/renderer.ts +0 -1
  150. package/src/eval/agent-bridge.ts +11 -13
  151. package/src/eval/completion-bridge.ts +25 -17
  152. package/src/eval/js/context-manager.ts +17 -2
  153. package/src/eval/js/index.ts +1 -1
  154. package/src/eval/py/executor.ts +2 -2
  155. package/src/eval/py/runner.py +44 -0
  156. package/src/extensibility/custom-commands/loader.ts +5 -3
  157. package/src/extensibility/custom-commands/types.ts +6 -3
  158. package/src/extensibility/custom-tools/loader.ts +4 -2
  159. package/src/extensibility/custom-tools/types.ts +8 -5
  160. package/src/extensibility/extensions/loader.ts +4 -2
  161. package/src/extensibility/extensions/runner.ts +20 -2
  162. package/src/extensibility/extensions/types.ts +22 -8
  163. package/src/extensibility/hooks/loader.ts +5 -2
  164. package/src/extensibility/hooks/types.ts +7 -4
  165. package/src/extensibility/legacy-pi-ai-shim.ts +42 -5
  166. package/src/extensibility/legacy-pi-coding-agent-shim.ts +113 -0
  167. package/src/extensibility/plugins/legacy-pi-compat.ts +13 -13
  168. package/src/extensibility/shared-events.ts +24 -0
  169. package/src/extensibility/tool-proxy.ts +4 -1
  170. package/src/extensibility/typebox.ts +778 -251
  171. package/src/goals/guided-setup.ts +12 -3
  172. package/src/goals/tools/goal-tool.ts +6 -6
  173. package/src/index.ts +2 -0
  174. package/src/internal-urls/docs-index.generated.ts +15 -13
  175. package/src/lsp/types.ts +13 -27
  176. package/src/main.ts +29 -21
  177. package/src/mcp/client.ts +38 -13
  178. package/src/mcp/render.ts +102 -89
  179. package/src/modes/components/agent-hub.ts +11 -4
  180. package/src/modes/components/branch-summary-message.ts +1 -0
  181. package/src/modes/components/btw-panel.ts +5 -1
  182. package/src/modes/components/collab-prompt-message.ts +9 -7
  183. package/src/modes/components/compaction-summary-message.ts +1 -0
  184. package/src/modes/components/custom-editor.ts +18 -0
  185. package/src/modes/components/custom-message.ts +1 -0
  186. package/src/modes/components/footer.ts +6 -5
  187. package/src/modes/components/hook-message.ts +1 -0
  188. package/src/modes/components/read-tool-group.ts +9 -3
  189. package/src/modes/components/skill-message.ts +1 -0
  190. package/src/modes/components/status-line/component.ts +139 -15
  191. package/src/modes/components/status-line/context-thresholds.ts +0 -1
  192. package/src/modes/components/todo-reminder.ts +1 -0
  193. package/src/modes/components/tool-execution.ts +17 -10
  194. package/src/modes/components/ttsr-notification.ts +1 -0
  195. package/src/modes/components/user-message.ts +6 -6
  196. package/src/modes/controllers/btw-controller.ts +69 -1
  197. package/src/modes/controllers/event-controller.ts +2 -7
  198. package/src/modes/controllers/input-controller.ts +29 -0
  199. package/src/modes/controllers/selector-controller.ts +10 -3
  200. package/src/modes/interactive-mode.ts +42 -10
  201. package/src/modes/rpc/rpc-types.ts +1 -1
  202. package/src/modes/setup-wizard/index.ts +1 -0
  203. package/src/modes/setup-wizard/scenes/sign-in.ts +77 -5
  204. package/src/modes/setup-wizard/startup-splash.ts +107 -0
  205. package/src/modes/theme/theme.ts +133 -143
  206. package/src/modes/types.ts +3 -0
  207. package/src/modes/utils/context-usage.ts +37 -20
  208. package/src/modes/utils/hotkeys-markdown.ts +1 -0
  209. package/src/prompts/system/system-prompt.md +1 -0
  210. package/src/prompts/tools/image-attachment-describe-system.md +8 -0
  211. package/src/prompts/tools/image-attachment-describe.md +10 -0
  212. package/src/sdk.ts +35 -22
  213. package/src/session/agent-session.ts +715 -255
  214. package/src/session/session-history-format.ts +11 -2
  215. package/src/session/session-loader.ts +19 -32
  216. package/src/session/session-persistence.ts +27 -11
  217. package/src/session/snapcompact-inline.ts +1 -1
  218. package/src/slash-commands/builtin-registry.ts +4 -11
  219. package/src/ssh/connection-manager.ts +3 -2
  220. package/src/startup-splash.ts +19 -0
  221. package/src/task/executor.ts +12 -7
  222. package/src/task/types.ts +44 -41
  223. package/src/tool-discovery/tool-index.ts +17 -4
  224. package/src/tools/ask.ts +14 -14
  225. package/src/tools/ast-edit.ts +17 -14
  226. package/src/tools/ast-grep.ts +10 -9
  227. package/src/tools/bash.ts +15 -10
  228. package/src/tools/browser/launch.ts +13 -0
  229. package/src/tools/browser.ts +26 -32
  230. package/src/tools/checkpoint.ts +7 -7
  231. package/src/tools/debug.ts +72 -69
  232. package/src/tools/eval.ts +18 -19
  233. package/src/tools/find.ts +20 -13
  234. package/src/tools/gh.ts +29 -49
  235. package/src/tools/image-gen.ts +94 -57
  236. package/src/tools/inspect-image.ts +8 -9
  237. package/src/tools/irc.ts +12 -12
  238. package/src/tools/job.ts +6 -6
  239. package/src/tools/learn.ts +11 -14
  240. package/src/tools/manage-skill.ts +19 -23
  241. package/src/tools/memory-edit.ts +8 -8
  242. package/src/tools/memory-recall.ts +4 -4
  243. package/src/tools/memory-reflect.ts +5 -5
  244. package/src/tools/memory-retain.ts +9 -11
  245. package/src/tools/puppeteer/02_stealth_hairline.txt +1 -1
  246. package/src/tools/puppeteer/04_stealth_iframe.txt +4 -4
  247. package/src/tools/puppeteer/05_stealth_webgl.txt +1 -1
  248. package/src/tools/puppeteer/10_stealth_plugins.txt +6 -4
  249. package/src/tools/puppeteer/12_stealth_codecs.txt +2 -2
  250. package/src/tools/puppeteer/13_stealth_worker.txt +1 -1
  251. package/src/tools/read.ts +197 -19
  252. package/src/tools/report-tool-issue.ts +6 -6
  253. package/src/tools/resolve.ts +6 -6
  254. package/src/tools/review.ts +10 -12
  255. package/src/tools/search-tool-bm25.ts +5 -5
  256. package/src/tools/search.ts +20 -29
  257. package/src/tools/ssh.ts +8 -8
  258. package/src/tools/todo.ts +16 -19
  259. package/src/tools/tts.ts +16 -15
  260. package/src/tools/write.ts +5 -5
  261. package/src/tui/code-cell.ts +44 -3
  262. package/src/tui/index.ts +1 -0
  263. package/src/tui/width-aware-text.ts +58 -0
  264. package/src/utils/image-vision-fallback.ts +197 -0
  265. package/src/utils/markit.ts +17 -2
  266. package/src/web/search/index.ts +21 -9
  267. package/src/web/search/providers/base.ts +1 -0
  268. package/src/web/search/providers/gemini.ts +56 -18
  269. package/src/web/search/providers/perplexity.ts +373 -126
  270. package/src/web/search/types.ts +28 -48
@@ -1,13 +1,14 @@
1
1
  // Auto-generated by scripts/generate-docs-index.ts - DO NOT EDIT
2
2
 
3
- export const EMBEDDED_DOC_FILENAMES: readonly string[] = ["ERRATA-GPT5-HARMONY.md","adding-a-provider.md","advisor-watchdog.md","ai-schema-normalize.md","approval-mode.md","auth-broker-gateway.md","bash-tool-runtime.md","blob-artifact-architecture.md","collab.md","compaction.md","config-usage.md","context-files.md","custom-tools.md","environment-variables.md","extension-loading.md","extensions.md","fs-scan-cache-architecture.md","gemini-manifest-extensions.md","handoff-generation-pipeline.md","hooks.md","install-id.md","keybindings.md","local-models.md","lsp-config.md","macos-signing-notarization.md","marketplace.md","mcp-config.md","mcp-protocol-transports.md","mcp-runtime-lifecycle.md","mcp-server-tool-authoring.md","memory.md","mnemosyne-memory-backend.md","models.md","natives-addon-loader-runtime.md","natives-architecture.md","natives-binding-contract.md","natives-build-release-debugging.md","natives-media-system-utils.md","natives-rust-task-cancellation.md","natives-shell-pty-process.md","natives-text-search-pipeline.md","non-compaction-retry-policy.md","notebook-tool-runtime.md","plugin-manager-installer-plumbing.md","porting-from-pi-mono.md","porting-to-natives.md","provider-streaming-internals.md","providers.md","python-repl.md","resolve-tool-runtime.md","rpc.md","rulebook-matching-pipeline.md","sdk.md","secrets.md","session-operations-export-share-fork-resume.md","session-switching-and-recent-listing.md","session-tree-plan.md","session.md","settings.md","skills.md","skills/authoring-extensions.md","skills/authoring-hooks.md","skills/authoring-marketplaces.md","skills/examples/hello-extension/README.md","skills/examples/mini-marketplace/README.md","skills/examples/safety-hook/README.md","slash-command-internals.md","system-prompt-customization.md","task-agent-discovery.md","theme.md","toolconv/anthropic.md","toolconv/deepseek.md","toolconv/gemini.md","toolconv/gemma.md","toolconv/glm-4.5.md","toolconv/harmony.md","toolconv/kimi-k2.md","toolconv/pi-native.m
d","toolconv/qwen3.md","tools/ask.md","tools/ast-edit.md","tools/ast-grep.md","tools/bash.md","tools/browser.md","tools/checkpoint.md","tools/debug.md","tools/edit.md","tools/eval.md","tools/find.md","tools/github.md","tools/inspect_image.md","tools/irc.md","tools/job.md","tools/lsp.md","tools/read.md","tools/recall.md","tools/reflect.md","tools/resolve.md","tools/retain.md","tools/rewind.md","tools/search.md","tools/search_tool_bm25.md","tools/ssh.md","tools/task.md","tools/todo.md","tools/web_search.md","tools/write.md","tree.md","ttsr-injection-lifecycle.md","tui-core-renderer.md","tui-runtime-internals.md","tui.md"];
3
+ export const EMBEDDED_DOC_FILENAMES: readonly string[] = ["ERRATA-GPT5-HARMONY.md","adding-a-provider.md","advisor-watchdog.md","ai-schema-normalize.md","approval-mode.md","arktype-guide.md","auth-broker-gateway.md","bash-tool-runtime.md","blob-artifact-architecture.md","collab.md","compaction.md","config-usage.md","context-files.md","custom-tools.md","environment-variables.md","extension-loading.md","extensions.md","fs-scan-cache-architecture.md","gemini-manifest-extensions.md","handoff-generation-pipeline.md","hooks.md","install-id.md","keybindings.md","local-models.md","lsp-config.md","macos-signing-notarization.md","marketplace.md","mcp-config.md","mcp-protocol-transports.md","mcp-runtime-lifecycle.md","mcp-server-tool-authoring.md","memory.md","mnemosyne-memory-backend.md","models.md","natives-addon-loader-runtime.md","natives-architecture.md","natives-binding-contract.md","natives-build-release-debugging.md","natives-media-system-utils.md","natives-rust-task-cancellation.md","natives-shell-pty-process.md","natives-text-search-pipeline.md","non-compaction-retry-policy.md","notebook-tool-runtime.md","plugin-manager-installer-plumbing.md","porting-from-pi-mono.md","porting-to-natives.md","provider-endpoint-constraints.md","provider-streaming-internals.md","providers.md","python-repl.md","resolve-tool-runtime.md","rpc.md","rulebook-matching-pipeline.md","sdk.md","secrets.md","session-operations-export-share-fork-resume.md","session-switching-and-recent-listing.md","session-tree-plan.md","session.md","settings.md","skills.md","skills/authoring-extensions.md","skills/authoring-hooks.md","skills/authoring-marketplaces.md","skills/examples/hello-extension/README.md","skills/examples/mini-marketplace/README.md","skills/examples/safety-hook/README.md","slash-command-internals.md","system-prompt-customization.md","task-agent-discovery.md","theme.md","toolconv/anthropic.md","toolconv/deepseek.md","toolconv/gemini.md","toolconv/gemma.md","toolconv/glm-4.5.md","toolconv/h
armony.md","toolconv/kimi-k2.md","toolconv/pi-native.md","toolconv/qwen3.md","tools/ask.md","tools/ast-edit.md","tools/ast-grep.md","tools/bash.md","tools/browser.md","tools/checkpoint.md","tools/debug.md","tools/edit.md","tools/eval.md","tools/find.md","tools/github.md","tools/inspect_image.md","tools/irc.md","tools/job.md","tools/lsp.md","tools/read.md","tools/recall.md","tools/reflect.md","tools/resolve.md","tools/retain.md","tools/rewind.md","tools/search.md","tools/search_tool_bm25.md","tools/ssh.md","tools/task.md","tools/todo.md","tools/web_search.md","tools/write.md","tree.md","ttsr-injection-lifecycle.md","tui-core-renderer.md","tui-runtime-internals.md","tui.md"];
4
4
 
5
5
  export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
6
6
  "ERRATA-GPT5-HARMONY.md": "# ERRATA — GPT-5 Harmony-Header Leakage\n\nHistorical research note, not a current runtime contract. The statistics below\ncome from the named local stats database snapshot, not from checked-in tests or\nruntime code.\n\n## 1. The problem\n\nOpenAI frames tool calls in the Harmony chat protocol:\n\n```\n<|start|>assistant<|channel|>commentary to=functions.<NAME><|message|>{ARGS}<|call|>\n```\n\n`<|channel|>commentary to=functions.NAME` is the **routing header** —\ncontrol tokens consumed by the runtime to dispatch the call. These\ntokens never appear as content under normal operation; the runtime\nstrips them.\n\nThe defect: gpt-5 models occasionally emit, **as ordinary content\ninside `{ARGS}`**, the **plain-text shadow** of these routing tokens —\nthe same characters without the `<|…|>` brackets — and continue\nproducing more pseudo-routing structure (channel name, body marker,\nmultilingual spam, fake tool-result framing). The contamination lives\ninside the visible tool argument and is dispatched to the tool as if it\nwere intended content.\n\n**Critical detail.** The actual `<|start|>` / `<|channel|>` /\n`<|message|>` / `<|call|>` special tokens almost never appear in tool\nargs. What leaks is the bracket-less spelling — `analysis to=functions.X\ncode …` — because OpenAI applies a logit mask suppressing the\ncontrol-token IDs inside the args region. The mass that would have gone\nto those special tokens redistributes onto the un-bracketed plain-text\nrepresentation the model also learned. This makes the leak structurally\ninvisible to the routing parser and lands it in the tool input verbatim.\n\nManifestation in tool args (real corpus example):\n\n```\n~ add_function(iso, ctx, ns, \"installSystemChangeObserver\",\n os_install_system_change_observer);】【\"】【analysis to=functions.edit\n code above เงินไทยฟรีuser to=functions.edit code …\n```\n\nThe leading code is real and intended. 
Everything after the first\nnon-Latin token through the next clean structural boundary is corruption.\n\n---\n\n## 2. Observed statistics & failure modes\n\nSource: `~/.omp/stats.db` (`ss_tool_calls`, `ss_assistant_msgs`), through\n2026-05-10. 1.05M tool calls scanned.\n\n### 2.1 Rate\n\n| Model | Leaks in tool args | Calls | per million |\n| ------------- | -----------------: | ------: | ----------: |\n| gpt-5.4 | 37 | 226,957 | 163 |\n| gpt-5.3-codex | 17 | 112,243 | 151 |\n| gpt-5.5 | 2 | 80,750 | 25 |\n| gpt-5.2-codex | 0 | — | — |\n\nPlus 15 hits in assistant visible text / thinking blobs.\n\n### 2.2 Tool distribution\n\n| Tool | Hits |\n| ------------------------------ | -----: |\n| `edit` | 38 |\n| `eval` | 11 |\n| `report_tool_issue` | 3 |\n| `grep`/`read`/`search`/`yield` | 1 each |\n\nConcentrated in tools with free-form (non-JSON-schema) argument formats.\n\n### 2.3 Leak shape (deterministic)\n\n```\nLEAK ::= JUNK_PREFIX MARKER CHANNEL_BODY (LEAK)?\nMARKER ::= \"to=functions.\" TOOL_NAME\nCHANNEL_BODY ::= \" code \" (SPAM | reasoning_prose | fake_tool_output)*\nJUNK_PREFIX ::= (GLITCH_TOKEN | CHANNEL_WORD | NON_LATIN_RUN | \"}\" | \"】【\")+\n```\n\n**Cascading is common.** Of 96 marker occurrences across 71 contaminated\nrecords, 39 contain ≥2 markers and 7 contain ≥3 — the model emits\nmultiple fake `to=functions.X code …` blocks back-to-back, often with\nfake `code_output\\nCell N:\\n…` framing between them. Once the\nplain-text scaffolding is in the residual stream, the prefix now _looks\nlike_ a fresh tool envelope start, so the macro prior over continuations\nkeeps voting for more scaffolding. Self-amplifying.\n\n### 2.4 Glitch tokens\n\nSingle-token identifiers in `o200k_base` whose embeddings appear to be\nnear-init from underrepresentation in post-training. 
ASCII residue\nimmediately before the marker in the natural corpus:\n\n| Surface string | Single-token | Token ID | Hits in corpus |\n| ----------------- | :----------: | -------: | ------------------------------: |\n| `Japgolly` | ✅ | 199,745 | 1 |\n| `Jsii` | ✅ | 114,318 | (subtoken of `Jsii_commentary`) |\n| `Jsii_commentary` | — (3 toks) | — | 2 |\n| `changedFiles` | — (2 toks) | — | 8 |\n| `RTLU` | — (2 toks) | — | 3 |\n\n`Japgolly` is in the last 0.13% of the vocabulary — the same family of\nGitHub-corpus residue that produced `SolidGoldMagikarp` in the 2023\nGPT-2 vocabulary (Rumbelow & Watkins). `SolidGoldMagikarp` itself\ntokenizes to 5 tokens in `o200k_base` — that specific token was retired,\nbut the class wasn't.\n\nFor the multi-token entries, the corpus-level signature is the surface\nstring; the underlying glitch trigger is a sub-token (e.g. `Jsii` inside\n`Jsii_commentary`). The detector list (`G` signal) keys on the surface\nstrings.\n\nStable across unrelated sessions. Treated as a high-precision detector\nsignal.\n\n### 2.5 Channel-word leakage\n\n`analysis` (5), `assistant` (5), `commentary` (3), `user` (1) appear\ndirectly preceding `to=`. Always bare words; never `<|channel|>analysis`\nor any other bracketed form. Consistent with §1 — the brackets are\nmasked, the words are not.\n\n### 2.6 Non-Latin spam residue\n\n96 marker hits, by script: CJK 40, Cyrillic 12, Telugu/Kannada/Malayalam\n18, Thai 8, Georgian 7, Armenian 7, Arabic 1. 
Recurring fragments are\nChinese gambling SEO (`大发时时彩`, `天天中彩票`), Georgian/Abkhaz junk,\nand Thai casino spam — well-known low-quality crawl residue.\n\nThis is the same script distribution observed in the controlled\nreproduction (§7.3), independent of the prompt's natural language.\n\n### 2.7 Failure-mode breakdown for the `edit` tool\n\nThe `edit` tool exists in two variants in the corpus:\n\n| Variant | Calls | Recovery |\n| -------------------------------------------------- | ----: | ---------------------------------------------------------------------------------------------------------------------------------------------------- |\n| Patch-DSL (`[PATH#TAG]`/anchor/`SWAP DEL INS` ops) | 27 | **Recoverable** by op-truncation (§3.3) |\n| JSON-schema (`{path,edits:[…]}`) | 11 | **Not recoverable** — contamination is escaped _inside_ JSON strings, parser accepts it cleanly, content would be written verbatim into source files |\n\nFor Patch-DSL leaks specifically:\n\n- 20/27 cases: contamination on the last input line; nothing follows.\n- 7/27 cases: contamination mid-input; what follows is one of: a\n duplicate replay of an earlier file/anchor, intended content for a\n _different_ tool call (the model started its next call inline), or\n pure hallucination. Post-contamination content is never trustworthy.\n\n### 2.8 Mechanism (confirmed)\n\n**Prior collapse from null-embedding glitch tokens, into a\ncontrol-token-masked basin whose mass redistributes onto the\nplain-text shadow of the Harmony protocol.**\n\nStep by step:\n\n1. The model is mid-`{ARGS}` of a Harmony tool call. The runtime applies\n a logit mask suppressing structural control tokens (`<|channel|>`,\n `<|message|>`, `<|call|>`, `<|start|>`, `<|end|>`) inside the args\n region. Without this mask, normal generation would constantly\n hallucinate envelope-closes; with it, those token IDs have logit\n `-∞` in args.\n2. A glitch token `g` is sampled. 
By construction `g` was in the BPE\n merge corpus but barely in LM/RL training, so its **input embedding\n `e_g` ≈ near-init noise of small norm**.\n3. At position t+1, the residual update `h_{t+1} ≈ LN(h_t + e_g + Attn +\nMLP)` is dominated by the prefix-derived terms; the just-emitted-token\n signal is effectively absent. Generation diversity normally comes\n from `e_x` steering the residual into different sub-regions —\n stripped here.\n4. The next-token distribution therefore collapses onto the **conditional\n prior over continuations of the prefix, with local conditioning\n removed**. In a tool-calling rollout context, that prior is sharply\n peaked on Harmony scaffolding (control tokens + routing tokens) —\n that's what RL trained.\n5. The mask zeros the control-token IDs. Mass redistributes onto the\n **next-best continuation**: the un-bracketed surface-form spelling of\n the same protocol (`analysis`, `commentary`, ` to=functions.X`,\n `code`). This spelling is unmasked because those characters are\n ordinary tokens.\n6. Once a few tokens of plain-text scaffolding land in the residual\n stream, the prefix now resembles a fresh envelope start. The macro\n prior keeps voting for more scaffolding. Cascading (§2.3) follows.\n7. Multilingual spam after the marker is the same prior-collapse\n continuation, drawn from the training neighborhood of the glitch\n token (often ESL/auto-generated multilingual web junk — exactly the\n crawl residue in §2.6).\n\n**Two corollaries the corpus data demanded but only the experiment\nexplained:**\n\n- **The brackets never appear** (§1, §2.5). The mask is what makes the\n leak land in plain text instead of as a real envelope-close.\n- **Counterintuitive grammar dependency** (§7.4). The leak is _worse_ in\n formats closest to OpenAI's training distribution. 
Off-distribution\n custom grammars dampen the macro-prior basin; the official\n `*** Begin Patch` format is the strongest collapse target.\n\nThe 2023 SolidGoldMagikarp paper documented mechanism (1)+(2)+(4). The\nnew piece is (5): when constrained decoding masks the natural collapse\ntarget, the mass laundered through the un-masked plain-text shadow\nbecomes a structurally-invisible exfiltration channel.\n",
7
7
  "adding-a-provider.md": "# Adding a provider\n\nA provider is described in two halves:\n\n- **Catalog half** (`packages/catalog`): one entry in the `CATALOG_PROVIDERS`\n table (`packages/catalog/src/provider-models/descriptors.ts`) carrying the\n `id`, `defaultModel`, runtime model-discovery factory, and catalog-generation\n wiring. `KnownProvider`, `PROVIDER_DESCRIPTORS`, and\n `DEFAULT_MODEL_PER_PROVIDER` are derived from this table.\n- **Auth half** (`packages/ai`): one declarative `ProviderDefinition` in the\n registry carrying env-key fallbacks and login/refresh flows. The\n `OAuthProvider` union, the env-key map, the `/login` provider list, the\n `refreshOAuthToken` / `AuthStorage.login` dispatch, and the coding-agent\n callback maps are derived from the registry.\n\n**Scope.** This is for a provider that reuses an existing wire API\n(`openai-completions`, `anthropic-messages`, `google-generative-ai`, …) — the\ncommon case for gateways and API-key providers, since stream dispatch keys on\n`model.api`, not `model.provider`. Adding a *new wire protocol* (a new\n`KnownApi`) is a separate task that also touches `stream.ts` dispatch,\n`api-registry.ts`, and the catalog `types.ts`.\n\n## Shape\n\nFor the common case, a provider is **one catalog entry + one def file + one registry line**:\n\n1. **Add an entry to `CATALOG_PROVIDERS`** in\n `packages/catalog/src/provider-models/descriptors.ts` with the `id`,\n `defaultModel`, the plain API-key env var(s) as `envVars`, and (usually) a\n `createModelManagerOptions` factory. For a\n simple OpenAI-compatible gateway, build the factory in\n `packages/catalog/src/provider-models/openai-compat.ts` or inline with the\n exported `createSimpleOpenAICompletionsOptions(providerId, baseUrl, config)`.\n2. **Create `packages/ai/src/registry/<id>.ts`** exporting one\n `export const <camelId>Provider = { … } as const satisfies ProviderDefinition;`\n with the auth fields (`login`, …). 
Plain env-var names live in the catalog\n entry's `envVars`; set `envKeys` only for computed resolvers (Foundry/ADC/\n Bedrock-style probes).\n3. **Add it to the `ALL` array** in `packages/ai/src/registry/registry.ts`\n (one import + one array entry). `ALL` order is the `/login` list order for\n loginable providers.\n\nThat is the full change for:\n- env-key-only providers,\n- providers with a simple inline API-key login flow,\n- most OpenAI-compatible gateways.\n\nFor a **non-trivial provider-local OAuth flow**, put the implementation in\n`packages/ai/src/registry/oauth/<vendor>.ts` and lazy-import it from the def\nfile. The shared OAuth flow infrastructure it builds on lives in the same\n`registry/oauth/` directory.\n\nDescriptors, the default-model map, env-key map, login list, and refresh\ndispatch all update automatically; the `KnownProvider` union gains the new id\nfrom the catalog table and `OAuthProvider` from the registry.\n\n## Field reference\n\n**Catalog table entry** (`ProviderCatalogEntry`, see\n`packages/catalog/src/provider-models/descriptor-types.ts` for JSDoc):\n\n| Field | Effect |\n|---|---|\n| `id` | Required. Member of `KnownProvider`. |\n| `defaultModel` | Required. Preferred model when no explicit selection is made. |\n| `envVars` | Env var name(s), in order, for the runtime API-key fallback (`getEnvApiKey`). |\n| `createModelManagerOptions` | Runtime model-discovery factory. Present (and not `specialModelManager`) ⇒ appears in `PROVIDER_DESCRIPTORS`. |\n| `allowUnauthenticated` | Runtime creates a model manager even without a key. |\n| `dynamicModelsAuthoritative` | Successful discovery replaces bundled models. |\n| `catalogDiscovery` | `{ label, envVars?, oauthProvider?, allowUnauthenticated? }` for offline catalog generation (`generate-models.ts`). `envVars` here overrides the entry-level list when generation uses different credentials (e.g. `cursor`). 
|\n| `specialModelManager` | Bespoke runtime factory (`google-antigravity` / `google-gemini-cli` / `openai-codex`); excluded from `PROVIDER_DESCRIPTORS`. |\n\n**Registry definition** (`ProviderDefinition`, see\n`packages/ai/src/registry/types.ts`):\n\n| Field | Effect |\n|---|---|\n| `id`, `name` | Required. `name` shows in the `/login` list. |\n| `envKeys` | Computed env fallback for `getEnvApiKey`, overriding the catalog entry's `envVars`: a var name string or a `() => string \\| undefined` resolver. Omit when `envVars` covers it. |\n| `login` | Interactive login. Present ⇒ member of `OAuthProvider`, shown in `/login`, dispatchable via `AuthStorage.login`. Returns an api-key `string` or `OAuthCredentials`. |\n| `refreshToken` | OAuth refresher; omit for static-token providers (the dispatch returns credentials unchanged). |\n| `storeCredentialsAs` | Store credentials under a different provider id (e.g. `openai-codex-device` ⇒ `openai-codex`). |\n| `callbackPort` | Present ⇒ entry in the auth-broker `CALLBACK_PORTS` map. |\n| `pasteCodeFlow` | OAuth flow needs a pasted code/redirect URL ⇒ member of `PASTE_CODE_LOGIN_PROVIDERS`. |\n\n## Conventions\n\n- Use `... 
as const satisfies ProviderDefinition` so the literal `id` is preserved\n for the union derivation.\n- `login` / `refreshToken` for simple API-key or validation-based flows can live\n directly in the provider def file (export the named login function there so\n tests can import it directly).\n- `login` / `refreshToken` for heavy provider-local OAuth flows MUST reach the\n adjacent `registry/oauth/*` module via a dynamic-import\n thunk (`const { loginX } = await import(\"./oauth/x\"); return loginX(cb);`),\n keeping those flows out of the eager startup graph.\n- All OAuth code lives under `registry/oauth/`: the shared flow infra\n (`callback-server`, `pkce`, `google-oauth-shared`, `types`, the runtime API\n `index`) plus every provider flow, including the `github-copilot` / `kimi` /\n `openai-codex` helpers reused by the streaming and usage layers. The non-OAuth\n API-key helpers (`api-key-login`, `api-key-validation`) sit beside the def\n files in `registry/`, since they back simple paste-an-API-key logins.\n- For a simple OpenAI-compatible gateway, build the manager inline with the\n exported `createSimpleOpenAICompletionsOptions(providerId, baseUrl, config)` —\n no edits to `openai-compat.ts` required.\n- A `ProviderDefinition` may also be registered at runtime by an extension via\n `registerOAuthProvider` (the `AuthStorage.login` dispatcher handles built-ins\n and extensions through the same path).\n",
8
- "advisor-watchdog.md": "# Advisor and WATCHDOG.md\n\nThe advisor is an optional second model attached to a session. It reviews the primary agent's transcript after each turn, can inspect the workspace with read-only tools, and injects concise advice back into the primary session.\n\nThe advisor is not a second executor. It cannot edit files, run commands, approve actions, or change session state directly.\n\n## Implementation files\n\n- [`src/advisor/runtime.ts`](../packages/coding-agent/src/advisor/runtime.ts)\n- [`src/advisor/advise-tool.ts`](../packages/coding-agent/src/advisor/advise-tool.ts)\n- [`src/advisor/watchdog.ts`](../packages/coding-agent/src/advisor/watchdog.ts)\n- [`src/prompts/advisor/system.md`](../packages/coding-agent/src/prompts/advisor/system.md)\n- [`src/prompts/advisor/advise-tool.md`](../packages/coding-agent/src/prompts/advisor/advise-tool.md)\n- [`src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts)\n- [`src/slash-commands/builtin-registry.ts`](../packages/coding-agent/src/slash-commands/builtin-registry.ts)\n- [`src/config/settings-schema.ts`](../packages/coding-agent/src/config/settings-schema.ts)\n\n---\n\n## Enabling the advisor\n\nThe advisor requires both:\n\n1. `advisor.enabled: true`\n2. a model assigned to the `advisor` model role\n\nExample:\n\n```yaml\nmodelRoles:\n advisor: anthropic/claude-sonnet-4-5:medium\n\nadvisor:\n enabled: true\n```\n\nThe advisor role uses normal model-role resolution, including provider-prefixed ids, canonical ids, and optional thinking suffixes.\n\nSlash commands:\n\n| Command | Effect |\n|---|---|\n| `/advisor` | Toggle the persisted `advisor.enabled` setting. |\n| `/advisor on` | Enable the setting and start the runtime when an advisor model is assigned. |\n| `/advisor off` | Disable the setting and stop the runtime. |\n| `/advisor status` | Show active model, context usage, token usage, and cost. 
|\n| `/advisor dump` | Copy the advisor's compact transcript to the clipboard. |\n| `/advisor dump raw` | Copy the advisor's full dump (system prompt, tools, thinking, and calls) to the clipboard. |\n\nIf `advisor.enabled` is true but no `modelRoles.advisor` value resolves to an available model, status reports that the setting is enabled but no advisor model is assigned.\n\n## What the advisor sees\n\nAt each primary turn end, `AdvisorRuntime` receives only the new transcript delta since the last advisor update. Deltas are rendered with `formatSessionHistoryMarkdown(..., { includeThinking: true, includeToolIntent: true, watchedRoles: true })`, so the advisor can review assistant reasoning as well as user-visible text, tool calls, and tool results.\n\nAdvisor messages already injected into the primary transcript are filtered out before the next delta is rendered. This prevents the advisor from recursively reviewing its own advice.\n\nWhen the primary transcript is rewritten, the advisor runtime is reset:\n\n- compaction\n- session switch/resume\n- branch/fork style history replacement\n- context-maintenance re-prime when the advisor's own context cannot fit\n\nReset clears the advisor's private in-memory transcript and rewinds its cursor. The next advisor update replays the current bounded primary transcript instead of continuing from stale pre-rewrite context.\n\nWhen the advisor is enabled mid-session, the cursor seeds to the current primary transcript length. That avoids replaying the whole old conversation on the first enabled turn.\n\n## Tools and isolation\n\nThe advisor receives a hard-isolated read-only tool set:\n\n- `read`\n- `search`\n- `find`\n- `advise`\n\nThe read/search/find tools are built against a distinct `ToolSession` whose session id is suffixed with `-advisor`. 
The advisor therefore does not share the primary agent's file snapshots, seen-lines tracking, conflict state, summary cache, or edit/yield capabilities.\n\nThe `advise` tool accepts one note and an optional severity:\n\n| Severity | Delivery | Intended use |\n|---|---|---|\n| omitted / `nit` | Non-interrupting aside, batched into the primary transcript at the next step boundary. | Cleanup, simplification, low-risk edge cases. |\n| `concern` | Interrupting steering message. | Material risk, likely wrong direction, missing constraint, hallucinated API. |\n| `blocker` | Interrupting steering message. | Continuing would clearly waste work or produce broken output. |\n\nInterrupting advice is sent through the steering channel and can abort in-flight tools at the next steering boundary. Each note (interrupting or batched) is rendered into the primary transcript as an `<advisory>` element — severity rides a `severity` attribute, and a `guidance` attribute carries the \"weigh, don't blindly obey\" framing (the primary agent's system prompt never mentions advisories, so the tag is its only cue). Note bodies are XML-escaped so advice containing `<`, `>`, or `&` can't break the wrapper:\n\n```text\n<advisory severity=\"concern\" guidance=\"weigh, don't blindly obey\">\nnote text\n</advisory>\n```\n\nWhen you deliberately interrupt the agent (Esc, or a cancel from collab, ACP, RPC, the SDK, or an extension), the advisor stops auto-resuming it. An interrupting `concern`/`blocker` raised while the run is stopped is recorded as a visible advisor card instead of restarting the turn, and a concern already in flight when you interrupt is preserved the same way rather than driving a surprise resume. The advice re-enters context the next time you resume — a new message, the `.`/`c` continue shortcut, or a steer/follow-up. 
A normal yield is unaffected: the advisor can still steer and resume a run the agent ended on its own.\n\n## Bounded catch-up with `advisor.syncBacklog`\n\n`advisor.syncBacklog` is not lockstep turn execution. It is a bounded catch-up delay for the primary agent when the advisor falls behind.\n\nAllowed values:\n\n- `off` — never wait for advisor catch-up\n- `1`\n- `3`\n- `5`\n\nOn primary turn end:\n\n1. the primary turn delta is queued for the advisor\n2. the advisor drain loop starts or continues in the background\n3. if `advisor.syncBacklog` is not `off`, the primary agent waits only while advisor backlog is at or above the configured threshold\n4. the wait is capped at 30 seconds\n5. if the advisor catches up below the threshold, the primary continues immediately\n6. if the cap expires, the primary continues anyway\n\nPractical interpretation:\n\n- `off` favors maximum primary throughput.\n- `1` is the closest mode to synchronous review: after each queued advisor delta, the primary waits up to 30 seconds for backlog to return to zero.\n- `3` and `5` allow more advisor lag before the primary pauses.\n\nAdvisor failures do not permanently stall the primary. A failed advisor prompt is retried; after three consecutive advisor failures, the runtime logs a warning, drops the backlog, and lets the session continue.\n\n## WATCHDOG.md\n\n`WATCHDOG.md` is advisor-only guidance. 
It is appended to the advisor system prompt; it is not injected into the primary agent's normal context and does not behave like `AGENTS.md`, `RULES.md`, or other context files.\n\nUse it for review priorities: risks the advisor should watch for, project-specific traps, dangerous APIs, architectural boundaries, and quality bars that are useful to a reviewer but too noisy for the main executor.\n\nExample:\n\n```markdown\n# Watchdog notes\n\nEspecially watch for:\n\n- Changes that bypass the durable queue in `src/jobs/`.\n- UI renderer paths that display unsanitized tool output.\n- New worker spawns that do not re-enter the CLI host.\n```\n\n### Discovery locations\n\n`discoverWatchdogFiles(cwd, agentDir)` loads every readable candidate from these locations:\n\n1. user level: `<active agent dir>/WATCHDOG.md` (`~/.omp/agent/WATCHDOG.md` by default; relocated by `PI_CODING_AGENT_DIR`)\n2. project levels while walking from `cwd` upward to the git repository root, or to the home directory when no repo root is found:\n - `<dir>/WATCHDOG.md`\n - `<dir>/.omp/WATCHDOG.md`\n\nUnlike native context files, watchdog discovery does not stop at the nearest project file. Multiple project watchdog files can load together.\n\nCandidates in hidden owner directories are ignored unless the file is inside an `.omp` directory. This keeps unrelated dot-directory conventions from being picked up accidentally while still allowing `.omp/WATCHDOG.md`.\n\n### `@` imports\n\n`WATCHDOG.md` content is expanded with the same `@` import helper used by context files:\n\n- relative imports resolve from the importing file's directory\n- `~/` resolves from the user's home directory\n- imports inside fenced code blocks and inline code spans stay literal\n- cycles are skipped\n- missing or unreadable imports leave the original `@path` text in place\n\n### Prompt order\n\nLoaded watchdog blocks are sorted as:\n\n1. user-level `WATCHDOG.md`\n2. 
project-level files from farther ancestors down toward `cwd`\n\nEach file is appended to the advisor system prompt as:\n\n```xml\nEspecially pay attention to:\n<attention>\n...expanded watchdog content...\n</attention>\n```\n\nLater project files sit closer to the end of the advisor prompt, so narrower directory guidance is more prominent than broad ancestor guidance.\n\n## Subagents\n\n`advisor.subagents` controls whether spawned task/eval subagents also get an advisor runtime.\n\n- `false` (default): only the main session can run an advisor.\n- `true`: eligible subagent sessions build their own advisor with the same settings/model-role resolution, then rerun `WATCHDOG.md` discovery for that subagent session's `cwd` and agent directory.\n\nSubagent advisors remain isolated from the subagent's primary tool session in the same way the main advisor is isolated from the main agent.\n\n## Cost and context behavior\n\nAdvisor usage is separate model usage. `/advisor status` reports advisor token counts and cost from the advisor agent's own transcript.\n\nThe advisor has its own append-only context. Before each advisor prompt, `AgentSession` estimates incoming tokens and may maintain advisor context:\n\n1. try model-level context promotion when enabled and a larger compatible model is available\n2. if promotion cannot fit enough context, compact the advisor's own message history\n3. if compaction has no candidates or still cannot fit, re-prime from the current bounded primary transcript\n\nThe advisor transcript is in-memory for the session. It is retained while the session runs so `/advisor dump` can inspect it, but advisor state is not a replacement for the primary persisted transcript.\n",
8
+ "advisor-watchdog.md": "# Advisor and WATCHDOG.md\n\nThe advisor is an optional second model attached to a session. It reviews the primary agent's transcript after each turn, can inspect the workspace with read-only tools, and injects concise advice back into the primary session.\n\nThe advisor is not a second executor. It cannot edit files, run commands, approve actions, or change session state directly.\n\n## Implementation files\n\n- [`src/advisor/runtime.ts`](../packages/coding-agent/src/advisor/runtime.ts)\n- [`src/advisor/advise-tool.ts`](../packages/coding-agent/src/advisor/advise-tool.ts)\n- [`src/advisor/watchdog.ts`](../packages/coding-agent/src/advisor/watchdog.ts)\n- [`src/prompts/advisor/system.md`](../packages/coding-agent/src/prompts/advisor/system.md)\n- [`src/prompts/advisor/advise-tool.md`](../packages/coding-agent/src/prompts/advisor/advise-tool.md)\n- [`src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts)\n- [`src/slash-commands/builtin-registry.ts`](../packages/coding-agent/src/slash-commands/builtin-registry.ts)\n- [`src/config/settings-schema.ts`](../packages/coding-agent/src/config/settings-schema.ts)\n\n---\n\n## Enabling the advisor\n\nThe advisor requires both:\n\n1. `advisor.enabled: true`\n2. a model assigned to the `advisor` model role\n\nExample:\n\n```yaml\nmodelRoles:\n advisor: anthropic/claude-sonnet-4-5:medium\n\nadvisor:\n enabled: true\n```\n\nThe advisor role uses normal model-role resolution, including provider-prefixed ids, canonical ids, and optional thinking suffixes.\n\nSlash commands:\n\n| Command | Effect |\n|---|---|\n| `/advisor` | Toggle the persisted `advisor.enabled` setting. |\n| `/advisor on` | Enable the setting and start the runtime when an advisor model is assigned. |\n| `/advisor off` | Disable the setting and stop the runtime. |\n| `/advisor status` | Show active model, context usage, token usage, and cost. 
|\n| `/advisor dump` | Copy the advisor's compact transcript to the clipboard. |\n| `/advisor dump raw` | Copy the advisor's full dump (system prompt, tools, thinking, and calls) to the clipboard. |\n\nIf `advisor.enabled` is true but no `modelRoles.advisor` value resolves to an available model, status reports that the setting is enabled but no advisor model is assigned.\n\n## What the advisor sees\n\nAt each primary turn end, `AdvisorRuntime` receives only the new transcript delta since the last advisor update. Deltas are rendered with `formatSessionHistoryMarkdown(..., { includeThinking: true, includeToolIntent: true, watchedRoles: true })`, so the advisor can review assistant reasoning as well as user-visible text, tool calls, and tool results.\n\nAdvisor messages already injected into the primary transcript are filtered out before the next delta is rendered. This prevents the advisor from recursively reviewing its own advice.\n\nWhen the primary transcript is rewritten, the advisor runtime is reset:\n\n- compaction\n- session switch/resume\n- branch/fork style history replacement\n- context-maintenance re-prime when the advisor's own context cannot fit\n\nReset clears the advisor's private in-memory transcript and rewinds its cursor. The next advisor update replays the current bounded primary transcript instead of continuing from stale pre-rewrite context.\n\nWhen the advisor is enabled mid-session, the cursor seeds to the current primary transcript length. That avoids replaying the whole old conversation on the first enabled turn.\n\n## Tools and isolation\n\nThe advisor receives a hard-isolated read-only tool set:\n\n- `read`\n- `search`\n- `find`\n- `advise`\n\nThe read/search/find tools are built against a distinct `ToolSession` whose session id is suffixed with `-advisor`. 
The advisor therefore does not share the primary agent's file snapshots, seen-lines tracking, conflict state, summary cache, or edit/yield capabilities.\n\nThe `advise` tool accepts one note and an optional severity:\n\n| Severity | Delivery | Intended use |\n|---|---|---|\n| omitted / `nit` | Non-interrupting aside, batched into the primary transcript at the next step boundary. | Cleanup, simplification, low-risk edge cases. |\n| `concern` | Interrupting steering message. | Material risk, likely wrong direction, missing constraint, hallucinated API. |\n| `blocker` | Interrupting steering message. | Continuing would clearly waste work or produce broken output. |\n\nInterrupting advice is sent through the steering channel and can abort in-flight tools at the next steering boundary. Each note (interrupting or batched) is rendered into the primary transcript as an `<advisory>` element — severity rides a `severity` attribute, and a `guidance` attribute carries the \"weigh, don't blindly obey\" framing (the primary agent's system prompt never mentions advisories, so the tag is its only cue). Note bodies are XML-escaped so advice containing `<`, `>`, or `&` can't break the wrapper:\n\n```text\n<advisory severity=\"concern\" guidance=\"weigh, don't blindly obey\">\nnote text\n</advisory>\n```\n\nWhen you deliberately interrupt the agent (Esc, or a cancel from collab, ACP, RPC, the SDK, or an extension), the advisor stops auto-resuming it. An interrupting `concern`/`blocker` raised while the run is stopped is recorded as a visible advisor card instead of restarting the turn, and a concern already in flight when you interrupt is preserved the same way rather than driving a surprise resume. The advice re-enters context the next time you resume — a new message, the `.`/`c` continue shortcut, or a steer/follow-up. A normal yield is unaffected: the advisor can still steer and resume a run the agent ended on its own.\n\n`advisor.immuneTurns` limits interruption frequency. 
After the advisor successfully delivers a `concern` or `blocker` through the steering channel, later concerns/blockers are routed as non-interrupting asides until the configured number of primary turns has completed. The default is `1`. `nit` notes are unchanged, and advice raised while user-interrupt auto-resume suppression is active is still preserved instead of restarting a stopped run.\n\n## Bounded catch-up with `advisor.syncBacklog`\n\n`advisor.syncBacklog` is not lockstep turn execution. It is a bounded catch-up delay for the primary agent when the advisor falls behind.\n\nAllowed values:\n\n- `off` — never wait for advisor catch-up\n- `1`\n- `3`\n- `5`\n\nOn primary turn end:\n\n1. the primary turn delta is queued for the advisor\n2. the advisor drain loop starts or continues in the background\n3. if `advisor.syncBacklog` is not `off`, the primary agent waits only while advisor backlog is at or above the configured threshold\n4. the wait is capped at 30 seconds\n5. if the advisor catches up below the threshold, the primary continues immediately\n6. if the cap expires, the primary continues anyway\n\nPractical interpretation:\n\n- `off` favors maximum primary throughput.\n- `1` is the closest mode to synchronous review: after each queued advisor delta, the primary waits up to 30 seconds for backlog to return to zero.\n- `3` and `5` allow more advisor lag before the primary pauses.\n\nAdvisor failures do not permanently stall the primary. A failed advisor prompt is retried; after three consecutive advisor failures, the runtime logs a warning, drops the backlog, and lets the session continue.\n\n## WATCHDOG.md\n\n`WATCHDOG.md` is advisor-only guidance. 
It is appended to the advisor system prompt; it is not injected into the primary agent's normal context and does not behave like `AGENTS.md`, `RULES.md`, or other context files.\n\nUse it for review priorities: risks the advisor should watch for, project-specific traps, dangerous APIs, architectural boundaries, and quality bars that are useful to a reviewer but too noisy for the main executor.\n\nExample:\n\n```markdown\n# Watchdog notes\n\nEspecially watch for:\n\n- Changes that bypass the durable queue in `src/jobs/`.\n- UI renderer paths that display unsanitized tool output.\n- New worker spawns that do not re-enter the CLI host.\n```\n\n### Discovery locations\n\n`discoverWatchdogFiles(cwd, agentDir)` loads every readable candidate from these locations:\n\n1. user level: `<active agent dir>/WATCHDOG.md` (`~/.omp/agent/WATCHDOG.md` by default; relocated by `PI_CODING_AGENT_DIR`)\n2. project levels while walking from `cwd` upward to the git repository root, or to the home directory when no repo root is found:\n - `<dir>/WATCHDOG.md`\n - `<dir>/.omp/WATCHDOG.md`\n\nUnlike native context files, watchdog discovery does not stop at the nearest project file. Multiple project watchdog files can load together.\n\nCandidates in hidden owner directories are ignored unless the file is inside an `.omp` directory. This keeps unrelated dot-directory conventions from being picked up accidentally while still allowing `.omp/WATCHDOG.md`.\n\n### `@` imports\n\n`WATCHDOG.md` content is expanded with the same `@` import helper used by context files:\n\n- relative imports resolve from the importing file's directory\n- `~/` resolves from the user's home directory\n- imports inside fenced code blocks and inline code spans stay literal\n- cycles are skipped\n- missing or unreadable imports leave the original `@path` text in place\n\n### Prompt order\n\nLoaded watchdog blocks are sorted as:\n\n1. user-level `WATCHDOG.md`\n2. 
project-level files from farther ancestors down toward `cwd`\n\nEach file is appended to the advisor system prompt as:\n\n```xml\nEspecially pay attention to:\n<attention>\n...expanded watchdog content...\n</attention>\n```\n\nLater project files sit closer to the end of the advisor prompt, so narrower directory guidance is more prominent than broad ancestor guidance.\n\n## Subagents\n\n`advisor.subagents` controls whether spawned task/eval subagents also get an advisor runtime.\n\n- `false` (default): only the main session can run an advisor.\n- `true`: eligible subagent sessions build their own advisor with the same settings/model-role resolution, then rerun `WATCHDOG.md` discovery for that subagent session's `cwd` and agent directory.\n\nSubagent advisors remain isolated from the subagent's primary tool session in the same way the main advisor is isolated from the main agent.\n\n## Cost and context behavior\n\nAdvisor usage is separate model usage. `/advisor status` reports advisor token counts and cost from the advisor agent's own transcript.\n\nThe advisor has its own append-only context. Before each advisor prompt, `AgentSession` estimates incoming tokens and may maintain advisor context:\n\n1. try model-level context promotion when enabled and a larger compatible model is available\n2. if promotion cannot fit enough context, compact the advisor's own message history\n3. if compaction has no candidates or still cannot fit, re-prime from the current bounded primary transcript\n\nThe advisor transcript is in-memory for the session. It is retained while the session runs so `/advisor dump` can inspect it, but advisor state is not a replacement for the primary persisted transcript.\n",
9
9
  "ai-schema-normalize.md": "# AI tool-schema normalization\n\n`@oh-my-pi/pi-ai` exposes one unified schema normalizer that providers consume\nbefore tools are sent on the wire. All walkers live in\n`packages/ai/src/utils/schema/normalize.ts`; the operational contract is\n`packages/ai/src/utils/schema/CONSTRAINTS.md`.\n\nThere is no separate `strict-mode.ts` module any more — OpenAI strict-mode\nsanitization, OpenAI Responses `oneOf` rewriting, Google/Vertex/Gemini-CLI\nsanitization, Cloud Code Assist Claude sanitization, and MCP sanitization all\nshare the same option-driven walk.\n\n## Entry points\n\nAll exports live under `@oh-my-pi/pi-ai/utils/schema`:\n\n- `normalizeSchema(value, options)` — generic option-driven walker.\n- `normalizeSchemaForGoogle(value)` — Gemini / Vertex / Gemini CLI.\n- `normalizeSchemaForCCA(value)` — Cloud Code Assist Claude (Antigravity + GCA).\n- `normalizeSchemaForMCP(value)` — MCP inputSchemas before they enter the\n custom-tool registry. `tool-bridge.ts` runs every MCP `inputSchema` through\n this dispatcher.\n- `sanitizeSchemaForOpenAIResponses(schema)` (alias\n `normalizeSchemaForOpenAIResponses`) — rewrites `oneOf` → `anyOf` for the\n Responses family.\n- `sanitizeSchemaForStrictMode(schema)` and\n `enforceStrictSchema(schema)` / `tryEnforceStrictSchema(schema)` — the\n OpenAI strict-mode pipeline (sanitize → enforce). All three are exported\n from `normalize.ts`.\n- `adaptSchemaForStrict(schema, strict)` from `./adapt` — thin composer that\n upgrades draft-07 inputs to 2020-12 and wraps `tryEnforceStrictSchema` for\n provider call sites. 
`./adapt` also exports the `NO_STRICT` global-bypass\n flag (env `PI_NO_STRICT`) honored by every provider that emits `strict: true`.\n\nRemoved in the unified-flow refactor:\n\n- `strict-mode.ts` (merged into `normalize.ts`).\n- `sanitize-google.ts` and `normalize-cca.ts` (replaced by\n `normalizeSchemaFor*` dispatchers).\n- `StringEnum` helper — use `z.enum([...])` directly; Zod's emitted JSON\n Schema is already wire-compatible with Google and other providers.\n- `sanitizeSchemaFor{Google,CCA,MCP}` / `prepareSchemaForCCA` — renamed to\n `normalizeSchemaFor{Google,CCA,MCP}`.\n\n## Dispatcher mapping\n\n| Provider transport(s) | Dispatcher |\n| ------------------------------------------------------------------ | ------------------------------------------- |\n| `openai-completions`, `openai-responses`, `openai-codex-responses` | `adaptSchemaForStrict` (sanitize + enforce) |\n| `openai-responses` family (`oneOf` → `anyOf` only) | `normalizeSchemaForOpenAIResponses` |\n| `google-generative-ai`, `google-vertex`, Gemini CLI | `normalizeSchemaForGoogle` |\n| Cloud Code Assist Claude (Antigravity + GCA, `claude-*` model ids) | `normalizeSchemaForCCA` |\n| MCP `inputSchema` ingestion | `normalizeSchemaForMCP` |\n| `anthropic-messages` (native, not CCA) | per-provider whitelist in `anthropic.ts` |\n\nGemini CLI / Antigravity CCA MUST run the full `normalizeSchemaForCCA`\npipeline (not just the first keyword-stripping pass) to keep parity with the\nshared Google Claude path.\n\n## Walk semantics\n\n`normalizeSchema` first detoxifies serialized Zod-instance-shaped inputs, upgrades them to\nJSON Schema 2020-12, dereferences the tree, then walks it with the option set\npinned by the dispatcher. Each node:\n\n1. Renames `snake_case` combinator/property keys to camelCase\n (`any_of` → `anyOf`, etc.; collisions follow python-genai\n `pop(from)`/`set(to)` semantics — snake_case wins).\n2. 
Applies the `handle_null_fields` collapse for nullable unions before\n recursing into children.\n3. Strips keys the target provider does not support, optionally lifting\n human-meaningful keys (`pattern`, `format`, min/max, `default`,\n `examples`, ...) into the sibling `description` via the spill formatter\n (`spill.ts`). Structural/meta keys (`$ref`, `$defs`,\n `additionalProperties`) are not spilled.\n4. Normalizes type unions (`type: [\"T\", \"null\"]` → `type: \"T\"` + nullable\n marker on Google, plain `type: \"T\"` on CCA).\n5. Collapses object-only / same-type combiners, optionally lossy-collapses\n mixed-type combiners (CCA only), and runs the residual-combiner fixpoint.\n6. Validates with the in-house structural validator (`isValidJsonSchema`\n from `meta-validator.ts`) when `validateAndFallback` is set (CCA path)\n and emits the per-tool fallback `{ \"type\": \"object\", \"properties\": {} }`\n on residual incompatibility — `type` array, `type: \"null\"`, `nullable`\n key, or any remaining `anyOf`/`oneOf`/`allOf`.\n\n## OpenAI strict-mode pipeline\n\n`adaptSchemaForStrict(schema, strict)` runs `tryEnforceStrictSchema`,\nwhich composes:\n\n1. **Sanitize** (`sanitizeSchemaForStrictMode`): strips non-structural\n keywords (`format`, `pattern`, min/max, `examples`, `default`,\n `if`/`then`/`else`, `not`, `unevaluated*`, `patternProperties`,\n `dependent*`, `content*`, `min/maxProperties`, `$dynamicRef`, etc.). The\n `default` value is inlined into the sibling `description` as\n ` (default: X)` before being dropped, unless `description` already\n contains `(default:` or no `description` exists.\n2. **Enforce** (`enforceStrictSchema`): every object node gets\n `additionalProperties: false`, every property goes into `required`, and\n optional properties become nullable unions\n (`anyOf: [<original>, { \"type\": \"null\" }]`). 
Tuple `prefixItems` are\n strictified recursively.\n\nThe two passes use cache/cycle guards, so refs, `allOf`, and nullable wrapping\nstay deterministic without recursing forever. `tryEnforceStrictSchema` is\nfail-open: if anything throws, it returns `{ strict: false, schema: upgraded }`\nso callers MUST emit `strict: true` only when enforcement actually succeeded.\n\n### Edge cases the strict-mode normalizer handles\n\n- **Local `$ref` inlining.** OpenAI strict mode rejects\n `{ \"$ref\": \"...\", \"description\": \"...\" }` with sibling keys. The\n sanitizer pre-resolves local `#/...` refs against the root and merges\n with **sibling keys winning** over the resolved def — same precedence\n as `openai-python`'s `_ensure_strict_json_schema`. Recursive refs are\n guarded by the per-walk epoch.\n- **Single-item `allOf`.** A `{ \"allOf\": [X], ...siblings }` collapses to\n `{ ...siblings, ...X }` with the inlined entry's keys winning over the\n original siblings (matches `openai-python`'s `_pydantic.py:79-83`). Multi-item\n `allOf` is left intact for the downstream validator to reject if\n needed.\n- **Type-array branches and nullable unions.** When a node has\n `type: [\"T\", \"U\"]`, the sanitizer emits one variant schema per type,\n pruning type-specific keywords (e.g. `properties`/`required` only stay on\n the `object` variant, `items` only on the `array` variant). The shared\n `description` is **hoisted onto the `anyOf` wrapper** instead of being\n duplicated on every branch — so a strict nullable union becomes\n `{ anyOf: [T, { type: \"null\" }], description: \"...\" }`, not\n `anyOf: [{ ..., description }, { ..., description }]`.\n- **Enum/const without a `type`.** Both sanitize and enforce paths call\n `inferStrictPrimitiveTypeFromEnumOrConst` to infer the primitive `type`\n from `enum` / `const` values. 
Mixed-primitive enums (`[1, \"two\", null]`),\n enums containing objects/arrays, and non-primitive `const` values\n (`{a:1}`, `[1,2,3]`) cannot be described by a single `type` keyword and\n trigger the strict-mode fail-open path — emitting a typeless schema\n would just be rejected on the wire by OpenAI.\n\n## Performance: static fingerprint cache\n\n`resolveProviderModels` in `packages/catalog/src/model-manager.ts` and\n`readModelCache`/`writeModelCache` in `packages/catalog/src/model-cache.ts`\ncooperate via a `static_fingerprint` column on the `model_cache` SQLite\ntable (current cache schema version 6).\n\n- `fingerprintStatic(staticModels)` hashes the static catalog slice\n (`Bun.hash(JSON.stringify(models))` in base36) and memoizes the result\n by tagging the array with a symbol property. Multiple cold-start arms\n calling `resolveProviderModels` with the same `staticModels` array pay\n the JSON+hash cost once.\n- On cache read, if the network fetch is being skipped, the cached row is\n fresh + authoritative, and the cached `static_fingerprint` matches the\n current one, `resolveProviderModels` returns the cached models verbatim\n — the cache already incorporates the same static state, so re-running\n `mergeDynamicModels(static, cache)` would just rebuild the same objects.\n- `mergeModelSources` and `mergeDynamicModels` short-circuit on\n empty-source inputs (the common shape after `(static, [])` or for\n providers without a static catalog), avoiding Map churn entirely.\n\nCache rows written before the current schema version are dropped by the\ncache-version check; the column defaults to `''` for any row that survives\na version upgrade so the fingerprint-equality check naturally fails closed\nand the full merge re-runs.\n\n## Related\n\n- `docs/models.md` — registry, equivalence, compat flags\n (`supportsStrictMode`, `toolStrictMode`, `disableStrictTools`).\n- `docs/provider-streaming-internals.md` — how the normalized schemas are\n used downstream during 
the provider stream loop.\n- `docs/mcp-server-tool-authoring.md` — MCP `inputSchema` ingestion via\n `normalizeSchemaForMCP`.\n- `packages/ai/src/utils/schema/CONSTRAINTS.md` — operational contract for\n every normalization rule.\n",
10
- "approval-mode.md": "# Tool approval mode\n\nTool approval has two independent inputs:\n\n1. **Tool declaration** — every tool may declare an `approval` tier:\n - `read`: reads data or updates UI-only session metadata.\n - `write`: mutates workspace/session state but does not execute arbitrary code.\n - `exec`: executes code, shells out, drives a browser, spawns agents, or performs similarly broad actions.\n2. **User policy** — `tools.approval.<toolName>: allow | deny | prompt` overrides the mode for that tool unless a non-yolo safety override forces a prompt.\n\nTools without an `approval` declaration are treated as `exec`. This is the safe default for unknown custom tools. MCP server tools declare `write`.\n\n## Modes\n\nConfigure with `tools.approvalMode`:\n\n| Mode | Auto-approves | Prompts for |\n| ---------------- | ----------------------- | --------------- |\n| `always-ask` | `read` | `write`, `exec` |\n| `write` | `read`, `write` | `exec` |\n| `yolo` (default) | `read`, `write`, `exec` | none |\n\n`--auto-approve` and `--yolo` force `tools.approvalMode: yolo` for the session.\n\n## User overrides\n\n`tools.approval` is honored in every mode:\n\n```yaml\ntools:\n approvalMode: write\n approval:\n bash: prompt\n read: allow\n mcp__filesystem__delete: deny\n```\n\nResolution per tool call:\n\n1. Compute the tool's approval decision from `tool.approval(args)`; omitted means `exec`.\n2. Normalize `tools.approval.<tool>` if present; invalid values are ignored.\n3. In `yolo` mode, the user policy is used when present; otherwise the call is allowed. Safety `override` reasons do not force a prompt in `yolo`.\n4. In non-yolo modes, if the tool sets `override: true`, `deny` is blocked and all other cases prompt, even if user policy says `allow`.\n5. Otherwise, a valid user policy wins.\n6. 
Otherwise, the active mode auto-approves or prompts by tier.\n\n## Safety overrides\n\nA tool can force a prompt with object-form approval:\n\n```ts\napproval: { tier: \"exec\", override: true, reason: \"Critical pattern detected\" }\n```\n\n`bash` uses this for critical destructive patterns such as `rm -rf /`, fork bombs, remote-fetch-then-execute, writes to `/etc/passwd`, and host shutdown commands. These surface as `reason` in the approval prompt, but in `yolo` mode they are auto-approved unless a user policy for the tool is set to `prompt` or `deny`.\n\n## Per-tool prompt details\n\nTools can add approval-prompt body lines with `formatApprovalDetails(args)`. The standard prompt includes:\n\n- `Allow tool: <name>`\n- `Origin: MCP server tool` for unannotated `mcp__...` tools\n- `Reason: <reason>` when the tool decision supplies one\n- tool-specific details such as command, path, code, browser action, or subagent assignment\n\n## Defining approval on tools\n\nBuilt-in and custom tools share the same shape:\n\n```ts\nexport type ToolTier = \"read\" | \"write\" | \"exec\";\nexport type ToolApprovalDecision = ToolTier | { tier: ToolTier; reason?: string; override?: boolean };\nexport type ToolApproval = ToolApprovalDecision | ((args: unknown) => ToolApprovalDecision);\n\napproval?: ToolApproval;\nformatApprovalDetails?: (args: unknown) => string | string[] | undefined;\n```\n\nExamples:\n\n```ts\napproval: \"read\";\n\napproval: (args) => (LSP_READONLY_ACTIONS.has(args.action) ? \"read\" : \"write\");\n\napproval: (args) =>\n isCritical(args.command)\n ? { tier: \"exec\", override: true, reason: \"Critical pattern detected\" }\n : \"exec\";\n```\n\n## Subagents\n\nSubagents run headless with `tools.approvalMode: yolo` so they do not stall waiting for UI. The parent `task` approval is the authorization boundary. User `tools.approval.<tool>` settings continue to control whether a tool is allowed, prompted, or blocked.\n",
10
+ "approval-mode.md": "# Tool approval mode\n\nTool approval has two independent inputs:\n\n1. **Tool declaration** — every tool may declare an `approval` tier:\n - `read`: reads data or updates UI-only session metadata.\n - `write`: mutates workspace/session state but does not execute arbitrary code.\n - `exec`: executes code, shells out, drives a browser, spawns agents, or performs similarly broad actions.\n2. **User policy** — `tools.approval.<toolName>: allow | deny | prompt` overrides the mode for that tool unless a non-yolo safety override forces a prompt.\n\nTools without an `approval` declaration are treated as `exec`. This is the safe default for unknown custom tools. MCP server tools declare `write`.\n\n## Modes\n\nConfigure with `tools.approvalMode`:\n\n| Mode | Auto-approves | Prompts for |\n| ---------------- | ----------------------- | --------------- |\n| `always-ask` | `read` | `write`, `exec` |\n| `write` | `read`, `write` | `exec` |\n| `yolo` (default) | `read`, `write`, `exec` | none |\n\n`--auto-approve` and `--yolo` force `tools.approvalMode: yolo` for the session.\n\n## User overrides\n\n`tools.approval` is honored in every mode:\n\n```yaml\ntools:\n approvalMode: write\n approval:\n bash: prompt\n read: allow\n mcp__filesystem__delete: deny\n```\n\nResolution per tool call:\n\n1. Compute the tool's approval decision from `tool.approval(args)`; omitted means `exec`.\n2. Normalize `tools.approval.<tool>` if present; invalid values are ignored.\n3. In `yolo` mode, the user policy is used when present; otherwise the call is allowed. Safety `override` reasons do not force a prompt in `yolo`.\n4. In non-yolo modes, if the tool sets `override: true`, a user policy of `deny` still blocks the call and every other case prompts, even if the user policy says `allow`.\n5. Otherwise, a valid user policy wins.\n6. 
Otherwise, the active mode auto-approves or prompts by tier.\n\n## Safety overrides\n\nA tool can force a prompt with object-form approval:\n\n```ts\napproval: { tier: \"exec\", override: true, reason: \"Critical pattern detected\" }\n```\n\n`bash` uses this for critical destructive patterns such as `rm -rf /`, fork bombs, remote-fetch-then-execute, writes to `/etc/passwd`, and host shutdown commands. These surface as `reason` in the approval prompt, but in `yolo` mode they are auto-approved unless a user policy for the tool is set to `prompt` or `deny`.\n\n## Per-tool prompt details\n\nTools can add approval-prompt body lines with `formatApprovalDetails(args)`. The standard prompt includes:\n\n- `Allow tool: <name>`\n- `Origin: MCP server tool` for unannotated `mcp__...` tools\n- `Reason: <reason>` when the tool decision supplies one\n- tool-specific details such as command, path, code, browser action, or subagent assignment\n\n## Defining approval on tools\n\nBuilt-in and custom tools share the same shape:\n\n```ts\nexport type ToolTier = \"read\" | \"write\" | \"exec\";\nexport type ToolApprovalDecision = ToolTier | { tier: ToolTier; reason?: string; override?: boolean };\nexport type ToolApproval = ToolApprovalDecision | ((args: unknown) => ToolApprovalDecision);\n\napproval?: ToolApproval;\nformatApprovalDetails?: (args: unknown) => string | string[] | undefined;\n```\n\nExamples:\n\n```ts\napproval: \"read\";\n\napproval: (args) => (LSP_READONLY_ACTIONS.has(args.action) ? \"read\" : \"write\");\n\napproval: (args) =>\n isCritical(args.command)\n ? { tier: \"exec\", override: true, reason: \"Critical pattern detected\" }\n : \"exec\";\n```\n\n## ACP sessions\n\nACP (`omp acp`) uses the same settings resolver as normal OMP launches. 
Global `~/.omp/agent/config.yml` applies, project config for the ACP session `cwd` applies, and any `--config <file>` overlays passed to the ACP server process apply to sessions created by that process.\n\nTo auto-approve ACP tool calls, set the mode in global or project config:\n\n```yaml\ntools:\n approvalMode: yolo\n```\n\nOr launch the ACP server with a runtime override or a one-process config overlay:\n\n```bash\nomp acp --yolo\nomp acp --auto-approve\nomp acp --approval-mode yolo\nomp acp --config ./acp-yolo.yml # file contains tools.approvalMode: yolo\n```\n\nPrecedence is the normal settings precedence: runtime flags (`--approval-mode`, `--auto-approve`, `--yolo`) override `--config` overlays, which override project config, which overrides global config. ACP does not currently define a `session/new`, `session/load`, or `session/resume` approval-policy field, so ACP clients that need per-session yolo should launch a separate `omp acp` process with one of the flags above or with a session-specific `--config` overlay.\n\n`tools.approvalMode: yolo` fully applies to ACP when it is explicitly configured or supplied by a runtime flag. It skips OMP's approval prompts and also skips the ACP client permission gate for `bash`, `edit`, `delete`, and `move` unless `tools.approval.<tool>` is `prompt` or `deny`. The schema default is `yolo`, but default-config ACP sessions still keep the client permission gate; set `tools.approvalMode: yolo` explicitly when the client wants unattended execution.\n\nWhen ACP approval is required, OMP routes it through the ACP client instead of the terminal TUI. Client-gated `bash`, `edit`, `delete`, and `move` calls use ACP `session/request_permission`; generic approval prompts use form elicitation when the client advertises `elicitation.form`. 
A rejected, cancelled, or unsupported prompt rejects/cancels the tool call; OMP does not silently allow it.\n\n## Subagents\n\nSubagents run headless with `tools.approvalMode: yolo` so they do not stall waiting for UI. The parent `task` approval is the authorization boundary. User `tools.approval.<tool>` settings continue to control whether a tool is allowed, prompted, or blocked.\n",
11
+ "arktype-guide.md": "# ArkType Guide (for migrating Zod → ArkType in this repo)\n\nPinned to **arktype 2.2.0** (installed). Verified against the installed `.d.ts` and runtime this\nsession. Author types with `import { type } from \"arktype\"`.\n\n> **Scope rule (READ FIRST).** Zod stays supported at the **external boundary** — `Tool.parameters`\n> accepts Zod *or* ArkType *or* JSON Schema, and the public `pi.zod` extension API + the Zod-backed\n> `typebox` shim are untouched. Migrate **internal** schemas to ArkType. If a file genuinely cannot be\n> expressed cleanly in ArkType (see \"Resilient parsing\" below) and it parses an external/untrusted\n> payload, it MAY stay on Zod — say so in your report rather than shipping broken ArkType.\n\n## The detection contract (don't break it)\n`packages/ai/src/utils/schema/wire.ts` distinguishes the three schema kinds:\n- **ArkType** = a *callable function* with `.toJsonSchema` and `.assert` methods (`isArkSchema`).\n- **Zod** = a non-callable object carrying `_zod` + `.parse` (`isZodSchema`).\n- **JSON Schema** = a plain object.\n\nSo an ArkType `Type` is a function. NEVER detect it via `$`/`_arktype`/`__arktype` markers — those\ndon't exist. `isArkSchema`, `arkToWireSchema`, `isZodSchema`, `zodToWireSchema` all remain exported.\n\n## Core translation table (Zod → ArkType)\n| Zod | ArkType |\n|---|---|\n| `z.object({ a: ... })` | `type({ a: ... 
})` |\n| `z.string()` / `z.number()` / `z.boolean()` | `\"string\"` / `\"number\"` / `\"boolean\"` |\n| `z.number().int()` | `\"number.integer\"` |\n| `z.literal(\"x\")` | `\"'x'\"` ; `z.literal(5)` → `\"5\"` |\n| `z.enum([\"a\",\"b\"])` (static) | `\"'a' | 'b'\"` |\n| `z.enum(RUNTIME_ARRAY)` (dynamic) | `type.enumerated(...RUNTIME_ARRAY)` — NOT `type(arr.join(\"|\"))` |\n| `z.array(z.string())` | `\"string[]\"` |\n| `z.array(Item)` (Item is a `type`) | `Item.array()` |\n| `z.union([A,B])` | `A.or(B)` or `\"a | b\"` |\n| `z.record(z.string(), z.number())` | `type({ \"[string]\": \"number\" })` — use the real value type, NOT `\"unknown\"` unless it was `z.unknown()` |\n| `z.unknown()` / `z.any()` | `\"unknown\"` |\n| `z.null()` | `\"null\"` |\n| `z.nullable(X)` | `X.or(\"null\")` or `\"X | null\"` |\n| field `.optional()` | optional **key**: `{ \"a?\": \"string\" }` (NOT a value method) |\n| string length `.min(n)`/`.max(n)` | `\"string >= n\"` / `\"string <= n\"` / `\"1 <= string <= 10\"` |\n| number `.min/.max/.gt/.lt` | `\"number >= n\"` / `\"number > n\"` / `\"1 <= number <= 10\"` |\n| dynamic bound (runtime var) | chain methods: `type(\"string\").atLeastLength(1).atMostLength(MAX)` — NOT a template string |\n| `.describe(\"d\")` | `.describe(\"d\")` (emits JSON Schema `description`) |\n| `.strict()` (reject extras) | add key `\"+\": \"reject\"`: `type({ \"+\": \"reject\", ... })` |\n| `.strip()` (drop extras — Zod default) | add key `\"+\": \"delete\"` |\n| `.passthrough()` / `.loose()` | drop it (ArkType keeps undeclared keys by default) |\n| `.refine(fn, msg)` | `.narrow((d, ctx) => fn(d) || ctx.mustBe(\"<expectation>\"))` |\n| `z.infer<typeof S>` | `typeof S.infer` |\n| `z.input<typeof S>` | `typeof S.inferIn` |\n\n## FOOTGUNS (these caused real breakage — avoid them)\n1. **Never put `.default()` on an optional `?` key.** `z.X.default(v).optional()` in Zod is\n **output-optional** (default applied in code via `?? 
`) → translate to an **optional key, no\n default**: `\"limit?\": \"number\"`. Only `z.X.default(v)` *without* `.optional()` (output-required)\n becomes `field: type(\"number\").default(v)` (key has NO `?`).\n2. **`.default()` only works as an object-property value.** `type(\"number = 0\")` standalone throws —\n use it inline (`type({ count: \"number = 0\" })`) or `.default()` on a non-optional key.\n3. **A described literal union emits `anyOf` of `const`, not `enum`.** That is correct and validates\n identically; assert semantic wire properties (`description`, required, `additionalProperties`), not\n the exact `enum` vs `anyOf` shape.\n4. **`type()` needs a statically-known definition.** A runtime-built string (`type(arr.join(\"|\"))`,\n `type(\\`1 <= string <= ${MAX}\\`)`) fails TS. Use `type.enumerated(...)` / chain methods instead.\n5. **Integer ranges:** `\"1 <= number.integer <= 3600\"` (NOT `\"number.integer >= 1 <= 3600\"`).\n6. **`$schema` is emitted by `toJsonSchema()`** — strip it for wire parity (`delete raw.$schema`).\n\n## Validating with a schema (replacing `.parse` / `.safeParse`)\nArkType `Type` is **invoked** to validate; failure returns an `ArkErrors` instance:\n```ts\nimport { type } from \"arktype\";\nconst out = schema(value);\nif (out instanceof type.errors) {\n // out.summary -> human message; out.map(e => `${e.path}: ${e.message}`)\n throw new Error(out.summary);\n}\n// else `out` is the validated/morphed value\n```\n- `.parse(x)` → `const out = schema(x); if (out instanceof type.errors) throw new Error(out.summary); use out;`\n- `.safeParse(x).success` → `!(schema(x) instanceof type.errors)`\n- NEVER use `.allows()` for tool validation — it skips morphs/defaults/narrows.\n- `.infer` (output) and `.inferIn` (input) are inference-only properties (no runtime value).\n\n## Advanced\n\n### Scopes (reusable aliases / mutually-referential schemas)\nReplace a cluster of cross-referencing Zod schemas with a scope, then `.export()` to a 
module:\n```ts\nimport { scope } from \"arktype\";\nconst myScope = scope({\n inner: { id: \"string\" },\n outer: { inner: \"inner\", tags: \"string[]\" },\n});\nconst m = myScope.export(); // Module — m.outer, m.inner are Type instances\n```\nUse `.export()` — NOT `.compile()` (that method does not exist on a Scope).\n\n### Morphs / transforms (replacing `.transform()`)\n```ts\nconst n = type(\"string\").pipe(s => Number.parseInt(s)); // validate then transform\nconst o = type(\"string\").to(\"number.integer\"); // .to(def) == .pipe(type(def))\n```\n\n### narrow (cross-field / post-validation predicate, replacing `.refine`)\n`narrow` runs AFTER all validators/morphs (output side). `ctx.mustBe(\"<expectation>\")` returns `false`\nand records `must be <expectation>`:\n```ts\ntype({ action: \"string\", \"body?\": \"string\" })\n .narrow((p, ctx) => p.action === \"delete\" || p.body !== undefined || ctx.mustBe(\"a body unless deleting\"));\n```\n\n### Resilient parsing (replacing Zod `.catch(fallback)`)\nArkType has **no built-in `.catch()`**. For \"parse, else fallback\", wrap the unsafe work in a morph:\n```ts\nconst resilient = type(\"unknown\").pipe(raw => {\n const out = innerSchema(raw);\n return out instanceof type.errors ? FALLBACK : out; // never throws\n});\n```\nFor \"missing → default\", use the `=` default syntax (`\"number = 5\"`). 
If a parser relies heavily on\nper-field `.catch()` over an untrusted external payload and the morph rewrite gets unwieldy, that file\nis a candidate to **stay on Zod** (external-boundary exception) — note it in your report.\n\n### Defaults recap\n- `type({ count: \"number = 0\", flag: \"boolean = false\" })` — inline, output-required, wire `default`.\n- `type({ x: type(\"number\").describe(\"d\").default(0) })` — `.default()` on a NON-optional key when you\n also need `.describe()`.\n\n## When you finish a file\n- Replace `import { z } from \"zod/v4\"` with `import { type } from \"arktype\"` (keep `z` only if still used).\n- Preserve every `.describe()` string and field optionality EXACTLY.\n- Convert every `.parse`/`.safeParse` call site in the file.\n- Do NOT run build/test/lint/format — the orchestrator runs gates once at the end.\n- Report: files changed, any `.strict`→`\"+\"`, `.refine`→`.narrow`, `.catch`→morph, and any file you\n intentionally left on Zod (with the reason).\n",
11
12
  "auth-broker-gateway.md": "# Auth Broker and Auth Gateway\n\nThe auth broker and auth gateway are two cooperating HTTP services that move OAuth refresh tokens and provider access tokens off developer laptops and into a single broker host.\n\n- **`omp auth-broker serve`** holds the canonical SQLite credential vault, performs OAuth refreshes, and exposes a small REST API (`/v1/snapshot`, `/v1/snapshot/stream`, `/v1/credential/:id/refresh`, `/v1/credential/:id/disable`, `/v1/credential`, `/v1/usage`, `/v1/healthz`).\n- **`omp auth-gateway serve`** is a forward-proxy. It accepts OpenAI Chat Completions, Anthropic Messages, OpenAI Responses, and pi-native stream requests, resolves the broker-backed credential, and dispatches through `pi-ai` provider logic. Clients (containerised omp, llm-git, the macOS usage widget, …) never see the access token.\n\nTransport security between operator, broker, and gateway is delegated to the operator (Tailscale / Wireguard / reverse proxy + TLS). Every endpoint except `/v1/healthz` (broker) and `/healthz` (gateway) requires a bearer token.\n\nSource: `packages/ai/src/auth-broker/`, `packages/ai/src/auth-gateway/`, `packages/coding-agent/src/cli/auth-broker-cli.ts`, `packages/coding-agent/src/cli/auth-gateway-cli.ts`, `packages/coding-agent/src/session/auth-broker-config.ts`.\n\n## Data flow\n\n```\n ┌────────────────────────────────────────────────────────────┐\n │ broker host │\n │ │\n developer ──▶ │ ┌──────────────────────────┐ ┌────────────────────┐ │\n laptop / │ │ omp auth-broker serve │◀──▶│ SQLite agent.db │ │\n CI / robomp │ │ - holds refresh tokens │ │ (canonical writer)│ │\n │ │ - background refresher │ └────────────────────┘ │\n │ │ /v1/{snapshot,refresh,…}│ │\n │ └─────────┬────────────────┘ │\n │ │ bearer ($CONFIG_DIR/auth-broker.token) │\n │ ▼ │\n │ ┌──────────────────────────┐ │\n │ │ omp auth-gateway serve │ RemoteAuthCredentialStore │\n │ │ /v1/{chat,messages,…} │ receives snapshot stream, │\n │ │ 
/v1/usage,/v1/models │ refreshes credentials by id │\n │ │ /v1/credentials/check │ via the broker on expiry │\n │ └─────────┬────────────────┘ │\n └────────────┼───────────────────────────────────────────────┘\n │ bearer ($CONFIG_DIR/auth-gateway.token)\n ▼\n gateway clients\n (llm-git, macOS widget, robomp containers, IDE plugins, …)\n │\n ▼ provider request with broker-resolved credential\n api.anthropic.com / api.openai.com / …\n```\n\nThe broker is the only writer of OAuth refresh tokens. Clients (including the gateway itself) load a redacted snapshot in which every `refresh` field has been replaced with `REMOTE_REFRESH_SENTINEL`; when an access token expires the client calls `POST /v1/credential/:id/refresh` and the broker performs the refresh server-side. `RemoteAuthCredentialStore` rejects local replace/upsert/delete-by-provider mutations, with errors pointing at `omp auth-broker login` / `omp auth-broker logout`.\n\n## auth-broker\n\n### CLI\n\n```\nomp auth-broker serve [--bind=host:port] # boot the broker\nomp auth-broker token [--regenerate] [--json] # print or rotate the bearer token\nomp auth-broker login [<provider>] [--via=user@host] [--dry-run]\nomp auth-broker logout [<provider>]\nomp auth-broker list [--json]\nomp auth-broker import <file|dir> [--provider=<id>] [--include-disabled] [--dry-run] [--json]\nomp auth-broker migrate --from-local [--include-oauth] [--include-env] [--dry-run] [--json]\nomp auth-broker status [--json]\n```\n\n- `serve` opens the local SQLite store at `getAgentDbPath()` and binds an HTTP listener (default `127.0.0.1:8765`). On startup a token is ensured at `<config-dir>/auth-broker.token` (mode `0600`, `0700` parent dir). The background refresher refreshes any OAuth credential whose `expires - Date.now() < refreshSkewMs` (default 5 min) every `refreshIntervalMs` (default 60 s).\n- `token` prints the cached bearer or generates a new one. 
`--regenerate` rotates it.\n- `login [<provider>]` runs the per-provider OAuth flow locally — when no provider is supplied, it falls back to an interactive numbered picker. With `--via=user@host` it shells out `ssh -L <callback-port>:127.0.0.1:<callback-port> user@host omp auth-broker login <provider>` so the OAuth callback hits the local browser but the credential is written on the broker host (`--via` requires `<provider>`). Built-in callback ports: `anthropic:54545`, `openai-codex:1455`, `google-gemini-cli:8085`, `google-antigravity:51121`, `gitlab-duo:8080`. The OAuth dance is driven in-process via `AuthStorage.login()` — there is no longer a `pi-ai` bin to spawn.\n- `logout [<provider>]` deletes every credential row for `<provider>`. With no argument it shows an interactive numbered picker of currently-stored providers.\n- `list` enumerates every registered OAuth provider id/name (the union of built-ins + `registerOAuthProvider` custom providers). `--json` emits a machine-readable array.\n- `import <file|dir>` imports CLIProxyAPI-style JSON credentials into the local SQLite store. Maps `type` field → omp provider (`claude → anthropic`, `codex → openai-codex`, `gemini → google-gemini-cli`, `antigravity → google-antigravity`, `gemini-cli → google-gemini-cli`).\n- `migrate --from-local` uploads local SQLite credentials to the configured broker (`POST /v1/credential`). Local API keys are included by default; local OAuth rows are skipped unless `--include-oauth` is set; environment-derived API keys are skipped unless `--include-env` is set. 
Re-runs are idempotent against the broker snapshot.\n- `status` health-pings the configured remote broker.\n\n### Endpoints\n\n| Method | Path | Auth | Purpose |\n| ------ | ---------------------------- | ------ | ------------------------------------------------------- |\n| `GET` | `/v1/healthz` | none | Liveness + version |\n| `GET` | `/v1/snapshot` | bearer | Redacted snapshot (refresh tokens replaced by sentinel) |\n| `GET` | `/v1/snapshot/stream` | bearer | SSE snapshot stream with delta events and keepalives |\n| `POST` | `/v1/credential` | bearer | Upsert one OAuth or API-key credential |\n| `POST` | `/v1/credential/:id/refresh` | bearer | Force-refresh one OAuth credential |\n| `POST` | `/v1/credential/:id/disable` | bearer | Disable one credential with a recorded cause |\n| `GET` | `/v1/usage` | bearer | Aggregate `UsageReport[]` across credentials |\n\nRequests use `Authorization: Bearer <token>`. The server compares against an in-memory token allow-list; the gateway’s implementation uses a timing-safe comparison.\n\n### Background refresher\n\n`AuthBrokerRefresher` iterates active OAuth credentials at `refreshIntervalMs` cadence and refreshes any within `refreshSkewMs` of expiry. Refreshes are single-flighted per credential id so a slow refresh cannot be retriggered. 
The refresher distinguishes:\n\n- **definitive failures** (`invalid_grant`, `invalid_token`, `revoked`, unauthorized refresh-token, 401/403 not from a network blip) — credentials are passed to `AuthStorage.disableCredentialById(id, cause)` so the next snapshot pull surfaces a clean delete on the client;\n- **transient failures** (timeout / ECONNREFUSED / fetch failed) — left in place for the next sweep.\n\n## auth-gateway\n\n### CLI\n\n```\nomp auth-gateway serve [--bind=host:port] [--no-auth]\nomp auth-gateway token [--regenerate] [--json]\nomp auth-gateway status [--json]\nomp auth-gateway check [--strict] [--json]\n```\n\n- `serve` requires `OMP_AUTH_BROKER_URL` (or `auth.broker.url` in `config.yml`) — the gateway is itself a broker client. It calls `AuthBrokerClient.fetchSnapshot()`, wraps it in `RemoteAuthCredentialStore`, and constructs an `AuthStorage` that resolves access tokens through the broker. Default bind is `127.0.0.1:4000`. The gateway token is stored at `<config-dir>/auth-gateway.token` (`0600`); `--no-auth` disables the bearer check entirely (loopback-only use).\n- `token` / `status` manage and inspect the gateway bearer token and upstream broker readiness.\n- `check` probes broker-backed credentials through the gateway store. 
Without `--strict` it uses provider usage probes; `--strict` also exercises each credential against its chat-completion endpoint and can consume a small amount of quota.\n\n### Endpoints\n\n| Method | Path | Auth | Purpose |\n| ------ | ----------------------- | ------ | ------------------------------------------------------------ |\n| `GET` | `/healthz` | none | Liveness + version |\n| `GET` | `/v1/usage` | bearer | Aggregate `UsageReport[]` (proxied through `AuthStorage`) |\n| `GET` | `/v1/models` | bearer | Bundled-model catalog filtered to providers with credentials |\n| `GET` | `/v1/credentials/check` | bearer | Per-credential auth health probe |\n| `POST` | `/v1/chat/completions` | bearer | OpenAI Chat Completions wire format |\n| `POST` | `/v1/messages` | bearer | Anthropic Messages wire format |\n| `POST` | `/v1/responses` | bearer | OpenAI Responses wire format |\n| `POST` | `/v1/pi/stream` | bearer | Native `pi-ai` stream wire format |\n\nThe model id is read from the top-level `model` field for foreign wire formats and from the pi-native request body for `/v1/pi/stream`. The gateway picks the first bundled `Model<Api>` matching that id, parses the inbound wire format into an omp `Context`, resolves the provider credential from broker-backed `AuthStorage`, dispatches through `streamSimple()`, and re-encodes the result to the inbound format (SSE for streamed responses).\n\nThere is no raw provider passthrough path. All supported routes go through `pi-ai` provider logic so credential-specific request shaping, OAuth refresh-on-auth-error, and provider quirks stay centralized.\n\n`idleTimeout` on the underlying `Bun.serve` is set to `255 s` so long thinking-budget calls do not get killed by Bun’s default idle timeout.\n\n## Usage cache: server-side 5-min jitter + client-side 15 s single-flight\n\nTwo layers cache the aggregate provider-usage report. 
Both are intentional and stacked.\n\n### Server-side cache (broker `AuthStorage`)\n\n`AuthStorage` caches each credential’s `UsageReport` in the broker’s SQLite store at a **5-minute per-credential TTL with ±25 % jitter**. Anthropic and OpenAI rate-limit `/usage` aggressively per source IP, and a synchronized 5-credential fan-out trips 429s every cycle; the jitter decorrelates refresh times within a few cycles. On fetch failure the store keeps the **last-good** report for up to 24 h with a short jittered re-poll window — so a transient upstream blip never blanks out the widget.\n\nConstants: `USAGE_REPORT_TTL_MS = 5 * 60_000`, `USAGE_LAST_GOOD_RETENTION_MS = 24 * 60 * 60_000` (`packages/ai/src/auth-storage.ts`).\n\n### Client-side single-flight (`RemoteAuthCredentialStore`)\n\nWhen the gateway (or any other broker client) calls `fetchUsageReports()` / `getUsageReport(provider, credential)`, `RemoteAuthCredentialStore` coalesces concurrent calls into a single `GET /v1/usage` round-trip and caches the result for **15 s** in memory.\n\n- `USAGE_CACHE_TTL_MS = 15_000` (`packages/ai/src/auth-broker/remote-store.ts`).\n- A single `#usageInflight` promise is shared across all callers; a per-caller `AbortSignal` is **raced** against the shared promise, not threaded into it, so one caller’s abort never cascades into a peer’s in-flight request.\n- On fetch failure the rejected promise is logged and the awaited value is `null` — callers (`AuthStorage.fetchUsageReports`, `#getUsageReport`) treat a `null` report as \"no usage signal for this cycle\" and proceed without it. 
**This is the 15 s TTL fallback**: the client absorbs transient broker outages by suppressing the error, returning `null` to ranking, and re-attempting after the 15 s window.\n\nThe 15 s client window deliberately sits below the broker’s 5 min server cache, so almost every client poll is served from the broker’s already-cached value; the client cache exists to absorb the parallel fan-out generated by `AuthStorage.#rankOAuthSelections` into a single broker round-trip.\n\n## Client snapshot cache\n\n`discoverAuthStorage()` persists the broker snapshot to `~/.omp/cache/auth-broker-snapshot.enc` after the initial `/v1/snapshot` fetch and after later broker-sourced full snapshots. The file is AES-256-GCM encrypted with `SHA-256(OMP_AUTH_BROKER_TOKEN)` and authenticated with the broker URL as additional data, so changing either the token or URL makes the cache unreadable. The file is written atomically with mode `0600`.\n\nFreshness is anchored to the broker-stamped `snapshot.generatedAt`, not local write time. Default TTL is 1 h (`OMP_AUTH_BROKER_SNAPSHOT_TTL_MS`); `0` disables the cache and restores the old always-fetch boot path. When the cached snapshot is still fresh, `omp` boots from it and skips the blocking `/v1/snapshot` query. `RemoteAuthCredentialStore` still starts its normal SSE / long-poll background sync immediately, so deleted or rotated credentials reconcile after startup, and expired OAuth access tokens still refresh through `POST /v1/credential/:id/refresh`.\n\nIf the broker is down at boot and a fresh cache exists, startup now succeeds from the cached snapshot. If the cache is missing, expired, corrupt, written for a different URL, or encrypted with a different token, startup falls back to the live fetch and fails the same way it did before if the broker is unreachable.\n\n## Operator opt-in\n\nThe broker is **off** unless `OMP_AUTH_BROKER_URL` (or `auth.broker.url` in `config.yml`) is set. 
When set, `discoverAuthStorage` in `packages/coding-agent/src/sdk.ts` swaps the local SQLite credential store for `RemoteAuthCredentialStore` and every API call resolves credentials through the broker.\n\n### Environment variables\n\n| Variable | Purpose | Required when |\n| ----------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------- |\n| `OMP_AUTH_BROKER_URL` | Base URL of the remote auth-broker (e.g. `https://broker.tailnet:8765`). Selecting this puts the client in broker mode — local SQLite is bypassed. | Any time the omp client should resolve credentials through a broker (and required by `omp auth-gateway serve`). |\n| `OMP_AUTH_BROKER_TOKEN` | Bearer token used for every broker endpoint except `/v1/healthz`. | When `OMP_AUTH_BROKER_URL` is set and no token is available from `auth.broker.token` or `<config-dir>/auth-broker.token`. |\n| `OMP_AUTH_BROKER_SNAPSHOT_TTL_MS` | Freshness window for the encrypted local snapshot cache. Default `3600000` (1 h); `0` disables cache reads and writes. | Optional in broker mode. |\n| `OMP_AUTH_BROKER_SNAPSHOT_CACHE` | Path override for the encrypted local snapshot cache. Default `~/.omp/cache/auth-broker-snapshot.enc` (or XDG cache equivalent). | Optional in broker mode. |\n\nResolution order in `resolveAuthBrokerConfig()`:\n\n1. `OMP_AUTH_BROKER_URL` env (else `auth.broker.url` from `config.yml`, resolved through `resolveConfigValue`);\n2. `OMP_AUTH_BROKER_TOKEN` env (else `auth.broker.token` from `config.yml`, else `<config-dir>/auth-broker.token`);\n3. 
URL set but no token resolvable → hard error pointing at the token file path.\n\nThe gateway has no dedicated env vars — it inherits `OMP_AUTH_BROKER_*` because it is itself a broker client.\n\n### `config.yml` keys\n\n| Key | Default | Purpose |\n| ------------------- | ------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| `auth.broker.url` | unset | Same as `OMP_AUTH_BROKER_URL`; env wins. Hidden from the settings UI. Values are resolved as a literal, an environment variable name, or `!<shell command>` to use trimmed stdout. |\n| `auth.broker.token` | unset | Same as `OMP_AUTH_BROKER_TOKEN`; env wins. Values are resolved the same way. |\n\n### Token files\n\n| Path | Owner | Mode |\n| --------------------------------- | ---------------------------------------------------- | ----------------------------- |\n| `<config-dir>/auth-broker.token` | `omp auth-broker serve` (created at first start) | `0600` in a `0700` parent dir |\n| `<config-dir>/auth-gateway.token` | `omp auth-gateway serve` (skipped under `--no-auth`) | `0600` in a `0700` parent dir |\n\n`<config-dir>` resolves to `~/.omp/` (respecting `PI_CONFIG_DIR`).\n\n## Interaction with the local API-key resolution order\n\nThe broker only owns OAuth credentials and provider-API-key credentials that were uploaded to it. The standard credential ladder in `models.md` (`Auth and API key resolution order`) is preserved, with one addition committed alongside the gateway:\n\n- `AuthStorage.setConfigApiKey / removeConfigApiKey / clearConfigApiKeys` let a `models.yml` `apiKey` beat a stored OAuth token **without** overriding an explicit `--api-key`. 
This is what allows a broker-resolved OAuth credential to be reliably shadowed by a per-environment `models.yml` config key when both are present.\n\n## See also\n\n- [`secrets.md`](./secrets.md) — secret obfuscation around tokens that _do_ leak through (e.g. `OMP_AUTH_BROKER_TOKEN` in shell output).\n- [`models.md`](./models.md) — provider auth resolution order; the broker plugs in at layers 2–3 (stored credentials).\n- [`environment-variables.md`](./environment-variables.md) — full env reference including `OMP_AUTH_BROKER_URL` / `OMP_AUTH_BROKER_TOKEN`.\n",
12
13
  "bash-tool-runtime.md": "# Bash tool runtime\n\nThis document describes the **`bash` tool** runtime path used by agent tool calls, from command normalization to execution, truncation/artifacts, and rendering.\n\nIt also calls out where behavior diverges in interactive TUI, print mode, RPC mode, and user-initiated bang (`!`) shell execution.\n\n## Scope and runtime surfaces\n\nThere are two different bash execution surfaces in coding-agent:\n\n1. **Tool-call surface** (`toolName: \"bash\"`): used when the model calls the bash tool.\n - Entry point: `BashTool.execute()`.\n - Parameters include `command`, optional `env`, `timeout`, `cwd`, `pty`, and, when `async.enabled` is true, `async`.\n2. **User bang-command surface** (`!cmd` from interactive input or RPC `bash` command): session-level helper path.\n - Entry point: `AgentSession.executeBash()`.\n\nBoth eventually use `executeBash()` in `src/exec/bash-executor.ts` for non-PTY execution, but only the tool-call path runs normalization/interception, optional managed background-job handling, and tool renderer logic.\n\nSet `bash.enabled: false` in settings to remove the model-facing `bash` tool from the active tool registry. This does not disable user-initiated bang commands or RPC `bash` requests.\n\n## End-to-end tool-call pipeline\n\n## 1) Input handling and parameter merge\n\n`BashTool.execute()` currently handles input before execution as follows:\n\n- validates optional `env` names against shell-variable syntax,\n- when `bash.stripTrailingHeadTail` is enabled (default), applies conservative native fixups that remove safe trailing `| head` / `| tail` pipes and redundant trailing `2>&1`,\n- extracts a leading single-line `cd <path> && ...` into `cwd` when `cwd` was not supplied,\n- rejects `async: true` when `async.enabled` is false.\n\nThere are no structured `head` or `tail` tool parameters in the current schema. 
Output limiting is handled by `OutputSink` truncation/artifacts, and the optional trailing-pipe fixup exists to avoid hiding output before the harness can capture it.\n\n## 2) Optional interception (blocked-command path)\n\nIf `bashInterceptor.enabled` is true, `BashTool` loads rules from settings (`getBashInterceptorRules()`) and runs `checkBashInterception()` against the command — checking both the original and the cwd-normalized form (after a leading `cd … &&` is extracted) when they differ.\n\nInterception behavior:\n\n- command is blocked **only** when:\n - regex rule matches, and\n - the suggested tool is present in `ctx.toolNames`.\n- invalid regex rules are silently skipped.\n- on block, `BashTool` throws `ToolError` with message:\n - `Blocked: ...`\n - original command included.\n\nDefault rule patterns (defined in code) target common misuses:\n\n- file readers (`cat`, `head`, `tail`, ...)\n- search tools (`grep`, `rg`, ...)\n- file finders (`find`, `fd`, ...)\n- in-place editors (`sed -i`, `perl -i`, `awk -i inplace`)\n- shell redirection writes (`echo ... 
> file`, heredoc redirection)\n\n### Caveat\n\n`InterceptionResult` includes `suggestedTool`, but `BashTool` currently surfaces only the message text (no structured suggested-tool field in `details`).\n\n## 3) CWD validation and timeout clamping\n\n`cwd` is resolved relative to session cwd (`resolveToCwd`), then validated via `stat`:\n\n- missing path -> `ToolError(\"Working directory does not exist: ...\")`\n- non-directory -> `ToolError(\"Working directory is not a directory: ...\")`\n\nTimeout is clamped to `[1, 3600]` seconds and converted to milliseconds.\n\n## 4) Artifact allocation\n\nBefore execution, the tool allocates an artifact path/id (best-effort) for truncated output storage.\n\n- artifact allocation failure is non-fatal (execution continues without artifact spill file),\n- artifact id/path are passed into execution path for full-output persistence on truncation.\n\n## 5) PTY vs non-PTY execution selection\n\nPTY eligibility is decided by `canUseInteractiveBashPty(pty, ctx)` (`src/tools/bash-pty-selection.ts`); the local PTY overlay runs only when all are true:\n\n- tool input `pty === true`\n- `PI_NO_PTY !== \"1\"`\n- tool context has UI (`ctx.hasUI === true` and `ctx.ui` set)\n\nIf `pty` is requested but unavailable, the call falls back to non-PTY and appends a `pty requested but unavailable …` notice.\n\nBefore the local PTY/non-PTY choice, a foreground (`async: false`) call can route to a managed background job (auto-backgrounding; see below) or — when the session's client advertises a terminal capability (`clientBridge.capabilities.terminal` + `createTerminal`, with `pty` false) — to a **client-bridge editor terminal** that runs the command remotely (streaming `terminalId` updates, killing on timeout, mapping a signal kill to exit code `137`). 
Otherwise it uses non-interactive `executeBash()`.\n\nThat means print mode and non-UI RPC/tool contexts always use non-PTY.\n\n## Non-interactive execution engine (`executeBash`)\n\n## Shell session reuse model\n\n`executeBash()` caches native `Shell` instances in a process-global map keyed by:\n\n- shell path,\n- configured command prefix,\n- snapshot path,\n- serialized shell env,\n- optional agent session key,\n- minimizer configuration.\n\nSession-level bang-command executions pass `sessionKey: this.sessionId`.\n\nTool-call executions pass `sessionKey: this.session.getSessionId?.()`, when available. In both surfaces, a session key isolates shell reuse per session; without one, reuse falls back to shell config/snapshot/env.\n\nConcurrent calls never share one `Shell`: the native session runs one command at a time and `Shell.abort()` kills every in-flight run on it. `executeBash()` tracks in-flight keys in `shellSessionsInUse`; while a key is busy, overlapping calls skip the cache and run through one-shot `executeShell()` (same isolation as quarantined sessions). 
Only the owning call releases the in-use flag or deletes the cached session in its `finally`.\n\n## Shell config and snapshot behavior\n\nAt each call, executor loads settings shell config (`shell`, `env`, optional `prefix`).\n\nIf selected shell includes `bash`, it attempts `getOrCreateSnapshot()`:\n\n- snapshot captures aliases/functions/options from user rc,\n- snapshot creation is best-effort,\n- failure falls back to no snapshot.\n\nIf `prefix` is configured, command becomes:\n\n```text\n<prefix> <command>\n```\n\nThe per-command child environment is built by `buildNonInteractiveEnv()` (`src/exec/non-interactive-env.ts`), which layers non-interactive hardening defaults **under** the caller's `env` overrides:\n\n- pagers disabled (`PAGER=cat`, `GIT_PAGER=cat`, … and `LESS=FRX`),\n- editor prompts disabled (`GIT_EDITOR=true`, `EDITOR=true`, `VISUAL=true`),\n- terminal/credential prompts reduced (`TERM=dumb`, `GIT_TERMINAL_PROMPT=0`, `SSH_ASKPASS=/usr/bin/false`, `NO_COLOR=1`, `CI=1`),\n- package-manager/tooling automation flags for non-interactive behavior (npm/pnpm/yarn/pip/cargo/terraform/gh, …),\n- on Windows, UTF-8 locale/codepage defaults are added when absent.\n\n## Streaming and cancellation\n\n`Shell.run()` streams chunks to `OutputSink` and optional `onChunk` callback.\n\nCancellation:\n\n- aborted signal triggers `shellSession.abort(...)`,\n- timeout from native result is mapped to `cancelled: true` + annotation text,\n- explicit cancellation similarly returns `cancelled: true` + annotation.\n\nNo exception is thrown inside executor for timeout/cancel; it returns structured `BashResult` and lets caller map error semantics.\n\n## Interactive PTY path (`runInteractiveBashPty`)\n\nWhen PTY is enabled, tool runs `runInteractiveBashPty()` which opens an overlay console component and drives a native `PtySession`.\n\nBehavior highlights:\n\n- xterm-headless virtual terminal renders viewport in overlay,\n- keyboard input is normalized (including Kitty 
sequences and application cursor mode handling),\n- `esc` while running kills the PTY session,\n- terminal resize propagates to PTY (`session.resize(cols, rows)`).\n\nUnlike the non-PTY engine, the interactive PTY path does **not** apply the non-interactive hardening. It inherits the user's environment and sets a real `TERM=xterm-256color` (applied as an override on the Rust side) so editors, pagers, and TUIs behave like a normal terminal.\n\nPTY output is normalized (`CRLF`/`CR` to `LF`, `sanitizeText`) and written into `OutputSink`, including artifact spill support.\n\nOn PTY startup/runtime error, sink receives `PTY error: ...` line and command finalizes with undefined exit code.\n\n## Output handling: streaming, truncation, artifact spill\n\nBoth PTY and non-PTY paths use `OutputSink`.\n\n## OutputSink semantics\n\nThe bash executor builds the sink with `headBytes` and `maxColumns` from settings (`resolveOutputSinkHeadBytes` / `resolveOutputMaxColumns`).\n\n- keeps a UTF-8-safe rolling **tail** window (`spillThreshold`, `DEFAULT_MAX_BYTES`, currently 50KB); on overflow it trims to the tail (UTF-8 boundary safe) and marks `truncated`,\n- when `headBytes > 0` (`tools.artifactHeadBytes`, default 20KB) it also retains a **head** window and elides the middle, splicing an elision marker between head and tail in `dump()`,\n- per-line column cap: when `maxColumns > 0` (`tools.outputMaxColumns`, default 768 bytes) over-wide lines are ellipsis-truncated at write time and the rest of the line is dropped,\n- tracks total bytes/lines seen,\n- mirrors the **raw, uncapped** stream to the artifact file when output overflows, a column cap dropped bytes, or the file is already active,\n- marks `truncated` on tail overflow, middle elision, column-cap drops, or file spill.\n\n`dump()` returns:\n\n- `output` (possibly annotated prefix),\n- `truncated`,\n- `totalLines/totalBytes`,\n- `outputLines/outputBytes`,\n- `elidedBytes/elidedLines` when the middle was elided,\n- 
`columnDroppedBytes/columnTruncatedLines` when the per-line cap fired,\n- `artifactId` if artifact file was active.\n\n### Long-output caveat\n\nRuntime truncation is byte-threshold based in `OutputSink` (50KB tail window by default, plus an optional head window for middle elision). It does not enforce a hard line-count cap in this code path.\n\n### Shell output minimizer\n\nNon-PTY execution also passes shell-minimizer settings into the native `Shell` session. When the minimizer rewrites verbose output, the executor replaces the sink's visible text with the minimized text and, when possible, saves the raw original capture as a separate `bash-original` artifact referenced by a `[raw output: artifact://<id>]` footer.\n\n## Live tool updates and async jobs\n\nFor non-PTY foreground execution, `BashTool` uses a separate `TailBuffer` for partial updates and emits `onUpdate` snapshots while command is running.\n\nFor PTY execution, live rendering is handled by custom UI overlay, not by `onUpdate` text chunks.\n\nWhen `async.enabled` is true and the call passes `async: true`, `BashTool` starts a managed bash job, returns a running job result with a job id, and stores completion through the session managed-job path. Auto-backgrounding can also start this path after `bash.autoBackground.thresholdMs`.\n\n## Result shaping, metadata, and error mapping\n\nAfter execution:\n\n1. `cancelled` handling:\n - if abort signal is aborted -> throw `ToolAbortError` (abort semantics),\n - else -> throw `ToolError` (treated as tool failure).\n2. PTY `timedOut` -> throw `ToolError`.\n3. empty output becomes `(no output)`.\n4. attach truncation metadata via `toolResult(...).truncationFromSummary(result, { direction: \"tail\" })`.\n5. exit-code mapping:\n - missing exit code -> throw `ToolError(\"... 
missing exit status\")`\n - non-zero exit -> error result with `\"Command exited with code N\"` and `details.exitCode`\n - zero exit -> success result.\n\nSuccess payload structure:\n\n- `content`: text output,\n- `details.meta.truncation` when truncated, including:\n - `direction`, `truncatedBy`, total/output line+byte counts,\n - `shownRange`,\n - `artifactId` when available.\n\nBecause built-in tools are wrapped with `wrapToolWithMetaNotice()`, truncation notice text is appended to final text content automatically (for example: `Read artifact://<id> for full output`).\n\n## Rendering paths\n\n## Tool-call renderer (`bashToolRenderer`)\n\n`bashToolRenderer` is used for tool-call messages (`toolCall` / `toolResult`):\n\n- collapsed mode shows visual-line-truncated preview,\n- expanded mode shows all currently available output text,\n- warning line includes truncation reason and `artifact://<id>` when truncated,\n- timeout value (from args) is shown in footer metadata line.\n\n### Caveat: full artifact expansion\n\n`BashRenderContext` has `isFullOutput`, but current renderer context builder does not set it for bash tool results. 
Expanded view still uses the text already in result content (tail/truncated output) unless another caller provides full artifact content.\n\n## User bang-command component (`BashExecutionComponent`)\n\n`BashExecutionComponent` is for user `!` commands in interactive mode (not model tool calls):\n\n- streams chunks live,\n- collapsed preview keeps last 20 logical lines,\n- line clamp at 4000 chars per line,\n- shows truncation + artifact warnings when metadata is present,\n- marks cancelled/error/exit state separately.\n\nThis component is wired by `CommandController.handleBashCommand()` and fed from `AgentSession.executeBash()`.\n\n## Mode-specific behavior differences\n\n| Surface | Entry path | PTY eligible | Live output UX | Error surfacing |\n| ------------------------------ | ----------------------------------------------------- | ----------------------------------------------------- | ------------------------------------------------------------------------ | ------------------------------------------------ |\n| Interactive tool call | `BashTool.execute` | Yes, when `pty=true` and UI exists and `PI_NO_PTY!=1` | PTY overlay (interactive) or streamed tail updates | Tool errors become `toolResult.isError` |\n| Print mode tool call | `BashTool.execute` | No (no UI context) | No TUI overlay; output appears in event stream/final assistant text flow | Same tool error mapping |\n| RPC tool call (agent tooling) | `BashTool.execute` | Usually no UI -> non-PTY | Structured tool events/results | Same tool error mapping |\n| Interactive bang command (`!`) | `AgentSession.executeBash` + `BashExecutionComponent` | No (uses executor directly) | Dedicated bash execution component | Controller catches exceptions and shows UI error |\n| RPC `bash` command | `rpc-mode` -> `session.executeBash` | No | Returns `BashResult` directly | Consumer handles returned fields |\n\n## Operational caveats\n\n- Interceptor only blocks commands when suggested tool is currently available in 
context.\n- If artifact allocation fails, truncation still occurs but no `artifact://` back-reference is available.\n- Shell session cache has no explicit eviction in this module; lifetime is process-scoped.\n- PTY and non-PTY timeout surfaces differ:\n - PTY exposes explicit `timedOut` result field,\n - non-PTY maps timeout into `cancelled + annotation` summary.\n\n## Implementation files\n\n- [`src/tools/bash.ts`](../packages/coding-agent/src/tools/bash.ts) — tool entrypoint, input handling/interception, async and PTY/non-PTY selection, result/error mapping, bash tool renderer.\n- [`src/tools/bash-pty-selection.ts`](../packages/coding-agent/src/tools/bash-pty-selection.ts) — `canUseInteractiveBashPty` predicate for choosing the local PTY overlay.\n- [`src/tools/bash-command-fixup.ts`](../packages/coding-agent/src/tools/bash-command-fixup.ts) — native-backed conservative cleanup for trailing `head`/`tail` pipes and redundant `2>&1`.\n- [`src/tools/bash-interceptor.ts`](../packages/coding-agent/src/tools/bash-interceptor.ts) — interceptor rule matching and blocked-command messages.\n- [`src/exec/bash-executor.ts`](../packages/coding-agent/src/exec/bash-executor.ts) — non-PTY executor, shell session reuse, cancellation wiring, output sink integration.\n- [`src/exec/non-interactive-env.ts`](../packages/coding-agent/src/exec/non-interactive-env.ts) — non-interactive child-process env defaults (`buildNonInteractiveEnv`) used by the non-PTY executor.\n- [`src/tools/bash-interactive.ts`](../packages/coding-agent/src/tools/bash-interactive.ts) — PTY runtime, overlay UI, input normalization, and interactive `TERM` setup.\n- [`src/session/streaming-output.ts`](../packages/coding-agent/src/session/streaming-output.ts) — `OutputSink`, `TailBuffer`, truncation/artifact spill, and summary metadata.\n- [`src/tools/output-meta.ts`](../packages/coding-agent/src/tools/output-meta.ts) — truncation metadata shape + notice injection wrapper.\n- 
[`src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts) — session-level `executeBash`, message recording, abort lifecycle.\n- [`src/modes/components/bash-execution.ts`](../packages/coding-agent/src/modes/components/bash-execution.ts) — interactive `!` command execution component.\n- [`src/modes/controllers/command-controller.ts`](../packages/coding-agent/src/modes/controllers/command-controller.ts) — wiring for interactive `!` command UI stream/update completion.\n- [`src/modes/rpc/rpc-mode.ts`](../packages/coding-agent/src/modes/rpc/rpc-mode.ts) — RPC `bash` and `abort_bash` command surface.\n- [`src/internal-urls/artifact-protocol.ts`](../packages/coding-agent/src/internal-urls/artifact-protocol.ts) — `artifact://<id>` resolution.\n",
13
14
  "blob-artifact-architecture.md": "# Blob and artifact storage architecture\n\nThis document describes how coding-agent stores large/binary payloads outside session JSONL, how truncated tool output is persisted, and how internal URLs (`artifact://`, `agent://`) resolve back to stored data.\n\n## Why two storage systems exist\n\nThe runtime uses two different persistence mechanisms for different data shapes:\n\n- **Content-addressed blobs** (`blob:sha256:<hash>`): global storage used to externalize large image base64 payloads and provider image data URLs from persisted session entries.\n- **Session-scoped artifacts** (files under `<sessionFile-without-.jsonl>/`): per-session text files used for full tool outputs and subagent outputs.\n\nThey are intentionally separate:\n\n- blob storage optimizes deduplication and stable references by content hash,\n- artifact storage optimizes append-only session tooling and human/tool retrieval by local IDs.\n\n## Storage boundaries and on-disk layout\n\n### Blob store boundary (global)\n\n`SessionManager` constructs `BlobStore(getBlobsDir())`, so blob files live in a shared global blob directory, not in a session folder.\n\nBlob file naming:\n\n- file path: `<blobsDir>/<sha256-hex>`\n- canonical file has no extension; when an extension is supplied (image MIME type), a typed sidecar `<sha256-hex>.<ext>` is hardlinked (or copied) next to it so OS openers can type-detect\n- reference string stored in entries: `blob:sha256:<sha256-hex>`\n\nImplications:\n\n- same binary content across sessions resolves to the same hash/path,\n- writes are idempotent at the content level,\n- blobs can outlive any individual session file.\n\n## Artifact boundary (session-local)\n\n`ArtifactManager` derives artifact directory from session file path:\n\n- session file: `.../<timestamp>_<sessionId>.jsonl`\n- artifacts directory: `.../<timestamp>_<sessionId>/` (strip `.jsonl`)\n\nArtifact types share this directory:\n\n- truncated tool output files: 
`<numericId>.<toolType>.log` (for `artifact://`)\n- subagent output files: `<outputId>.md` (for `agent://`)\n- subagent session JSONL sidecars: `<outputId>.jsonl` when task execution receives an artifacts directory\n\nSubagents can adopt the parent `ArtifactManager`; in that case parent and subagent tree share one artifact directory and numeric artifact ID space.\n\n## ID and name allocation schemes\n\n### Blob IDs: content hash\n\n`BlobStore.put()` / `putSync()` computes SHA-256 over the bytes it is given and returns:\n\n- `hash`: hex digest,\n- `path`: `<blobsDir>/<hash>`,\n- `displayPath`: `<blobsDir>/<hash>.<ext>` when an extension was supplied, otherwise the canonical path,\n- `ref`: `blob:sha256:<hash>`.\n\nNo session-local counter is used.\n\n### Artifact IDs: session-local monotonic integer\n\n`ArtifactManager` scans existing `*.log` artifact files on first directory-backed allocation to find max existing numeric ID and sets `nextId = max + 1`.\n\nAllocation behavior:\n\n- file format: `{id}.{toolType}.log`\n- IDs are sequential strings (`\"0\"`, `\"1\"`, ...)\n- resume does not overwrite existing artifacts because scan happens before allocation\n- the directory is created lazily on first save/allocation\n\nIf the artifact directory is missing, scanning yields an empty list and allocation starts from `0`.\n\nNon-persistent sessions without an adopted manager can store `saveArtifact(...)` content in memory under numeric IDs, but `artifact://` resolution is file-backed through registered artifact directories.\n\n### Agent output IDs (`agent://`)\n\n`AgentOutputManager` allocates IDs for subagent outputs from the requested name, used verbatim the first time and suffixed (`-2`, `-3`, …) only when the same name repeats (e.g. `Anna`, `Anna-2`). Nested outputs are grouped under the parent prefix (e.g. `Parent.Child`). 
It scans existing `.md` files on initialization so a resumed session never reuses a name that would clobber a prior output.\n\n## Persistence dataflow\n\n### 1) Session entry persistence rewrite path\n\nBefore a session entry is written — incremental append (`#appendToSessionFile`) or a full-file rewrite (`#rewriteSynchronously` / `#rewriteAtomically`) — `SessionManager` serializes it through `#lineFor()`, which runs `prepareEntryForPersistence()` over the truncation pipeline.\n\nKey behaviors:\n\n1. **Large string truncation**: oversized strings are cut and suffixed with `\"[Session persistence truncated large content]\"`; signature fields (`thinkingSignature`, `thoughtSignature`, `textSignature`) are cleared instead of truncated.\n2. **Transient field stripping**: `partialJson` and `jsonlEvents` are removed from persisted entries.\n3. **Image externalization to blobs**:\n - image blocks in `content` arrays are externalized when `data` is not already a blob ref and base64 length is at least threshold (`BLOB_EXTERNALIZE_THRESHOLD = 1024`),\n - provider-style `image_url` data URLs are externalized when they start with `data:image/` and contain `;base64,`,\n - image block `data` is stored as decoded binary bytes,\n - provider data URLs are stored as the original UTF-8 data URL string,\n - persisted values are replaced with `blob:sha256:<hash>`.\n\nThis keeps session JSONL compact while preserving recoverability.\n\n### 2) Session load rehydration path\n\nWhen opening a session (`setSessionFile`), after migrations, `SessionManager` runs `resolveBlobRefsInEntries()`.\n\nFor message/custom-message image blocks with `blob:sha256:<hash>` and for persisted provider `image_url` fields with blob refs:\n\n- reads blob bytes from blob store,\n- converts image-block bytes back to base64,\n- converts provider `image_url` blobs back to the original string,\n- mutates in-memory entry fields for runtime consumers.\n\nIf a blob is missing:\n\n- image-block resolution logs a warning 
and keeps the original `blob:sha256:` ref string in memory,\n- provider `image_url` resolution logs a warning and keeps the original ref string,\n- load continues.\n\n### 3) Tool output spill/truncation path\n\n`OutputSink` powers streaming output in bash/python/ssh and related executors.\n\nBehavior:\n\n1. Every chunk is sanitized with `sanitizeWithOptionalSixelPassthrough(..., sanitizeText)` and appended to in-memory accounting.\n2. Optional live `onChunk` receives sanitized pre-column-cap chunks, throttled if configured.\n3. A per-line column cap can drop bytes from long lines in the LLM-facing buffer; when this happens, artifact mirroring starts so the on-disk file keeps the full sanitized stream.\n4. When the in-memory tail buffer would exceed spill threshold (`DEFAULT_MAX_BYTES`, 50KB), sink marks output truncated and starts artifact mirroring if an artifact path is available.\n5. If a file sink is opened, it first writes the current buffer, then all queued/subsequent sanitized chunks.\n6. In-memory buffer is trimmed to a tail window, or to head + elision marker + tail when head retention is configured.\n7. `dump()` returns summary including `artifactId` only when file sink creation succeeded.\n\nPractical effect:\n\n- UI/tool return shows bounded output,\n- full sanitized output is preserved in artifact file and referenced as `artifact://<id>` when file-backed artifact mirroring succeeded.\n\nIf file sink creation fails (I/O error, missing path, etc.), sink falls back to in-memory truncation only; full output is not persisted.\n\n## URL access model\n\n### `blob:` references\n\n`blob:sha256:<hash>` is a persistence reference inside session entry payloads, not an internal URL scheme handled by the router. 
Resolution is done by `SessionManager` during session load.\n\n### `artifact://<id>`\n\nHandled by `ArtifactProtocolHandler` over registered active session artifact directories:\n\n- requires a numeric ID,\n- searches each registered artifacts directory for filename prefix `<id>.`,\n- returns raw text (`text/plain`) from the matched `.log` file,\n- when missing, error includes available numeric artifact IDs from existing artifact files.\n\nFailure behavior:\n\n- if no artifact directories are registered: throws `No session - artifacts unavailable`,\n- if registered directories exist but none are present on disk: throws `No artifacts directory found`,\n- if ID is not numeric: throws `artifact:// ID must be numeric, got: <id>`.\n\n### `agent://<id>`\n\nHandled by `AgentProtocolHandler` over registered active session artifact directories and `<artifactsDir>/<id>.md`:\n\n- plain form returns markdown text,\n- `/path` or `?q=` forms perform JSON extraction,\n- path and query extraction cannot be combined,\n- if extraction requested, file content must parse as JSON.\n\nFailure behavior:\n\n- if no artifact directories are registered: throws `No session - agent outputs unavailable`,\n- if registered directories exist but none are present on disk: throws `No artifacts directory found`,\n- missing output throws `Not found: <id>` with available `.md` output IDs when directory listing succeeds.\n\nRead tool integration:\n\n- `read` supports offset/limit pagination for non-extraction internal URL reads,\n- rejects offset/limit when `agent://` extraction is used.\n\n## Resume, fork, and move semantics\n\n### Resume\n\n- `ArtifactManager` scans existing `{id}.*.log` files on first allocation and continues numbering.\n- `AgentOutputManager` scans existing `.md` output IDs so new outputs never reuse an existing name (a repeated name gets the next free `-N` suffix).\n- `SessionManager` rehydrates blob refs to base64/data URLs on load.\n\n### Fork\n\n`SessionManager.fork()` creates a new session file with new session ID and `parentSession` link, then 
returns old/new file paths. Artifact copying is handled by `AgentSession.fork()`:\n\n- flushes current session first,\n- attempts recursive copy of old artifact directory to new artifact directory,\n- missing old directory is tolerated,\n- non-ENOENT copy errors are logged as warnings and fork still completes.\n\nID implications after fork:\n\n- if copy succeeded, artifact counters in the new session continue after max copied ID when the new `ArtifactManager` first scans,\n- if copy failed/skipped, new session artifact IDs start from `0`.\n\nBlob implications after fork:\n\n- blobs are global and content-addressed, so no blob directory copy is required.\n\n### Move to new cwd\n\n`SessionManager.moveTo()` renames both session file and artifact directory to the new default session directory, with rollback logic if a later step fails. This preserves artifact identity while relocating session scope.\n\n## Failure handling and fallback paths\n\n| Case | Behavior |\n| --------------------------------------------------------- | -------------------------------------------------------------------- |\n| Blob file missing during image-block rehydration | Warn and keep `blob:sha256:` ref string in memory |\n| Blob file missing during provider `image_url` rehydration | Warn and keep `blob:sha256:` ref string in memory |\n| Blob read ENOENT via `BlobStore.get` | Returns `null` |\n| Artifact directory missing (`ArtifactManager.listFiles`) | Returns empty list (allocation can start fresh) |\n| No registered artifact dirs (`artifact://`) | Throws `No session - artifacts unavailable` |\n| No registered artifact dirs (`agent://`) | Throws `No session - agent outputs unavailable` |\n| Registered artifact dirs missing on disk | Throws explicit `No artifacts directory found` |\n| Artifact ID not found | Throws with available IDs listing |\n| OutputSink artifact writer init fails | Continues with bounded in-memory output only |\n| Non-persistent `saveArtifact` | Stores text in 
`SessionManager` memory map; not file-backed URL data |\n\n## Binary blob externalization vs text-output artifacts\n\n- **Blob externalization** is for image payloads inside persisted session entry content and provider image data URLs; it replaces inline payload strings in JSONL with stable content refs.\n- **Artifacts** are plain text files for execution output and subagent output; file-backed artifacts are addressable by session-local IDs through internal URLs.\n\nThe two systems intersect only indirectly: both reduce session JSONL bloat, but they have different identity, lifetime, and retrieval paths.\n\n## Implementation files\n\n- [`src/session/blob-store.ts`](../packages/coding-agent/src/session/blob-store.ts) — blob reference format, hashing, put/get, externalize/resolve helpers.\n- [`src/session/artifacts.ts`](../packages/coding-agent/src/session/artifacts.ts) — session artifact directory model and numeric artifact ID/path allocation.\n- [`src/session/streaming-output.ts`](../packages/coding-agent/src/session/streaming-output.ts) — `OutputSink` truncation/spill-to-file behavior and summary metadata.\n- [`src/session/session-manager.ts`](../packages/coding-agent/src/session/session-manager.ts) — `BlobStore`/`ArtifactManager` construction, persistence-transform and blob-rehydration call sites, session fork/move interactions.\n- [`src/session/session-persistence.ts`](../packages/coding-agent/src/session/session-persistence.ts) — `prepareEntryForPersistence()`: large-string truncation, transient-field stripping, and synchronous image-blob externalization.\n- [`src/session/session-loader.ts`](../packages/coding-agent/src/session/session-loader.ts) — `resolveBlobRefsInEntries()`: blob-ref rehydration to base64 / data URLs on load.\n- [`src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts) — artifact directory copy during interactive fork.\n- 
[`src/internal-urls/artifact-protocol.ts`](../packages/coding-agent/src/internal-urls/artifact-protocol.ts) — `artifact://` resolver.\n- [`src/internal-urls/agent-protocol.ts`](../packages/coding-agent/src/internal-urls/agent-protocol.ts) — `agent://` resolver + JSON extraction.\n- [`src/internal-urls/router.ts`](../packages/coding-agent/src/internal-urls/router.ts) — internal URL router wiring.\n- [`src/task/output-manager.ts`](../packages/coding-agent/src/task/output-manager.ts) — session-scoped agent output ID allocation for `agent://`.\n- [`src/task/executor.ts`](../packages/coding-agent/src/task/executor.ts) — subagent output artifact writes (`<id>.md`) and session JSONL sidecars.\n",
@@ -16,15 +17,15 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
16
17
  "config-usage.md": "# Configuration Discovery and Resolution\n\nThis document describes how the coding-agent resolves configuration today: which roots are scanned, how precedence works, and how resolved config is consumed by settings, skills, hooks, tools, and extensions.\n\n## Scope\n\nPrimary implementation:\n\n- `packages/coding-agent/src/config.ts`\n- `packages/coding-agent/src/config/config-file.ts` (re-exported from `config.ts`)\n- `packages/coding-agent/src/config/settings.ts`\n- `packages/coding-agent/src/config/settings-schema.ts`\n- `packages/coding-agent/src/discovery/builtin.ts`\n- `packages/coding-agent/src/discovery/helpers.ts`\n\nKey integration points:\n\n- `packages/coding-agent/src/capability/index.ts`\n- `packages/coding-agent/src/discovery/index.ts`\n- `packages/coding-agent/src/extensibility/skills.ts`\n- `packages/coding-agent/src/extensibility/hooks/loader.ts`\n- `packages/coding-agent/src/extensibility/custom-tools/loader.ts`\n- `packages/coding-agent/src/extensibility/extensions/loader.ts`\n\n---\n\n## Resolution flow (visual)\n\n```text\n Generic helper order (`config.ts`)\n┌───────────────────────────────────────┐\n│ 1) ~/.omp/agent, ~/.claude, ... │\n│ 2) <cwd>/.omp, <cwd>/.claude, ... │\n└───────────────────────────────────────┘\n │\n ▼\n capability providers enumerate items\n (native provider scans project .omp before user .omp;\n other providers have their own loading rules)\n │\n ▼\n provider priority sort + capability dedup\n │\n ▼\n subsystem-specific consumption\n (settings, skills, hooks, tools, extensions)\n```\n\n## 1) Config roots and source order\n\n## Canonical roots\n\n`src/config.ts` defines a fixed source priority list:\n\n1. `.omp` (native)\n2. `.claude`\n3. `.codex`\n4. 
`.gemini`\n\nUser-level bases:\n\n- `~/.omp/agent`\n- `~/.claude`\n- `~/.codex`\n- `~/.gemini`\n\nProject-level bases:\n\n- `<cwd>/.omp`\n- `<cwd>/.claude`\n- `<cwd>/.codex`\n- `<cwd>/.gemini`\n\n`CONFIG_DIR_NAME` is `.omp` (`packages/utils/src/dirs.ts`).\n\n## Profiles\n\nA named profile (`omp --profile <name>`, the `--alias` shortcut, or `OMP_PROFILE` / `PI_PROFILE`) relocates the OMP user base. When a profile is active, every OMP-native user-level path written here as `~/.omp/agent/...` resolves to `~/.omp/profiles/<name>/agent/...` instead.\n\nThe relocation is uniform across the native provider (`builtin.ts`) and the generic `config.ts` helpers, so it covers slash commands, rules, prompts, instructions, hooks, tools, extensions, settings, skills, and MCP, plus the top-level `SYSTEM.md` / `RULES.md` / `AGENTS.md` files and runtime state (sessions, blobs, `agent.db`). A profile sees only its own OMP config, never the default profile's `~/.omp/agent`.\n\nThe other source bases are not profile-scoped and load identically under every profile: the external-tool bases (`~/.claude`, `~/.codex`, `~/.gemini`) belong to those tools, and the project-level bases (`<cwd>/.omp`, `<cwd>/.claude`, ...) are keyed to the working directory. 
Throughout this document, read `~/.omp/agent` as shorthand for the active profile's agent directory.\n\n## Important constraint\n\nThe generic helpers in `src/config.ts` do **not** include `.pi` in source discovery order.\n\n---\n\n## 2) Core discovery helpers (`src/config.ts`)\n\n## `getConfigDirs(subpath, options)`\n\nReturns ordered entries:\n\n- User-level entries first (by source priority)\n- Then project-level entries (by same source priority)\n\nOptions:\n\n- `user` (default `true`)\n- `project` (default `true`)\n- `cwd` (default `getProjectDir()`)\n- `existingOnly` (default `false`)\n\nThis API is used for directory-based config lookups (commands, hooks, tools, agents, etc.).\n\n## `findConfigFile(subpath, options)` / `findConfigFileWithMeta(...)`\n\nSearches for the first existing file across ordered bases, returns first match (path-only or path+metadata).\n\n## `findAllNearestProjectConfigDirs(subpath, cwd)`\n\nWalks parent directories upward and returns the **nearest existing directory per source base** (`.omp`, `.claude`, `.codex`, `.gemini`), then sorts results by source priority.\n\nUse this when project config should be inherited from ancestor directories (monorepo/nested workspace behavior).\n\n---\n\n## 3) File config wrapper (`ConfigFile<T>` in `src/config/config-file.ts`, re-exported from `src/config.ts`)\n\n`ConfigFile<T>` is the schema-validated loader for single config files.\n\nSupported formats:\n\n- `.yml` / `.yaml`\n- `.json` / `.jsonc`\n\nBehavior:\n\n- Validates parsed data against a provided Zod schema.\n- Caches load result until `invalidate()`.\n- Returns tri-state result via `tryLoad()`:\n - `ok`\n - `not-found`\n - `error` (`ConfigError` with schema/parse context)\n\nLegacy migration still supported:\n\n- If target path is `.yml`/`.yaml`, a sibling `.json` is auto-migrated once (`migrateJsonToYml`).\n\n---\n\n## 4) Settings resolution model (`src/config/settings.ts`)\n\nThe runtime settings model is layered:\n\n1. 
Global settings: `~/.omp/agent/config.yml`\n2. Project settings: discovered via settings capability (`settings.json` and `config.yml` from providers)\n3. CLI config overlays: `omp --config <path>` / repeated `--config` files, loaded as `config.yml`-style YAML for this process only\n4. Runtime overrides: in-memory, non-persistent\n5. Schema defaults: from `SETTINGS_SCHEMA`\n\nEffective precedence:\n\n`defaults <- global <- project <- CLI config overlays <- overrides`\n\nWrite behavior:\n\n- `settings.set(...)` writes to the **global** layer (`config.yml`) and queues background save.\n- Project settings are read-only from capability discovery.\n\n## Migration behavior still active\n\nOn startup, if `config.yml` is missing:\n\n1. Migrate from `~/.omp/agent/settings.json` (renamed to `.bak` on success)\n2. Merge with legacy DB settings from `agent.db`\n3. Write merged result to `config.yml`\n\nField-level migrations in `#migrateRawSettings`:\n\n- `queueMode` -> `steeringMode`\n- `ask.timeout` milliseconds -> seconds when old value looks like ms (`> 1000`)\n- Legacy flat `theme: \"...\"` -> `theme.dark/theme.light` structure\n\n---\n\n## 5) Capability/discovery integration\n\nMost non-core config loading flows through the capability registry (`src/capability/index.ts` + `src/discovery/index.ts`).\n\n## Provider ordering\n\nProviders are sorted by numeric priority (higher first). Example priorities:\n\n- Native OMP (`builtin.ts`): `100`\n- Claude: `80`\n- Codex / agents / Claude marketplace: `70`\n- Gemini: `60`\n\n```text\nProvider precedence (higher wins)\n\nnative (.omp) priority 100\nclaude priority 80\ncodex / agents / ... 
priority 70\ngemini priority 60\n```\n\n## Dedup semantics\n\nCapabilities define a `key(item)`:\n\n- same key => first item wins (higher-priority/earlier-loaded item)\n- no key (`undefined`) => no dedup, all items retained\n\nRelevant keys:\n\n- skills: `name`\n- tools: `name`\n- hooks: `${type}:${tool}:${name}`\n- extension modules: `name`\n- extensions: `name`\n- settings: no dedup (all items preserved)\n\n---\n\n## 6) Native `.omp` provider behavior (`packages/coding-agent/src/discovery/builtin.ts`)\n\nNative provider (`id: native`) reads native config from:\n\n- project: `<cwd>/.omp/...`\n- user: `~/.omp/agent/...`\n\n### Directory admission rules\n\n- Slash commands, rules, prompts, instructions, hooks, tools, extensions, extension modules, and settings use a project/user root only when the root directory exists and is non-empty.\n- Skills scan `<ancestor>/.omp/skills` for each ancestor from the current working directory up to the repo root/home boundary, plus `~/.omp/agent/skills`, without requiring the root `.omp` directory itself to be non-empty.\n- `SYSTEM.md` and `AGENTS.md` read user-level files directly and use nearest-ancestor project `.omp` lookup for project files, but the project `.omp` directory must be non-empty. See [`docs/system-prompt-customization.md`](./system-prompt-customization.md) for the full `SYSTEM.md` / `APPEND_SYSTEM.md` contract (replace vs. 
append, templating).\n\n### Scope-specific loading\n\n- Skills: `<ancestor>/.omp/skills/*/SKILL.md` and `~/.omp/agent/skills/*/SKILL.md`\n- Slash commands: `commands/*.md`\n- Rules: `rules/*.{md,mdc}`\n- Prompts: `prompts/*.md`\n- Instructions: `instructions/*.md`\n- Hooks: `hooks/pre/*`, `hooks/post/*`\n- Tools: `tools/*.{json,md,ts,js,sh,bash,py}` and `tools/<name>/index.ts`\n- Extension modules: discovered under `extensions/` (+ legacy `settings.json.extensions` string array)\n- Extensions: `extensions/<name>/gemini-extension.json`\n- Settings capability: `settings.json`, then `config.yml`\n\n### Nearest-project lookup nuance\n\nFor `SYSTEM.md` and `AGENTS.md`, native provider uses nearest-ancestor project `.omp` directory search (walk-up) and still requires the project `.omp` dir to be non-empty.\n\n---\n\n## 7) How major subsystems consume config\n\n## Settings subsystem\n\n- `Settings.init()` loads global `config.yml` + discovered project settings capability items.\n- Only capability items with `level === \"project\"` are merged into project layer.\n\n### Session title prompt override\n\nCreate `TITLE_SYSTEM.md` in the same config locations as `SYSTEM.md` / `APPEND_SYSTEM.md`:\n\n```text\n# ~/.omp/agent/TITLE_SYSTEM.md\nGenerate a session name using lowercase `<type>:<primary-objective>`.\n```\n\n- Missing `TITLE_SYSTEM.md` keeps the bundled title prompts.\n- Discovery uses the same project-then-user config directory pattern as `SYSTEM.md`: project `.omp/TITLE_SYSTEM.md` first, then user `~/.omp/agent/TITLE_SYSTEM.md` and the other supported config bases.\n- The override replaces only the automatic session-title generation system prompt; normal `SYSTEM.md` / `APPEND_SYSTEM.md` prompt customization is unaffected.\n- The online path forces the `set_title` tool call when the title model honors a forced `tool_choice`. 
Tool-choice-less providers (chat-completions hosts without `tool_choice` support, Claude Fable/Mythos) instead receive a marker-based prompt and emit the title wrapped in `<title>...</title>`, which is parsed leniently (a plain sentence or a truncated/unclosed tag still works). A `TITLE_SYSTEM.md` override is reused in both modes; in marker mode the wrap-in-`<title>` instruction is appended after it. The local tiny-title path keeps the `<title>...</title>` prefill/stop wrapper and uses this file as its system turn.\n\n## Skills subsystem\n\n- `extensibility/skills.ts` loads via `loadCapability(skillCapability.id, { cwd })`.\n- Applies source toggles and filters (`ignoredSkills`, `includeSkills`, custom dirs).\n- Legacy-named toggles still exist (`skills.enablePiUser`, `skills.enablePiProject`) but they gate the native provider (`provider === \"native\"`).\n\n## Hooks subsystem\n\n- `discoverAndLoadHooks()` resolves hook paths from hook capability + explicit configured paths.\n- Then loads modules via Bun import.\n\n## Tools subsystem\n\n- `discoverAndLoadCustomTools()` resolves tool paths from tool capability + plugin tool paths + explicit configured paths.\n- Declarative `.md/.json` tool files are metadata only; executable loading expects code modules.\n\n## Extensions subsystem\n\n- `discoverAndLoadExtensions()` resolves extension modules from extension-module capability plus explicit paths.\n- Current implementation intentionally keeps only capability items with `_source.provider === \"native\"` before loading.\n\n---\n\n## 8) Precedence rules to rely on\n\nUse this mental model:\n\n1. Source directory ordering from `config.ts` determines candidate path order.\n2. Capability provider priority determines cross-provider precedence.\n3. Capability key dedup determines collision behavior (first wins for keyed capabilities).\n4. 
Subsystem-specific merge logic can further change effective precedence (especially settings).\n\n### Settings-specific caveat\n\nSettings capability items are not deduplicated; `Settings.#loadProjectSettings()` deep-merges project items in returned order. Because merge applies later item values over earlier values, effective override behavior depends on provider emission order, not just capability key semantics.\n\n---\n\n## 9) Legacy/compatibility behaviors still present\n\n- `ConfigFile` JSON -> YAML migration for YAML-targeted files.\n- Settings migration from `settings.json` and `agent.db` to `config.yml`.\n- Settings key migrations include `queueMode`, `ask.timeout`, flat `theme`, `task.isolation.enabled`, legacy `task.isolation.mode` values, removed edit modes, `statusLine.plan_mode`, `memories.enabled`, and hindsight scoping/name fields.\n- Legacy setting names `skills.enablePiUser` / `skills.enablePiProject` are still active gates for native skill source.\n\nIf these compatibility paths are removed in code, update this document immediately; several runtime behaviors still depend on them today.\n",
17
18
  "context-files.md": "# Context files\n\nContext files are Markdown instruction files that `omp` discovers automatically before a session starts and injects into the agent's project context. Use them for repository conventions, architecture notes, test and review expectations, and instructions that should travel with a user account or a project.\n\nYou never have to ask the agent to go read `AGENTS.md`, `CLAUDE.md`, `GEMINI.md`, or similar files — the relevant ones are already discovered, loaded, and placed in context when the session begins.\n\n## How context files relate to other concepts\n\nFour similarly named things behave differently. Keep them straight:\n\n- **Context files** are read as plain Markdown and shown to the agent inside a `<context>` block. They are advisory background that stays in the session's opening context.\n- **Sticky rules** come from a top-level `RULES.md`. They are converted into an always-apply rule that is re-attached near the current turn, so they keep their hold even after the visible conversation grows. See \"Sticky rules vs normal context\" below.\n- **Discovery providers** are the config-source adapters (`native`, `claude`, `codex`, `gemini`, `opencode`, `github`, `agents`, `agents-md`) that know where each tool keeps its files. The same provider that contributes context files may also contribute MCP servers, slash commands, skills, hooks, tools, prompts, and settings.\n- **Model providers** are inference backends such as `anthropic`, `openai`, `google`, `groq`, `ollama`, and `openrouter`. They have nothing to do with context files except that both kinds of id share the one `disabledProviders` list — see \"Disabling discovery providers\" below and [Providers](./providers.md).\n\nAuthoring **skills** and **rule** files (as opposed to the sticky `RULES.md`) is covered in [Skills](./skills.md). 
Customizing the system prompt with `SYSTEM.md` is covered in [System prompt customization](./system-prompt-customization.md).\n\n## Native `.omp` files\n\nThe native provider is the recommended format for new projects. It reads from your user agent directory and from `.omp/` directories inside a project, and it has the highest discovery priority, so its files win over every other convention at the same scope.\n\n| File | Scope | Behavior |\n|---|---|---|\n| `~/.omp/agent/AGENTS.md` | User | User-level context for every session unless the `native` provider is disabled. |\n| `<ancestor>/.omp/AGENTS.md` | Project | Project context. `omp` walks upward from the current directory to the repository root and uses the **nearest** non-empty `.omp/AGENTS.md`. Farther native project files are not also included. |\n| `~/.omp/agent/RULES.md` | User | User-level sticky rule content. Loaded as an always-apply rule, not as a context file. |\n| `<ancestor>/.omp/RULES.md` | Project | Project sticky rule content. Same nearest-ancestor walk-up as above. Loaded as an always-apply rule. |\n\nTwo details matter:\n\n- **Walk-up to the repository root.** Discovery starts in the current working directory and climbs through each ancestor up to the repository root, stopping at the first ancestor that has a usable `.omp/` directory. The *nearest* match wins; ancestors above it are not loaded as native context.\n- **The `.omp/` directory must be non-empty.** An empty `.omp/` directory is skipped during the walk-up, so the search continues to the next ancestor. An empty `AGENTS.md` or `RULES.md` file contributes nothing.\n\n`~/.omp/agent` is the user base. 
If `PI_CODING_AGENT_DIR` is set, it relocates that base, so the user files become `$PI_CODING_AGENT_DIR/AGENTS.md` and `$PI_CODING_AGENT_DIR/RULES.md`.\n\n### Monorepo example\n\n```text\nrepo/\n .omp/\n AGENTS.md\n RULES.md\n packages/api/\n .omp/\n AGENTS.md\n```\n\nStarting a session in `repo/packages/api`:\n\n- The native context file is `repo/packages/api/.omp/AGENTS.md` (the nearest one). `repo/.omp/AGENTS.md` is **not** also included.\n- The project sticky rule is `repo/packages/api/.omp/RULES.md` if present; otherwise the walk-up continues and `repo/.omp/RULES.md` is used.\n\nPut broad, durable project background in `AGENTS.md`. Reserve `RULES.md` for short, hard requirements that must stay visible across long conversations.\n\n## Other supported context conventions\n\n`omp` also discovers the context and rule files of other agent tools so existing projects keep working without migration.\n\n| Provider id | Convention path | Scope | Notes |\n|---|---|---|---|\n| `native` | `.omp/AGENTS.md` | User + project | Recommended `omp` format. User file at `~/.omp/agent/AGENTS.md`; project file is the nearest non-empty `.omp/AGENTS.md` walking up to the repo root. |\n| `claude` | `.claude/CLAUDE.md` | User + project | User file `~/.claude/CLAUDE.md`; project file `<cwd>/.claude/CLAUDE.md` only (no ancestor walk-up). |\n| `codex` | `.codex/AGENTS.md` | User | User file `~/.codex/AGENTS.md` only. Project-level Codex context comes from a standalone `AGENTS.md` via the `agents-md` provider, not from `<cwd>/.codex/AGENTS.md`. |\n| `gemini` | `.gemini/GEMINI.md` | User + project | User file `~/.gemini/GEMINI.md`; project file `<cwd>/.gemini/GEMINI.md` only (no ancestor walk-up). |\n| `opencode` | `.config/opencode/AGENTS.md` | User | User file `~/.config/opencode/AGENTS.md` only. 
|\n| `github` | `.github/copilot-instructions.md` | User + project | Project file `<cwd>/.github/copilot-instructions.md` only (no ancestor walk-up), plus a user-global `~/.copilot/copilot-instructions.md` (relocate with `COPILOT_HOME`) and an `AGENTS.md` from each `COPILOT_CUSTOM_INSTRUCTIONS_DIRS` entry. |\n| `agents` | `.agent/AGENTS.md`, `.agents/AGENTS.md` | User + project | User files from `~/.agent/` and `~/.agents/`; project files discovered while walking up from the current directory to the repository root. |\n| `agents-md` | `AGENTS.md` | Project | Standalone (non-config-directory) `AGENTS.md` files, discovered by walking up from the current directory to the repository root (or home when no repo root is known). Files whose parent directory name starts with `.` are ignored — those belong to a config-directory provider instead. |\n| `github` | `.github/instructions/**/*.instructions.md` | Project rules | GitHub Copilot / VS Code instruction files become rules. `applyTo: '*'` or `applyTo: '**'` is injected as always-apply context; other `applyTo` globs are listed in the rulebook with `description` and are readable as `rule://<name>`. |\n\nProviders marked \"(no ancestor walk-up)\" only look in the current working directory's config directory. If you need ancestor walk-up behavior, prefer the native `.omp/AGENTS.md` format or a standalone `AGENTS.md` (the `agents-md` provider), or launch `omp` from the directory that holds the config directory.\n\n## Load order and shadowing\n\nWhen two providers describe the *same* scope, the higher-priority provider wins. Provider priorities:\n\n| Priority | Provider id |\n|---:|---|\n| 100 | `native` |\n| 80 | `claude` |\n| 70 | `agents`, `codex` |\n| 60 | `gemini` |\n| 55 | `opencode` |\n| 30 | `github` |\n| 10 | `agents-md` |\n\nDiscovered files are then deduplicated by scope:\n\n- **One user context file** is kept across all providers. 
Because `native` has the highest priority, `~/.omp/agent/AGENTS.md` shadows every other user-level context file.\n- **One project context file per directory depth.** Depth is measured from the current directory: the cwd is depth 0, its parent depth 1, and so on. Config subdirectories of an ancestor (`.claude/`, `.github/`, `.gemini/`, …) count as the same depth as that ancestor.\n- **At the same depth, the higher-priority provider shadows the rest.**\n- **Across depths, multiple files survive.** In a monorepo, an ancestor `AGENTS.md` and a package-level one are different depths and both load.\n- **Byte-identical files are collapsed.** If two surviving files have exactly the same content, only the copy closest to the cwd is kept.\n\nAfter deduplication, project files are sorted so **farther ancestors appear first** and files **closer to the cwd appear last**. Later files sit nearer the end of the context block, where they are most prominent.\n\n### Worked shadowing example\n\n```text\nrepo/\n AGENTS.md\n packages/api/\n AGENTS.md\n .github/copilot-instructions.md\n```\n\nStarting in `repo/packages/api`:\n\n- `repo/AGENTS.md` is found by `agents-md` at depth 2 and kept.\n- `repo/packages/api/AGENTS.md` (`agents-md`, priority 10) and `repo/packages/api/.github/copilot-instructions.md` (`github`, priority 30) both resolve to depth 0. 
GitHub's higher priority shadows the package-level standalone `AGENTS.md`, so the Copilot file wins at that depth.\n- The two kept files are ordered root-first, package-last, so `packages/api`'s file is the more prominent one.\n- If you add `repo/packages/api/.omp/AGENTS.md`, `native` (priority 100) wins depth 0 outright, shadowing both lower-priority files.\n\n## Injection behavior\n\nDiscovered context files are injected into the opening project prompt as a single `<context>` block, one `<file>` element per surviving file, in the sort order above:\n\n```xml\n<context>\nYou MUST follow the context files below for all tasks:\n<file path=\"/abs/path/to/repo/AGENTS.md\">\n...root content...\n</file>\n<file path=\"/abs/path/to/repo/packages/api/.github/copilot-instructions.md\">\n...package content...\n</file>\n</context>\n```\n\nThe agent sees each file's absolute path and its fully expanded Markdown content (with `@` imports already resolved — see below). Loading is automatic — there is no need to instruct the agent to search for `AGENTS.md`, `CLAUDE.md`, `GEMINI.md`, `.cursorrules`, or similar files during a session.\n\nDeeper-directory `AGENTS.md` files that were *not* auto-loaded (for example, ones below the current directory) are surfaced separately in a `<dir-context>` block that lists their paths and tells the agent to read them before editing those directories. 
Those files are pointers, not full injected content.\n\n## `@` imports\n\nInside any context file, an `@path` token expands inline to the referenced file's content before injection:\n\n```markdown\n# Project notes\n\nRead @docs/architecture.md before changing storage code.\nShared release steps live in @../RELEASE.md and personal aliases in @~/.notes/aliases.md.\n```\n\nThe exact rules:\n\n- **Relative paths resolve from the importing file's own directory**, not the session's working directory.\n- **`~/` and `~`** resolve from the user's home directory; absolute paths are used as-is.\n- **Tokens inside fenced code blocks and inline code spans are left untouched** — useful when you want to *write about* an `@token` without expanding it.\n- **`git@github.com:org/repo.git` and `user@example.com`-style tokens are not treated as imports.** A token only counts when the `@` sits at the start of a line or after a space or tab.\n- **Trailing sentence punctuation is trimmed** off the path (`. , ; : ! ? ) ] } \" '`), so `@docs/setup.md.` imports `docs/setup.md`.\n- **Imports recurse up to five hops.** An imported file may itself contain `@` imports, up to a total depth of five.\n- **Cycles are skipped.** A file already pulled into the current expansion tree is not re-expanded, so mutual imports terminate cleanly.\n- **A missing or unreadable target leaves the original `@token` text in place** rather than erroring.\n\n## Sticky rules vs normal context\n\nUse a normal context file (`AGENTS.md`, `CLAUDE.md`, `GEMINI.md`, `.github/copilot-instructions.md`, …) for the bulk of your guidance: repository overview, code style, build and test commands, review expectations, and local conventions. 
These load into the opening `<context>` block.\n\nUse a top-level **`RULES.md`** for the handful of hard requirements that must stay active even after a long conversation has pushed the opening context far up the transcript:\n\n```markdown\n# ~/.omp/agent/RULES.md\n\nNever commit or push unless the user explicitly asks.\nDo not edit generated files.\n```\n\n`RULES.md` is special:\n\n- It is read **only** at the native locations — `~/.omp/agent/RULES.md` and the nearest `<ancestor>/.omp/RULES.md` from the cwd up to the repo root. A `RULES.md` anywhere else is not a context-file convention and is ignored.\n- It is loaded as an **always-apply rule**, not as a context file, so it is re-attached near the current turn and keeps its hold across long sessions.\n- It is **always sticky**: frontmatter cannot make it non-sticky. If you want conditional or opt-in behavior, write a normal rule file instead (see [Skills](./skills.md)).\n\nKeep `RULES.md` short. Long background belongs in `AGENTS.md`, where it costs context budget only once.\n\n## Disabling discovery providers\n\nTurn a provider off with the `disabledProviders` setting in `~/.omp/agent/config.yml`, a project's `.omp/config.yml`, or a `--config` overlay:\n\n```yaml\n# .omp/config.yml\ndisabledProviders:\n - claude\n - github\n```\n\n`disabledProviders` is a **whole-provider switch with one shared id namespace**, used by two unrelated subsystems:\n\n| Id kind | Examples | Effect when listed |\n|---|---|---|\n| Discovery provider ids | `native`, `claude`, `codex`, `gemini`, `opencode`, `github`, `agents`, `agents-md` | The entire config source is removed — not just its context files, but also any MCP servers, slash commands, skills, hooks, tools, prompts, and settings it would have contributed. |\n| Model provider ids | `anthropic`, `openai`, `google`, `groq`, `ollama`, `openrouter` | The model backend is removed from selection even when its credentials are present. See [Providers](./providers.md). 
|\n\nIds are exact and the two namespaces do not collide by accident: `google` disables the Google model backend, while `gemini` disables the Gemini CLI discovery files. Disabling a discovery provider is heavier than it looks — disabling `claude`, for instance, also drops Claude-discovered MCP servers, commands, skills, hooks, tools, and settings, not only `CLAUDE.md`.\n\nOnly `enabledModels` and `disabledProviders` support **path-scoped** entries, so you can vary provider availability per subtree:\n\n```yaml\ndisabledProviders:\n - github # disabled everywhere\n - path: ~/work/legacy-claude\n providers:\n - claude # disabled only under this directory\n```\n\nA scoped entry applies when the cwd equals the configured path or sits beneath it; `~` expands to home. Bare string entries apply everywhere.\n\nRemember that higher-precedence settings layers **replace** array settings rather than appending to them. If your global config disables `claude` but a project config sets `disabledProviders: [github]`, then inside that project Claude discovery is re-enabled and only GitHub is disabled. 
See [Settings](./settings.md) for the full layer precedence, merge rules, and path-scoped array details.\n\n## Troubleshooting\n\n### A file is not loaded\n\n- Native project context must live at `.omp/AGENTS.md`, and the `.omp/` directory must be non-empty; an empty `.omp/` is skipped and the walk-up continues to the next ancestor.\n- A standalone `AGENTS.md` is handled by `agents-md`, not `native`.\n- `.claude/CLAUDE.md`, `.gemini/GEMINI.md`, and `.github/copilot-instructions.md` are read only from the current working directory's config directory — not from every ancestor.\n- `~/.codex/AGENTS.md` and `~/.config/opencode/AGENTS.md` are user-level only and have no project equivalent.\n- Empty files contribute nothing for the native and standalone providers.\n- A disabled discovery provider contributes nothing — check `disabledProviders` across your global, project, and `--config` layers.\n\n### The wrong file wins\n\nAt one user scope or project depth, the higher-priority provider shadows the others (native > claude > agents/codex > gemini > opencode > github > agents-md). To force deterministic behavior, move your guidance into `.omp/AGENTS.md` (native always wins) or disable the competing discovery provider.\n\n### User context disappeared\n\nOnly one user-level context file survives, and `~/.omp/agent/AGENTS.md` has the highest priority. If it exists, it shadows user-level `~/.claude/CLAUDE.md`, `~/.codex/AGENTS.md`, `~/.gemini/GEMINI.md`, `~/.config/opencode/AGENTS.md`, `~/.copilot/copilot-instructions.md`, and `~/.agent`/`~/.agents` files. Consolidate user guidance into the native file or remove the native one if you prefer another tool's file.\n\n### A `RULES.md` file is ignored\n\nOnly the native `RULES.md` locations are sticky: `~/.omp/agent/RULES.md` and the nearest `<ancestor>/.omp/RULES.md` from cwd to the repo root. 
A `RULES.md` in any other directory is not a recognized convention and will not be loaded.\n\n### An `@` import did not expand\n\nConfirm the target exists relative to the importing file (not the cwd). Imports inside fenced code blocks or inline code spans are intentionally left literal, `git@`/email-looking tokens are never imported, cycles are skipped, expansion stops after five hops, and a missing target leaves the original `@path` text unchanged.\n",
18
19
  "custom-tools.md": "# Custom Tools\n\nCustom tools are model-callable functions that plug into the same tool execution pipeline as built-in tools.\n\nA custom tool is a TypeScript/JavaScript module that exports a factory. The factory receives a host API (`CustomToolAPI`) and returns one tool or an array of tools.\n\n## What this is (and is not)\n\n- **Custom tool**: callable by the model during a turn (`execute` + Zod parameter schema).\n- **Extension**: lifecycle/event framework that can register tools and intercept/modify events.\n- **Hook**: external pre/post command scripts.\n- **Skill**: static guidance/context package, not executable tool code.\n\nIf you need the model to call code directly, use a custom tool.\n\n## Integration paths in current code\n\nThere are two active integration styles:\n\n1. **SDK-provided custom tools** (`options.customTools`)\n - Wrapped into agent tools via `CustomToolAdapter` or extension wrappers.\n - Always included in the initial active tool set in SDK bootstrap.\n\n2. **Filesystem-discovered modules via loader API** (`discoverAndLoadCustomTools` / `loadCustomTools`)\n - Exposed as library APIs in `src/extensibility/custom-tools/loader.ts`.\n - Host code can call these to discover and load tool modules from config/provider/plugin paths.\n\n```text\nModel tool call flow\n\nLLM tool call\n │\n ▼\nTool registry (built-ins + custom tool adapters)\n │\n ▼\nCustomTool.execute(toolCallId, params, onUpdate, ctx, signal)\n │\n ├─ onUpdate(...) -> streamed partial result\n └─ return result -> final tool content/details\n```\n\n## Discovery locations (loader API)\n\n`discoverAndLoadCustomTools(configuredPaths, cwd, builtInToolNames)` merges:\n\n1. Capability providers (`toolCapability`), including:\n - Native OMP config (`~/.omp/agent/tools`, `.omp/tools`)\n - Claude config (`~/.claude/tools`, `.claude/tools`)\n - Codex config (`~/.codex/tools`, `.codex/tools`)\n - Claude marketplace plugin cache provider\n2. 
Installed plugin manifests (`~/.omp/plugins/node_modules/*` via plugin loader)\n3. Explicit configured paths passed to the loader\n\n### Important behavior\n\n- Duplicate resolved paths are deduplicated.\n- Tool name conflicts are rejected against built-ins and already-loaded custom tools.\n- `.md` and `.json` files are discovered as tool metadata by some providers, but the executable module loader rejects them as runnable tools.\n- Relative configured paths are resolved from `cwd`; `~` is expanded.\n\n## Module contract\n\nA custom tool module must export a function (default export preferred):\n\n```ts\nimport type { CustomToolFactory } from \"@oh-my-pi/pi-coding-agent\";\n\nconst factory: CustomToolFactory = (pi) => ({\n name: \"repo_stats\",\n label: \"Repo Stats\",\n description: \"Counts tracked TypeScript files\",\n parameters: pi.zod.object({\n glob: pi.zod.string().optional().default(\"**/*.ts\"),\n }),\n\n async execute(toolCallId, params, onUpdate, ctx, signal) {\n onUpdate?.({\n content: [{ type: \"text\", text: \"Scanning files...\" }],\n details: { phase: \"scan\" },\n });\n\n const result = await pi.exec(\n \"git\",\n [\"ls-files\", params.glob ?? 
\"**/*.ts\"],\n { signal, cwd: pi.cwd },\n );\n if (result.killed) {\n throw new Error(\"Scan was cancelled\");\n }\n if (result.code !== 0) {\n throw new Error(result.stderr || \"git ls-files failed\");\n }\n\n const files = result.stdout.split(\"\\n\").filter(Boolean);\n return {\n content: [{ type: \"text\", text: `Found ${files.length} files` }],\n details: { count: files.length, sample: files.slice(0, 10) },\n };\n },\n\n onSession(event) {\n if (event.reason === \"shutdown\") {\n // cleanup resources if needed\n }\n },\n});\n\nexport default factory;\n```\n\nSchemas are authored with Zod (`pi.zod`) and flow through the shared validation/wire pipeline.\n\nFactory return type:\n\n- `CustomTool`\n- `CustomTool[]`\n- `Promise<CustomTool | CustomTool[]>`\n\n## API surface passed to factories (`CustomToolAPI`)\n\nFrom `types.ts` and `loader.ts`:\n\n- `cwd`: host working directory\n- `exec(command, args, options?)`: process execution helper\n- `ui`: UI context (can be no-op in headless modes)\n- `hasUI`: `false` in non-interactive flows\n- `logger`: shared file logger\n- `typebox`: zod-backed compatibility shim for legacy TypeBox-style schemas\n- `zod`: injected `zod/v4` module (canonical for new schemas)\n- `pi`: injected `@oh-my-pi/pi-coding-agent` exports\n- `pushPendingAction(action)`: register a preview action for hidden `resolve` tool (`docs/resolve-tool-runtime.md`)\n Loader starts with a no-op UI context and requires host code to call `setUIContext(...)` when real UI is ready.\n\n## Execution contract and typing\n\n`CustomTool.execute` signature:\n\n```ts\nexecute(toolCallId, params, onUpdate, ctx, signal);\n```\n\n- `params` is statically typed from your Zod/TypeBox schema via `Static<TParams>`.\n- Runtime argument validation happens before execution in the agent loop.\n- `onUpdate` emits partial results for UI streaming.\n- `ctx` includes `sessionManager`, `modelRegistry`, current `model`, `isIdle()`, `hasQueuedMessages()`, `abort()`, and optional 
`settings`, `fetch`, and `autoApprove`.\n- `signal` carries cancellation.\n\n`CustomToolAdapter` bridges this to the agent tool interface and forwards calls in the correct argument order.\n\nTool definitions may also declare `strict`, `hidden`, `deferrable`, `mcpServerName`, `mcpToolName`, `approval`, and `formatApprovalDetails`.\n\n## How tools are exposed to the model\n\n- Tools are wrapped into `AgentTool` instances (`CustomToolAdapter` or extension wrappers).\n- They are inserted into the session tool registry by name.\n- In SDK bootstrap, custom and extension-registered tools are force-included in the initial active set.\n- CLI `--tools` currently validates only built-in tool names; custom tool inclusion is handled through discovery/registration paths and SDK options.\n\n## Rendering hooks\n\nOptional rendering hooks:\n\n- `renderCall(args, options, theme)`\n- `renderResult(result, options, theme, args?)`\n\nRuntime behavior in TUI:\n\n- If hooks exist, tool output is rendered inside a `Box` container.\n- `renderResult` receives `{ expanded, isPartial, spinnerFrame? 
}`.\n- Renderer errors are caught and logged; UI falls back to default text rendering.\n\n## Session/state handling\n\nOptional `onSession(event, ctx)` receives session lifecycle events, including:\n\n- `start`, `switch`, `branch`, `tree`, `shutdown`\n- `auto_compaction_start`, `auto_compaction_end`\n- `auto_retry_start`, `auto_retry_end`\n- `ttsr_triggered`, `todo_reminder`\n\nUse `ctx.sessionManager` to reconstruct state from history when branch/session context changes.\n\n## Failures and cancellation semantics\n\n### Synchronous/async failures\n\n- A thrown error (or a rejected promise) in `execute` is treated as a tool failure.\n- Agent runtime converts failures into tool result messages with `isError: true` and error text content.\n- With extension wrappers, `tool_result` handlers can further rewrite content/details and even override error status.\n\n### Cancellation\n\n- Agent abort propagates through `AbortSignal` to `execute`.\n- Forward `signal` to subprocess work (`pi.exec(..., { signal })`) for cooperative cancellation.\n- `ctx.abort()` lets a tool request an abort of the current agent operation.\n\n### onSession errors\n\n- `onSession` errors are caught and logged as warnings; they do not crash the session.\n\n## Real constraints to design for\n\n- Tool names must be globally unique in the active registry.\n- Prefer deterministic, schema-shaped outputs in `details` for renderer/state reconstruction.\n- Guard UI usage with `pi.hasUI`.\n- Treat `.md`/`.json` in tool directories as metadata, not executable modules.\n",
19
- "environment-variables.md": "# Environment Variables (Current Runtime Reference)\n\nThis reference is derived from current code paths in:\n\n- `packages/coding-agent/src/**`\n- `packages/ai/src/**` (provider/auth resolution used by coding-agent)\n- `packages/utils/src/**` and `packages/tui/src/**` where those vars directly affect coding-agent runtime\n\nIt documents only active behavior.\n\n## Resolution model and precedence\n\nMost runtime lookups use `$env` from `@oh-my-pi/pi-utils` (`packages/utils/src/env.ts`).\n\n`$env` loading order:\n\n1. Existing process environment (`Bun.env`)\n2. Project `.env` (`$PWD/.env`) for keys not already set\n3. Agent `.env` (`~/.omp/agent/.env`, respecting `PI_CONFIG_DIR` / `PI_CODING_AGENT_DIR`) for keys not already set\n4. Config-root `.env` (`~/.omp/.env`, respecting `PI_CONFIG_DIR`) for keys not already set\n5. Home `.env` (`~/.env`) for keys not already set\n\nAdditional rule inside each `.env` file: `OMP_*` keys are mirrored to `PI_*` keys in that parsed file.\n\n---\n\n## 1) Model/provider authentication\n\nThese are consumed via `getEnvApiKey()` (`packages/ai/src/stream.ts`) unless noted otherwise.\n\n### Core provider credentials\n\n| Variable | Used for | Required when | Notes / precedence |\n| ------------------------------- | ------------------------------------------------ | -------------------------------------------------------------- | --------------------------------------------------------------------------------------------------- |\n| `ANTHROPIC_OAUTH_TOKEN` | Anthropic API auth | Using Anthropic with OAuth token auth | Takes precedence over `ANTHROPIC_API_KEY` for provider auth resolution |\n| `ANTHROPIC_API_KEY` | Anthropic API auth | Using Anthropic without OAuth token | Fallback after `ANTHROPIC_OAUTH_TOKEN` |\n| `ANTHROPIC_FOUNDRY_API_KEY` | Anthropic via Azure Foundry / enterprise gateway | `CLAUDE_CODE_USE_FOUNDRY` enabled | Takes precedence over `ANTHROPIC_OAUTH_TOKEN` and `ANTHROPIC_API_KEY` when 
Foundry mode is enabled |\n| `OPENAI_API_KEY` | OpenAI auth | Using OpenAI-family providers without explicit apiKey argument | Used by OpenAI Completions/Responses providers |\n| `GEMINI_API_KEY` | Google Gemini auth | Using `google` provider models | Primary key for Gemini provider mapping |\n| `GOOGLE_API_KEY` | Gemini image tool auth fallback | Using `gemini_image` tool without `GEMINI_API_KEY` | Used by coding-agent image tool fallback path |\n| `GROQ_API_KEY` | Groq auth | Using Groq models | |\n| `CEREBRAS_API_KEY` | Cerebras auth | Using Cerebras models | |\n| `FIREWORKS_API_KEY` | Fireworks auth | Using Fireworks models | |\n| `FIREPASS_API_KEY` | Fire Pass auth | Using Fire Pass models | |\n| `TOGETHER_API_KEY` | Together auth | Using `together` provider | |\n| `AIMLAPI_API_KEY` | AIML API auth | Using `aimlapi` provider | OpenAI-compatible AIML API endpoint at `https://api.aimlapi.com/v1` |\n| `HUGGINGFACE_HUB_TOKEN` | Hugging Face auth | Using `huggingface` provider | Primary Hugging Face token env var |\n| `HF_TOKEN` | Hugging Face auth | Using `huggingface` provider | Fallback when `HUGGINGFACE_HUB_TOKEN` is unset |\n| `SYNTHETIC_API_KEY` | Synthetic auth | Using Synthetic models | |\n| `NVIDIA_API_KEY` | NVIDIA auth | Using `nvidia` provider | |\n| `NANO_GPT_API_KEY` | NanoGPT auth | Using `nanogpt` provider | |\n| `VENICE_API_KEY` | Venice auth | Using `venice` provider | |\n| `LITELLM_API_KEY` | LiteLLM auth | Using `litellm` provider | OpenAI-compatible LiteLLM proxy key |\n| `LM_STUDIO_API_KEY` | LM Studio auth (optional) | Using `lm-studio` provider with authenticated hosts | Local LM Studio usually runs without auth; any non-empty token works when a key is required |\n| `OLLAMA_API_KEY` | Ollama auth (optional) | Using `ollama` provider with authenticated hosts | Local Ollama usually runs without auth; any non-empty token works when a key is required |\n| `LLAMA_CPP_API_KEY` | llama.cpp auth (optional) | Using `llama.cpp` provider with 
authenticated hosts | Local llama.cpp usually runs without auth; any non-empty token works when a key is configured |\n| `XIAOMI_API_KEY` | Xiaomi MiMo auth | Using `xiaomi` provider | |\n| `XIAOMI_TOKEN_PLAN_AMS_API_KEY` | Xiaomi MiMo Token Plan auth (AMS) | Using `xiaomi-token-plan-ams` provider | |\n| `XIAOMI_TOKEN_PLAN_CN_API_KEY` | Xiaomi MiMo Token Plan auth (CN) | Using `xiaomi-token-plan-cn` provider | |\n| `XIAOMI_TOKEN_PLAN_SGP_API_KEY` | Xiaomi MiMo Token Plan auth (SGP) | Using `xiaomi-token-plan-sgp` provider | |\n| `MOONSHOT_API_KEY` | Moonshot auth | Using `moonshot` provider | |\n| `XAI_API_KEY` | xAI auth | Using xAI models or as fallback for `xai-oauth` | |\n| `XAI_OAUTH_TOKEN` | xAI OAuth/SuperGrok auth | Using `xai-oauth` provider | Takes precedence over `XAI_API_KEY` for `xai-oauth` |\n| `OPENROUTER_API_KEY` | OpenRouter auth | Using OpenRouter models | Also used by image tool when preferred/auto provider is OpenRouter |\n| `MISTRAL_API_KEY` | Mistral auth | Using Mistral models | |\n| `ZAI_API_KEY` | z.ai auth | Using z.ai models | Also used by z.ai web search provider |\n| `ZHIPU_API_KEY` | Zhipu Coding Plan auth | Using `zhipu-coding-plan` provider | |\n| `UMANS_AI_CODING_PLAN_API_KEY` | Umans AI Coding Plan auth | Using `umans` provider | |\n| `MINIMAX_API_KEY` | MiniMax auth | Using `minimax` provider | |\n| `MINIMAX_CODE_API_KEY` | MiniMax Code auth | Using `minimax-code` provider | |\n| `MINIMAX_CODE_CN_API_KEY` | MiniMax Code CN auth | Using `minimax-code-cn` provider | |\n| `OPENCODE_API_KEY` | OpenCode auth | Using `opencode-go` / `opencode-zen` models | |\n| `QIANFAN_API_KEY` | Qianfan auth | Using `qianfan` provider | |\n| `QWEN_OAUTH_TOKEN` | Qwen Portal auth | Using `qwen-portal` with OAuth token | Takes precedence over `QWEN_PORTAL_API_KEY` |\n| `QWEN_PORTAL_API_KEY` | Qwen Portal auth | Using `qwen-portal` with API key | Fallback after `QWEN_OAUTH_TOKEN` |\n| `ZENMUX_API_KEY` | ZenMux auth | Using `zenmux` provider | Used for 
ZenMux OpenAI and Anthropic-compatible routes |\n| `VLLM_API_KEY` | vLLM auth/discovery opt-in | Using `vllm` provider (local OpenAI-compatible servers) | Any non-empty value works for no-auth local servers |\n| `CURSOR_ACCESS_TOKEN` | Cursor provider auth | Using Cursor provider | |\n| `AI_GATEWAY_API_KEY` | Vercel AI Gateway auth | Using `vercel-ai-gateway` provider | |\n| `CLOUDFLARE_AI_GATEWAY_API_KEY` | Cloudflare AI Gateway auth | Using `cloudflare-ai-gateway` provider | Base URL must be configured as `https://gateway.ai.cloudflare.com/v1/<account>/<gateway>/anthropic` |\n| `ALIBABA_CODING_PLAN_API_KEY` | Alibaba Coding Plan auth | Using `alibaba-coding-plan` provider | |\n| `DEEPSEEK_API_KEY` | DeepSeek auth | Using DeepSeek models | |\n| `KILO_API_KEY` | Kilo auth | Using Kilo models | |\n| `OLLAMA_CLOUD_API_KEY` | Ollama Cloud auth | Using `ollama-cloud` provider | |\n| `WAFER_PASS_API_KEY` | Wafer Pass auth | Using `wafer-pass` provider | Flat-rate Wafer subscription; validated against `https://pass.wafer.ai/v1/models` |\n| `WAFER_SERVERLESS_API_KEY` | Wafer Serverless auth | Using `wafer-serverless` provider | Pay-as-you-go Wafer SKU; validated against `https://pass.wafer.ai/v1/models` |\n| `GITLAB_TOKEN` | GitLab Duo auth | Using `gitlab-duo` provider | |\n\n### GitHub/Copilot tokens\n\n| Variable | Used for | Notes |\n| ---------------------- | ------------------------------------------------ | ------------------------------------------ |\n| `COPILOT_GITHUB_TOKEN` | GitHub Copilot provider auth | Generic GitHub tokens are not used here |\n| `GH_TOKEN` | GitHub API auth in web scraper | Web scraper fallback after `GITHUB_TOKEN` |\n| `GITHUB_TOKEN` | GitHub API auth in web scraper | Web scraper checks this before `GH_TOKEN` |\n\n### Auth broker / auth gateway (remote credential vault)\n\nWhen the broker is enabled, the local SQLite credential store is bypassed and all OAuth refresh / access tokens live on the broker host. 
See [`auth-broker-gateway.md`](./auth-broker-gateway.md) for the full protocol, CLI surface, and 5-min/15-s usage cache layering.\n\n| Variable | Used for | Required when | Notes / precedence |\n| ----------------------- | -------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |\n| `OMP_AUTH_BROKER_URL` | Base URL of the remote auth-broker (e.g. `https://broker.tailnet:8765`); selects broker mode | Resolving credentials through a broker; also required by `omp auth-gateway serve` (the gateway is itself a broker client) | Wins over `auth.broker.url` in `config.yml`. When set with no resolvable token, `resolveAuthBrokerConfig()` hard-errors instead of falling back to local SQLite. |\n| `OMP_AUTH_BROKER_TOKEN` | Bearer token sent on every broker endpoint except `/v1/healthz` | `OMP_AUTH_BROKER_URL` is set and no token is available from `auth.broker.token` or `<config-dir>/auth-broker.token` | Resolution: this env → `auth.broker.token` (`$ENV_NAME` indirection supported) → `<config-dir>/auth-broker.token` (mode `0600`). `<config-dir>` is `~/.omp/` (respecting `PI_CONFIG_DIR`). |\n| `OMP_AUTH_BROKER_SNAPSHOT_TTL_MS` | Freshness window for the encrypted local broker snapshot cache | Optional in broker mode | Default `3600000` (1 h). Freshness is based on broker `snapshot.generatedAt`; `0` disables cache reads/writes and forces the old blocking fetch every startup. |\n| `OMP_AUTH_BROKER_SNAPSHOT_CACHE` | Path to the encrypted local broker snapshot cache | Optional in broker mode | Defaults to `~/.omp/cache/auth-broker-snapshot.enc` (or XDG cache equivalent). Useful for tests, ephemeral hosts, or relocating the `0600` cache file. 
|\n\nThe gateway has no dedicated env vars — it inherits `OMP_AUTH_BROKER_*`. Its own inbound bearer token lives at `<config-dir>/auth-gateway.token` and is managed via `omp auth-gateway token`.\n\n---\n\n## 2) Provider-specific runtime configuration\n\n### Anthropic Foundry Gateway (Azure / enterprise proxy)\n\nWhen `CLAUDE_CODE_USE_FOUNDRY` is enabled, Anthropic requests switch to Foundry mode:\n\n- Base URL resolves from `FOUNDRY_BASE_URL` (fallback remains model/default base URL if unset).\n- API key resolution for provider `anthropic` becomes:\n `ANTHROPIC_FOUNDRY_API_KEY` → `ANTHROPIC_OAUTH_TOKEN` → `ANTHROPIC_API_KEY`.\n- `ANTHROPIC_CUSTOM_HEADERS` is parsed as comma/newline-separated `key: value`\n pairs and merged into request headers. They are also forwarded when\n `ANTHROPIC_BASE_URL` points to a non-Anthropic host (e.g. a corporate API\n gateway), so enterprise gateways requiring proprietary auth headers work\n without enabling Foundry mode.\n- TLS client/server material can be injected from env values:\n `NODE_EXTRA_CA_CERTS`, `CLAUDE_CODE_CLIENT_CERT`, `CLAUDE_CODE_CLIENT_KEY`.\n Each accepts either:\n - a filesystem path to PEM content, or\n - inline PEM (including escaped `\\n` sequences).\n\n| Variable | Value type | Behavior |\n| --------------------------- | ---------------------------------------------- | ----------------------------------------------------------------------------- |\n| `CLAUDE_CODE_USE_FOUNDRY` | Boolean-like string (`1`, `true`, `yes`, `on`) | Enables Foundry mode for Anthropic provider |\n| `FOUNDRY_BASE_URL` | URL string | Anthropic endpoint base URL in Foundry mode |\n| `ANTHROPIC_FOUNDRY_API_KEY` | Token string | Used for `Authorization: Bearer <token>` |\n| `ANTHROPIC_CUSTOM_HEADERS` | Header list string | Extra headers; format `header-a: value, header-b: value` or newline-separated. Also forwarded outside Foundry whenever `ANTHROPIC_BASE_URL` is non-Anthropic. 
|\n| `NODE_EXTRA_CA_CERTS` | PEM path or inline PEM | Extra CA chain for server certificate validation |\n| `CLAUDE_CODE_CLIENT_CERT` | PEM path or inline PEM | mTLS client certificate |\n| `CLAUDE_CODE_CLIENT_KEY` | PEM path or inline PEM | mTLS client private key (must be paired with cert) |\n\n### Amazon Bedrock\n\n| Variable | Default / behavior |\n| ------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------- |\n| `AWS_REGION` | Primary region source |\n| `AWS_DEFAULT_REGION` | Fallback if `AWS_REGION` unset |\n| `AWS_PROFILE` | Enables named profile auth path |\n| `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY` | Enables IAM key auth path |\n| `AWS_BEARER_TOKEN_BEDROCK` | Highest-precedence bearer token auth path; skips AWS profile/credential-chain lookup when set |\n| `AWS_CONTAINER_CREDENTIALS_RELATIVE_URI` / `AWS_CONTAINER_CREDENTIALS_FULL_URI` | Marks Bedrock as available in provider detection (credential resolution itself covers env keys, profiles/SSO/`credential_process`, then IMDSv2) |\n| `AWS_WEB_IDENTITY_TOKEN_FILE` + `AWS_ROLE_ARN` | Marks Bedrock as available in provider detection (same caveat as the ECS variables above) |\n| `AWS_BEDROCK_SKIP_AUTH` | If `1`, injects dummy credentials (proxy/non-auth scenarios) |\n| `HTTPS_PROXY` / `HTTP_PROXY` | Honored via Bun's native fetch proxy support (the provider no longer ships an AWS SDK / proxy-agent transport) |\n| `NO_PROXY` | Excludes matching hosts from Bun's native proxy routing |\n\nRegion fallback in provider code: `options.region` → `AWS_REGION` → `AWS_DEFAULT_REGION` → `us-east-1`.\n\n### Azure OpenAI Responses\n\n| Variable | Default / behavior |\n| ---------------------------------- | --------------------------------------------------------------------------- |\n| `AZURE_OPENAI_API_KEY` | Required unless API key passed as option |\n| `AZURE_OPENAI_API_VERSION` | Default `v1` |\n| 
`AZURE_OPENAI_BASE_URL` | Direct base URL override |\n| `AZURE_OPENAI_RESOURCE_NAME` | Used to construct base URL: `https://<resource>.openai.azure.com/openai/v1` |\n| `AZURE_OPENAI_DEPLOYMENT_NAME_MAP` | Optional mapping string: `modelId=deploymentName,model2=deployment2` |\n\nBase URL resolution: option `azureBaseUrl` → env `AZURE_OPENAI_BASE_URL` → option/env resource name → `model.baseUrl`.\n\n### Google Vertex AI\n\n| Variable | Required? | Notes |\n| -------------------------------- | ------------------------------ | ------------------------------------------------------------------------------------------------------------------------- |\n| `GOOGLE_CLOUD_PROJECT` | Yes (unless passed in options) | Primary project ID source |\n| `GCP_PROJECT` | Fallback | Alternate project ID source |\n| `GCLOUD_PROJECT` | Fallback | Alternate project ID source |\n| `GOOGLE_CLOUD_PROJECT_ID` | OAuth login helper only | Used by Gemini CLI OAuth project discovery |\n| `GOOGLE_VERTEX_LOCATION` | Yes (unless passed in options) | Primary Vertex location source |\n| `GOOGLE_CLOUD_LOCATION` | Fallback | Alternate Vertex location source |\n| `VERTEX_LOCATION` | Fallback | Alternate Vertex location source |\n| `GOOGLE_CLOUD_API_KEY` | Conditional | Direct Vertex API-key auth; otherwise ADC fallback can authenticate when project and location are set |\n| `GOOGLE_APPLICATION_CREDENTIALS` | Conditional | If set, file must exist; otherwise ADC fallback path is checked (`~/.config/gcloud/application_default_credentials.json`) |\n\n### Kimi\n\n| Variable | Default / behavior |\n| ---------------------- | -------------------------------------------------------- |\n| `KIMI_CODE_OAUTH_HOST` | Primary OAuth host override |\n| `KIMI_OAUTH_HOST` | Fallback OAuth host override |\n| `KIMI_CODE_BASE_URL` | Overrides Kimi usage endpoint base URL (`usage/kimi.ts`) |\n\nOAuth host chain: `KIMI_CODE_OAUTH_HOST` → `KIMI_OAUTH_HOST` → `https://auth.kimi.com`.\n\n### Gemini CLI compatibility\n\n| Variable 
| Default / behavior |\n| -------------------------- | --------------------------------------------------------------- |\n| `PI_AI_GEMINI_CLI_VERSION` | Overrides Gemini CLI user-agent version tag (`0.35.3` if unset) |\n\n### OpenAI Codex responses (feature/debug controls)\n\n| Variable | Behavior |\n| ------------------------------------------ | ---------------------------------------------------- |\n| `PI_CODEX_DEBUG` | `1`/`true` enables Codex provider debug logging |\n| `PI_CODEX_WEBSOCKET` | `1`/`true` enables websocket transport preference |\n| `PI_OPENAI_STATEFUL` | Overrides the stateful-chaining default for the platform OpenAI Responses API (`previous_response_id`, forces `store: true`): on by default against api.openai.com, off elsewhere |\n| `PI_CODEX_WEBSOCKET_IDLE_TIMEOUT_MS` | Positive integer override (default 300000) |\n| `PI_CODEX_WEBSOCKET_RETRY_BUDGET` | Non-negative integer override (default 5) |\n| `PI_CODEX_WEBSOCKET_RETRY_DELAY_MS` | Positive integer base backoff override (default 500) |\n| `PI_OPENAI_STREAM_FIRST_EVENT_TIMEOUT_MS` | Positive integer OpenAI first-event timeout override |\n| `PI_OPENAI_STREAM_IDLE_TIMEOUT_MS` | Positive integer OpenAI stream idle timeout override |\n\n### Cursor provider debug\n\n| Variable | Behavior |\n| ------------------ | ------------------------------------------------------------------------ |\n| `DEBUG_CURSOR` | Enables provider debug logs; `2`/`verbose` for detailed payload snippets |\n| `DEBUG_CURSOR_LOG` | Optional file path for JSONL debug log output |\n\n### Prompt cache compatibility switch\n\n| Variable | Behavior |\n| -------------------- | ----------------------------------------------------------------------------------------------------------------- |\n| `PI_CACHE_RETENTION` | If `long`, enables long retention where supported (`anthropic`, `openai-responses`, Bedrock retention resolution) |\n\n---\n\n## 3) Web search subsystem\n\n### Search provider credentials\n\n| Variable | Used by |\n| 
--------------------------------------------------- | ------------------------------------------------------------- |\n| `EXA_API_KEY` | Exa search provider and Exa MCP tools |\n| `BRAVE_API_KEY` | Brave search provider |\n| `PERPLEXITY_API_KEY` | Perplexity search provider API-key mode |\n| `PERPLEXITY_COOKIES` | Perplexity cookie-auth search mode |\n| `TAVILY_API_KEY` | Tavily search provider |\n| `ZAI_API_KEY` | z.ai search provider (also checks stored OAuth in `agent.db`) |\n| `OPENAI_API_KEY` / Codex OAuth in DB | Codex search provider availability/auth |\n| `PI_CODEX_WEB_SEARCH_MODEL` | Codex search provider model override |\n| `MOONSHOT_SEARCH_API_KEY` / `KIMI_SEARCH_API_KEY` | Kimi/Moonshot search provider env auth |\n| `MOONSHOT_SEARCH_BASE_URL` / `KIMI_SEARCH_BASE_URL` | Kimi/Moonshot search endpoint override |\n| `KAGI_API_KEY` | Kagi search provider |\n| `JINA_API_KEY` | Jina search provider |\n| `PARALLEL_API_KEY` | Parallel search provider |\n| `SEARXNG_ENDPOINT`, `SEARXNG_TOKEN` | SearXNG endpoint and optional bearer token |\n| `SEARXNG_BASIC_USERNAME`, `SEARXNG_BASIC_PASSWORD` | SearXNG HTTP Basic Auth credentials |\n\nSearXNG also reads the equivalent `searxng.endpoint`, `searxng.token`, `searxng.basicUsername`, and `searxng.basicPassword` settings from `~/.omp/agent/config.yml`; environment variables are fallbacks.\n\n### Anthropic web search auth chain\n\n`searchAnthropic()` resolves credentials in this order:\n\n1. `ANTHROPIC_SEARCH_API_KEY`\n2. `authStorage.getApiKey(\"anthropic\")` fallback credentials (runtime/config overrides, stored API-key credentials, stored OAuth credentials, then generic Anthropic env fallback: `ANTHROPIC_FOUNDRY_API_KEY` in Foundry mode, otherwise `ANTHROPIC_OAUTH_TOKEN` / `ANTHROPIC_API_KEY`)\n\nFor either credential path, base URL resolution is:\n\n1. `ANTHROPIC_SEARCH_BASE_URL`\n2. `FOUNDRY_BASE_URL` when `CLAUDE_CODE_USE_FOUNDRY` is enabled\n3. `ANTHROPIC_BASE_URL`\n4. 
`https://api.anthropic.com`\n\nRelated vars:\n\n| Variable | Default / behavior |\n| --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| `ANTHROPIC_SEARCH_API_KEY` | API key used exclusively for the Anthropic web search provider. Highest-priority search auth; overrides `ANTHROPIC_API_KEY` / OAuth / Foundry for search calls without affecting chat completions. |\n| `ANTHROPIC_SEARCH_BASE_URL` | Base URL used exclusively for the Anthropic web search provider. Applied to either `ANTHROPIC_SEARCH_API_KEY` or fallback Anthropic credentials; overrides `ANTHROPIC_BASE_URL` (and `FOUNDRY_BASE_URL` in Foundry mode) for search calls. |\n| `ANTHROPIC_SEARCH_MODEL` | Search model override. Defaults to `claude-haiku-4-5`. |\n| `ANTHROPIC_BASE_URL` | Generic fallback base URL for Anthropic requests when no search-specific base URL is set. 
|\n\nUse `ANTHROPIC_SEARCH_BASE_URL` (optionally with `ANTHROPIC_SEARCH_API_KEY`) to keep chat routed through an enterprise gateway (`ANTHROPIC_BASE_URL` or `CLAUDE_CODE_USE_FOUNDRY=true`) while pointing web search at a direct Anthropic endpoint, or vice versa.\n\n### Perplexity OAuth flow behavior flag\n\n| Variable | Behavior |\n| ------------------- | ------------------------------------------------------------------------------- |\n| `PI_AUTH_NO_BORROW` | If set, disables macOS native-app token borrowing path in Perplexity login flow |\n\n---\n\n## 4) Python tooling and kernel runtime\n\n| Variable | Default / behavior |\n| ----------------------- | ------------------------------------------------------------------------------------------------------------------- |\n| `PI_PY` | Boolean-like override for the Python eval backend: truthy (`1`/`true`/`yes`/`on`) enables, any other value disables; unset defers to the `eval.py` setting (default enabled) |\n| `PI_JS` | Same boolean-like override for the JavaScript eval backend; unset defers to the `eval.js` setting (default enabled) |\n| `PI_PYTHON_SKIP_CHECK` | If `1`, skips Python interpreter availability checks (subprocess runner still starts on demand) |\n| `PI_PYTHON_INTEGRATION` | If `1`, opts gated integration tests in (e.g. 
`python-runner.integration.test.ts`) into running against real Python |\n| `PI_PYTHON_IPC_TRACE` | If `1`, logs NDJSON frames exchanged with the Python runner subprocess |\n| `VIRTUAL_ENV` | Highest-priority venv path for Python runtime resolution |\n\nExtra conditional behavior:\n\n- If `BUN_ENV=test` or `NODE_ENV=test`, Python availability checks are treated as OK and warming is skipped.\n- Python env filtering denies common API keys and allows safe base vars + `LC_`, `XDG_`, `PI_` prefixes.\n\n---\n\n## 5) Agent/runtime behavior toggles\n\n| Variable | Default / behavior |\n| ---------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| `PI_SMOL_MODEL` | Ephemeral model-role override for `smol` (CLI `--smol` takes precedence) |\n| `PI_SLOW_MODEL` | Ephemeral model-role override for `slow` (CLI `--slow` takes precedence) |\n| `PI_PLAN_MODEL` | Ephemeral model-role override for `plan` (CLI `--plan` takes precedence) |\n| `PI_NO_TITLE` | If set (any non-empty value), disables auto session title generation on first user message |\n| `PI_TINY_DEVICE` | ONNX execution provider for local tiny models; overrides the `providers.tinyModelDevice` setting (default: CPU; supports `cpu`, `gpu`, `metal`/`webgpu`, `auto`, `cuda`, `dml`, `coreml`, `wasm`, `webnn`, `webnn-gpu`, `webnn-cpu`, `webnn-npu`) |\n| `PI_TINY_DTYPE` | ONNX quantization/precision for local tiny models; overrides the `providers.tinyModelDtype` setting 
(default: each model's shipped dtype, currently `q4`; supports `auto`, `fp32`, `fp16`, `q8`, `int8`, `uint8`, `q4`, `bnb4`, `q4f16`, `q2`, `q2f16`, `q1`, `q1f16`) |\n| `PI_NO_INTERLEAVED_THINKING` | If `1`, disables Anthropic interleaved thinking budget behavior and uses output-token inflation for older thinking mode |\n| `NULL_PROMPT` | If `true`, system prompt builder returns empty string |\n| `PI_BLOCKED_AGENT` | Blocks a specific subagent type in task tool |\n| `PI_SUBPROCESS_CMD` | Overrides subagent spawn command (`omp` / `omp.cmd` resolution bypass) |\n| `PI_TASK_MAX_OUTPUT_BYTES` | Max captured output bytes per subagent (default `500000`) |\n| `PI_TASK_MAX_OUTPUT_LINES` | Max captured output lines per subagent (default `5000`) |\n| `PI_TIMING` | If set (any non-empty value), prints a hierarchical timing-span tree to **stderr** via `logger.printTimings()`. In interactive mode the tree prints once the agent is ready (before the TUI starts); in print mode it prints after the whole prompt batch completes. Print-mode prompts are wrapped in `print:prompt:initial` / `print:prompt:next` spans so each user message shows up as its own row. `PI_TIMING=x` exits the process with code 0 right after printing in interactive mode (use to measure cold startup only). `PI_TIMING=full` lists every module-load entry instead of just the top N. |\n| `PI_DEBUG_STARTUP` | If set (any non-empty value), streams one synchronous `[startup] <phase>:start` / `:done` marker line to **stderr** as each startup phase begins/ends — including command-module imports (`cli:load:<name>`) and the native addon extraction/`dlopen` (`native:*`). Unlike `PI_TIMING` (which prints only once startup completes), the markers survive a hard hang: the last line on stderr names the phase the process is stuck in. Combine with `PI_TIMING` freely; markers and the span tree share the same phase names. 
|\n| `PI_PACKAGE_DIR` | Overrides package asset base dir resolution (`docs/`, `examples/`, `CHANGELOG.md`) |\n| `PI_DISABLE_LSPMUX` | If `1`, disables lspmux detection/integration and forces direct LSP server spawning |\n| `PI_RPC_EMIT_TITLE` | Boolean-like flag enabling title events in RPC mode |\n| `SMITHERY_URL` | Smithery web URL override (default `https://smithery.ai`) |\n| `SMITHERY_API_URL` | Smithery API base URL override (default `https://api.smithery.ai`) |\n| `SMITHERY_API_KEY` | Smithery API key for managed MCP auth lookup |\n| `PUPPETEER_EXECUTABLE_PATH` | Browser tool Chromium executable override |\n| `LM_STUDIO_BASE_URL` | Default implicit LM Studio discovery base URL override (`http://127.0.0.1:1234/v1` if unset) |\n| `OLLAMA_BASE_URL` | Default implicit Ollama discovery base URL override (`OLLAMA_HOST` if unset, then `http://127.0.0.1:11434`) |\n| `OLLAMA_HOST` | Ollama host used for implicit Ollama discovery when `OLLAMA_BASE_URL` is unset; accepts Ollama-style values such as `127.0.0.1:11434` or `http://host:11434` |\n| `OLLAMA_CONTEXT_LENGTH` | Positive integer context-window override for implicit Ollama discovery; affects OMP context budgeting only and does not change Ollama's runtime `num_ctx` |\n| `LLAMA_CPP_BASE_URL` | Default implicit Llama.cpp discovery base URL override (`http://127.0.0.1:8080` if unset) |\n| `PI_EDIT_VARIANT` | Forces edit tool variant when valid (`patch`, `replace`, `hashline`, `apply_patch`) |\n| `PI_FORCE_IMAGE_PROTOCOL` | Forces supported image protocol (`kitty`, `iterm2`/`iterm`, `sixel`, `none`) where used |\n| `PI_ALLOW_SIXEL_PASSTHROUGH` | Allows SIXEL passthrough when `PI_FORCE_IMAGE_PROTOCOL=sixel` |\n| `PI_NO_PTY` | If `1`, disables interactive PTY path for bash tool |\n| `OMP_MCP_TIMEOUT_MS` | Overrides MCP client request timeout (ms) for every MCP server. `0` disables client-side timeouts (`AbortSignal` never fires). 
Invalid (negative or non-numeric) values are ignored with a warning and the per-server config or default (`30000`) is used. |\n\n`PI_NO_PTY` is also set internally when CLI `--no-pty` is used.\n\n---\n\n## 6) Storage and config root paths\n\nThese are consumed via `@oh-my-pi/pi-utils/dirs` and affect where coding-agent stores data.\n\n| Variable | Default / behavior |\n| --------------------- | ----------------------------------------------------------------------------- |\n| `PI_CONFIG_DIR` | Config root dirname under home (default `.omp`) |\n| `PI_CODING_AGENT_DIR` | Full override for agent directory (default `~/<PI_CONFIG_DIR or .omp>/agent`) |\n| `PWD` | Used when matching canonical current working directory in path helpers |\n\n---\n\n## 7) Shell/tool execution environment\n\n(From `packages/utils/src/procmgr.ts` and coding-agent bash tool integration.)\n\n| Variable | Behavior |\n| -------------------------- | ------------------------------------------------------------------------------ |\n| `PI_BASH_NO_CI` | Suppresses automatic `CI=true` injection into spawned shell env |\n| `CLAUDE_BASH_NO_CI` | Legacy alias fallback for `PI_BASH_NO_CI` |\n| `PI_BASH_NO_LOGIN` | Disables login-shell mode; shell args become `['-c']` instead of `['-l','-c']` |\n| `CLAUDE_BASH_NO_LOGIN` | Legacy alias fallback for `PI_BASH_NO_LOGIN` |\n| `PI_SHELL_PREFIX` | Optional command prefix wrapper |\n| `CLAUDE_CODE_SHELL_PREFIX` | Legacy alias fallback for `PI_SHELL_PREFIX` |\n| `VISUAL` | Preferred external editor command |\n| `EDITOR` | Fallback external editor command |\n\nCurrent implementation: `PI_BASH_NO_LOGIN`/`CLAUDE_BASH_NO_LOGIN` are active; when either is set, `getShellArgs()` returns `['-c']`.\n\n---\n\n## 8) UI/theme/session detection (auto-detected env)\n\nThese are read as runtime signals; they are usually set by the terminal/OS rather than manually configured.\n\n| Variable | Used for |\n| 
------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------- |\n| `COLORTERM`, `TERM`, `WT_SESSION` | Color capability detection (theme color mode) |\n| `COLORFGBG` | Terminal background light/dark auto-detection |\n| `TERM_PROGRAM`, `TERM_PROGRAM_VERSION`, `TERMINAL_EMULATOR` | Terminal identity in system prompt/context |\n| `TMUX_PANE`, `CMUX_SURFACE_ID`, `KITTY_WINDOW_ID`, `TERM_SESSION_ID`, `WT_SESSION` | Stable per-terminal session breadcrumb IDs |\n| `SHELL`, `ComSpec`, `TERM_PROGRAM`, `TERM` | System info diagnostics |\n| `APPDATA`, `XDG_CONFIG_HOME` | lspmux config path resolution |\n| `HOME` | Path shortening in MCP command UI |\n\n---\n\n## 9) TUI runtime flags (shared package, affects coding-agent UX)\n\n| Variable | Behavior |\n| ------------------------- | ------------------------------------------------------------------------------------- |\n| `PI_NOTIFICATIONS` | `off` / `0` / `false` suppress desktop notifications |\n| `PI_TUI_WRITE_LOG` | If set, logs TUI writes to file |\n| `PI_HARDWARE_CURSOR` | If `1`, enables hardware cursor mode |\n| `PI_NO_SYNC_OUTPUT` | If set (any non-empty value), disables DEC 2026 synchronized-output wrappers while keeping TUI autowrap guards |\n| `PI_NO_DECCARA` | If set (truthy), disables Kitty DECCARA rectangular-SGR background fills (forces padded-string rendering) |\n| `PI_DEBUG_REDRAW` | If `1`, enables redraw debug logging |\n| `PI_FORCE_IMAGE_PROTOCOL` | Forces terminal image protocol detection (`kitty`, `iterm2`/`iterm`, `sixel`, `none`) |\n| `PI_TUI_RESIZE_IN_PLACE` | `1`/`true` force in-place resize (no alt-screen borrow, no ED3 rewrap); `0`/`false` force the alt-screen fast path. 
Default-on for Warp, which re-reports its size on alt-screen toggles |\n\n---\n\n## 10) Commit generation controls\n\n| Variable | Behavior |\n| ------------------------- | ------------------------------------------------------------------- |\n| `PI_COMMIT_TEST_FALLBACK` | If `true` (case-insensitive), force commit fallback generation path |\n| `PI_COMMIT_NO_FALLBACK` | If `true`, disables fallback when agent returns no proposal |\n| `PI_COMMIT_MAP_REDUCE` | If `false`, disables map-reduce commit analysis path |\n| `DEBUG` | If set, commit agent error stack traces are printed |\n\n---\n\n## Security-sensitive variables\n\nTreat these as secrets; do not log or commit them:\n\n- Provider/API keys and OAuth/bearer credentials (all `*_API_KEY`, `*_TOKEN`, OAuth access/refresh tokens)\n- Cloud credentials (`AWS_*`, `GOOGLE_APPLICATION_CREDENTIALS` path may expose service-account material)\n- Search/provider auth vars (`EXA_API_KEY`, `BRAVE_API_KEY`, `PERPLEXITY_API_KEY`, Anthropic search keys)\n- Foundry mTLS material (`CLAUDE_CODE_CLIENT_CERT`, `CLAUDE_CODE_CLIENT_KEY`, `NODE_EXTRA_CA_CERTS` when it points to private CA bundles)\n\nPython runtime also explicitly strips many common key vars before spawning kernel subprocesses (`packages/coding-agent/src/eval/py/runtime.ts`).\n",
20
+ "environment-variables.md": "# Environment Variables (Current Runtime Reference)\n\nThis reference is derived from current code paths in:\n\n- `packages/coding-agent/src/**`\n- `packages/ai/src/**` (provider/auth resolution used by coding-agent)\n- `packages/utils/src/**` and `packages/tui/src/**` where those vars directly affect coding-agent runtime\n\nIt documents only active behavior.\n\n## Resolution model and precedence\n\nMost runtime lookups use `$env` from `@oh-my-pi/pi-utils` (`packages/utils/src/env.ts`).\n\n`$env` loading order:\n\n1. Existing process environment (`Bun.env`)\n2. Project `.env` (`$PWD/.env`) for keys not already set\n3. Agent `.env` (`~/.omp/agent/.env`, respecting `PI_CONFIG_DIR` / `PI_CODING_AGENT_DIR`) for keys not already set\n4. Config-root `.env` (`~/.omp/.env`, respecting `PI_CONFIG_DIR`) for keys not already set\n5. Home `.env` (`~/.env`) for keys not already set\n\nAdditional rule inside each `.env` file: `OMP_*` keys are mirrored to `PI_*` keys in that parsed file.\n\n---\n\n## 1) Model/provider authentication\n\nThese are consumed via `getEnvApiKey()` (`packages/ai/src/stream.ts`) unless noted otherwise.\n\n### Core provider credentials\n\n| Variable | Used for | Required when | Notes / precedence |\n| ------------------------------- | ------------------------------------------------ | -------------------------------------------------------------- | --------------------------------------------------------------------------------------------------- |\n| `ANTHROPIC_OAUTH_TOKEN` | Anthropic API auth | Using Anthropic with OAuth token auth | Takes precedence over `ANTHROPIC_API_KEY` for provider auth resolution |\n| `ANTHROPIC_API_KEY` | Anthropic API auth | Using Anthropic without OAuth token | Fallback after `ANTHROPIC_OAUTH_TOKEN` |\n| `ANTHROPIC_FOUNDRY_API_KEY` | Anthropic via Azure Foundry / enterprise gateway | `CLAUDE_CODE_USE_FOUNDRY` enabled | Takes precedence over `ANTHROPIC_OAUTH_TOKEN` and `ANTHROPIC_API_KEY` when 
Foundry mode is enabled |\n| `OPENAI_API_KEY` | OpenAI auth | Using OpenAI-family providers without explicit apiKey argument | Used by OpenAI Completions/Responses providers |\n| `GEMINI_API_KEY` | Google Gemini auth | Using `google` provider models | Primary key for Gemini provider mapping |\n| `GOOGLE_API_KEY` | Gemini image tool auth fallback | Using `gemini_image` tool without `GEMINI_API_KEY` | Used by coding-agent image tool fallback path |\n| `GROQ_API_KEY` | Groq auth | Using Groq models | |\n| `CEREBRAS_API_KEY` | Cerebras auth | Using Cerebras models | |\n| `FIREWORKS_API_KEY` | Fireworks auth | Using Fireworks models | |\n| `FIREPASS_API_KEY` | Fire Pass auth | Using Fire Pass models | |\n| `TOGETHER_API_KEY` | Together auth | Using `together` provider | |\n| `AIMLAPI_API_KEY` | AIML API auth | Using `aimlapi` provider | OpenAI-compatible AIML API endpoint at `https://api.aimlapi.com/v1` |\n| `HUGGINGFACE_HUB_TOKEN` | Hugging Face auth | Using `huggingface` provider | Primary Hugging Face token env var |\n| `HF_TOKEN` | Hugging Face auth | Using `huggingface` provider | Fallback when `HUGGINGFACE_HUB_TOKEN` is unset |\n| `SYNTHETIC_API_KEY` | Synthetic auth | Using Synthetic models | |\n| `NVIDIA_API_KEY` | NVIDIA auth | Using `nvidia` provider | |\n| `NANO_GPT_API_KEY` | NanoGPT auth | Using `nanogpt` provider | |\n| `VENICE_API_KEY` | Venice auth | Using `venice` provider | |\n| `LITELLM_API_KEY` | LiteLLM auth | Using `litellm` provider | OpenAI-compatible LiteLLM proxy key |\n| `LM_STUDIO_API_KEY` | LM Studio auth (optional) | Using `lm-studio` provider with authenticated hosts | Local LM Studio usually runs without auth; any non-empty token works when a key is required |\n| `OLLAMA_API_KEY` | Ollama auth (optional) | Using `ollama` provider with authenticated hosts | Local Ollama usually runs without auth; any non-empty token works when a key is required |\n| `LLAMA_CPP_API_KEY` | llama.cpp auth (optional) | Using `llama.cpp` provider with 
authenticated hosts | Local llama.cpp usually runs without auth; any non-empty token works when a key is configured |\n| `XIAOMI_API_KEY` | Xiaomi MiMo auth | Using `xiaomi` provider | |\n| `XIAOMI_TOKEN_PLAN_AMS_API_KEY` | Xiaomi MiMo Token Plan auth (AMS) | Using `xiaomi-token-plan-ams` provider | |\n| `XIAOMI_TOKEN_PLAN_CN_API_KEY` | Xiaomi MiMo Token Plan auth (CN) | Using `xiaomi-token-plan-cn` provider | |\n| `XIAOMI_TOKEN_PLAN_SGP_API_KEY` | Xiaomi MiMo Token Plan auth (SGP) | Using `xiaomi-token-plan-sgp` provider | |\n| `MOONSHOT_API_KEY` | Moonshot auth | Using `moonshot` provider | |\n| `XAI_API_KEY` | xAI auth | Using xAI models or as fallback for `xai-oauth` | |\n| `XAI_OAUTH_TOKEN` | xAI OAuth/SuperGrok auth | Using `xai-oauth` provider | Takes precedence over `XAI_API_KEY` for `xai-oauth` |\n| `OPENROUTER_API_KEY` | OpenRouter auth | Using OpenRouter models | Also used by image tool when preferred/auto provider is OpenRouter |\n| `MISTRAL_API_KEY` | Mistral auth | Using Mistral models | |\n| `ZAI_API_KEY` | z.ai auth | Using z.ai models | Also used by z.ai web search provider |\n| `ZHIPU_API_KEY` | Zhipu Coding Plan auth | Using `zhipu-coding-plan` provider | |\n| `UMANS_AI_CODING_PLAN_API_KEY` | Umans AI Coding Plan auth | Using `umans` provider | |\n| `MINIMAX_API_KEY` | MiniMax auth | Using `minimax` provider | |\n| `MINIMAX_CODE_API_KEY` | MiniMax Code auth | Using `minimax-code` provider | |\n| `MINIMAX_CODE_CN_API_KEY` | MiniMax Code CN auth | Using `minimax-code-cn` provider | |\n| `OPENCODE_API_KEY` | OpenCode auth | Using `opencode-go` / `opencode-zen` models | |\n| `QIANFAN_API_KEY` | Qianfan auth | Using `qianfan` provider | |\n| `QWEN_OAUTH_TOKEN` | Qwen Portal auth | Using `qwen-portal` with OAuth token | Takes precedence over `QWEN_PORTAL_API_KEY` |\n| `QWEN_PORTAL_API_KEY` | Qwen Portal auth | Using `qwen-portal` with API key | Fallback after `QWEN_OAUTH_TOKEN` |\n| `ZENMUX_API_KEY` | ZenMux auth | Using `zenmux` provider | Used for 
ZenMux OpenAI and Anthropic-compatible routes |\n| `VLLM_API_KEY` | vLLM auth/discovery opt-in | Using `vllm` provider (local OpenAI-compatible servers) | Any non-empty value works for no-auth local servers |\n| `CURSOR_ACCESS_TOKEN` | Cursor provider auth | Using Cursor provider | |\n| `AI_GATEWAY_API_KEY` | Vercel AI Gateway auth | Using `vercel-ai-gateway` provider | |\n| `CLOUDFLARE_AI_GATEWAY_API_KEY` | Cloudflare AI Gateway auth | Using `cloudflare-ai-gateway` provider | Base URL must be configured as `https://gateway.ai.cloudflare.com/v1/<account>/<gateway>/anthropic` |\n| `ALIBABA_CODING_PLAN_API_KEY` | Alibaba Coding Plan auth | Using `alibaba-coding-plan` provider | |\n| `DEEPSEEK_API_KEY` | DeepSeek auth | Using DeepSeek models | |\n| `KILO_API_KEY` | Kilo auth | Using Kilo models | |\n| `OLLAMA_CLOUD_API_KEY` | Ollama Cloud auth | Using `ollama-cloud` provider | |\n| `WAFER_PASS_API_KEY` | Wafer Pass auth | Using `wafer-pass` provider | Flat-rate Wafer subscription; validated against `https://pass.wafer.ai/v1/models` |\n| `WAFER_SERVERLESS_API_KEY` | Wafer Serverless auth | Using `wafer-serverless` provider | Pay-as-you-go Wafer SKU; validated against `https://pass.wafer.ai/v1/models` |\n| `GITLAB_TOKEN` | GitLab Duo auth | Using `gitlab-duo` provider | |\n\n### GitHub/Copilot tokens\n\n| Variable | Used for | Notes |\n| ---------------------- | ------------------------------------------------ | ------------------------------------------ |\n| `COPILOT_GITHUB_TOKEN` | GitHub Copilot provider auth | Generic GitHub tokens are not used here |\n| `GH_TOKEN` | GitHub API auth in web scraper | Web scraper fallback after `GITHUB_TOKEN` |\n| `GITHUB_TOKEN` | GitHub API auth in web scraper | Web scraper checks this before `GH_TOKEN` |\n\n### Auth broker / auth gateway (remote credential vault)\n\nWhen the broker is enabled, the local SQLite credential store is bypassed and all OAuth refresh / access tokens live on the broker host. 
See [`auth-broker-gateway.md`](./auth-broker-gateway.md) for the full protocol, CLI surface, and 5-min/15-s usage cache layering.\n\n| Variable | Used for | Required when | Notes / precedence |\n| ----------------------- | -------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |\n| `OMP_AUTH_BROKER_URL` | Base URL of the remote auth-broker (e.g. `https://broker.tailnet:8765`); selects broker mode | Resolving credentials through a broker; also required by `omp auth-gateway serve` (the gateway is itself a broker client) | Wins over `auth.broker.url` in `config.yml`. When set with no resolvable token, `resolveAuthBrokerConfig()` hard-errors instead of falling back to local SQLite. |\n| `OMP_AUTH_BROKER_TOKEN` | Bearer token sent on every broker endpoint except `/v1/healthz` | `OMP_AUTH_BROKER_URL` is set and no token is available from `auth.broker.token` or `<config-dir>/auth-broker.token` | Resolution: this env → `auth.broker.token` (`$ENV_NAME` indirection supported) → `<config-dir>/auth-broker.token` (mode `0600`). `<config-dir>` is `~/.omp/` (respecting `PI_CONFIG_DIR`). |\n| `OMP_AUTH_BROKER_SNAPSHOT_TTL_MS` | Freshness window for the encrypted local broker snapshot cache | Optional in broker mode | Default `3600000` (1 h). Freshness is based on broker `snapshot.generatedAt`; `0` disables cache reads/writes and forces the old blocking fetch every startup. |\n| `OMP_AUTH_BROKER_SNAPSHOT_CACHE` | Path to the encrypted local broker snapshot cache | Optional in broker mode | Defaults to `~/.omp/cache/auth-broker-snapshot.enc` (or XDG cache equivalent). Useful for tests, ephemeral hosts, or relocating the `0600` cache file. 
|\n\nThe gateway has no dedicated env vars — it inherits `OMP_AUTH_BROKER_*`. Its own inbound bearer token lives at `<config-dir>/auth-gateway.token` and is managed via `omp auth-gateway token`.\n\n---\n\n## 2) Provider-specific runtime configuration\n\n### Anthropic Foundry Gateway (Azure / enterprise proxy)\n\nWhen `CLAUDE_CODE_USE_FOUNDRY` is enabled, Anthropic requests switch to Foundry mode:\n\n- Base URL resolves from `FOUNDRY_BASE_URL` (fallback remains model/default base URL if unset).\n- API key resolution for provider `anthropic` becomes:\n `ANTHROPIC_FOUNDRY_API_KEY` → `ANTHROPIC_OAUTH_TOKEN` → `ANTHROPIC_API_KEY`.\n- `ANTHROPIC_CUSTOM_HEADERS` is parsed as comma/newline-separated `key: value`\n pairs and merged into request headers. They are also forwarded when\n `ANTHROPIC_BASE_URL` points to a non-Anthropic host (e.g. a corporate API\n gateway), so enterprise gateways requiring proprietary auth headers work\n without enabling Foundry mode.\n- TLS client/server material can be injected from env values:\n `NODE_EXTRA_CA_CERTS`, `CLAUDE_CODE_CLIENT_CERT`, `CLAUDE_CODE_CLIENT_KEY`.\n Each accepts either:\n - a filesystem path to PEM content, or\n - inline PEM (including escaped `\\n` sequences).\n\n| Variable | Value type | Behavior |\n| --------------------------- | ---------------------------------------------- | ----------------------------------------------------------------------------- |\n| `CLAUDE_CODE_USE_FOUNDRY` | Boolean-like string (`1`, `true`, `yes`, `on`) | Enables Foundry mode for Anthropic provider |\n| `FOUNDRY_BASE_URL` | URL string | Anthropic endpoint base URL in Foundry mode |\n| `ANTHROPIC_FOUNDRY_API_KEY` | Token string | Used for `Authorization: Bearer <token>` |\n| `ANTHROPIC_CUSTOM_HEADERS` | Header list string | Extra headers; format `header-a: value, header-b: value` or newline-separated. Also forwarded outside Foundry whenever `ANTHROPIC_BASE_URL` is non-Anthropic. 
|\n| `NODE_EXTRA_CA_CERTS` | PEM path or inline PEM | Extra CA chain for server certificate validation |\n| `CLAUDE_CODE_CLIENT_CERT` | PEM path or inline PEM | mTLS client certificate |\n| `CLAUDE_CODE_CLIENT_KEY` | PEM path or inline PEM | mTLS client private key (must be paired with cert) |\n\n### Amazon Bedrock\n\n| Variable | Default / behavior |\n| ------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------- |\n| `AWS_REGION` | Primary region source |\n| `AWS_DEFAULT_REGION` | Fallback if `AWS_REGION` unset |\n| `AWS_PROFILE` | Enables named profile auth path |\n| `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY` | Enables IAM key auth path |\n| `AWS_BEARER_TOKEN_BEDROCK` | Highest-precedence bearer token auth path; skips AWS profile/credential-chain lookup when set |\n| `AWS_CONTAINER_CREDENTIALS_RELATIVE_URI` / `AWS_CONTAINER_CREDENTIALS_FULL_URI` | Marks Bedrock as available in provider detection (credential resolution itself covers env keys, profiles/SSO/`credential_process`, then IMDSv2) |\n| `AWS_WEB_IDENTITY_TOKEN_FILE` + `AWS_ROLE_ARN` | Marks Bedrock as available in provider detection (same caveat as the ECS variables above) |\n| `AWS_BEDROCK_SKIP_AUTH` | If `1`, injects dummy credentials (proxy/non-auth scenarios) |\n| `HTTPS_PROXY` / `HTTP_PROXY` | Honored via Bun's native fetch proxy support (the provider no longer ships an AWS SDK / proxy-agent transport) |\n| `NO_PROXY` | Excludes matching hosts from Bun's native proxy routing |\n\nRegion fallback in provider code: `options.region` → `AWS_REGION` → `AWS_DEFAULT_REGION` → `us-east-1`.\n\n### Azure OpenAI Responses\n\n| Variable | Default / behavior |\n| ---------------------------------- | --------------------------------------------------------------------------- |\n| `AZURE_OPENAI_API_KEY` | Required unless API key passed as option |\n| `AZURE_OPENAI_API_VERSION` | Default `v1` |\n| 
`AZURE_OPENAI_BASE_URL` | Direct base URL override |\n| `AZURE_OPENAI_RESOURCE_NAME` | Used to construct base URL: `https://<resource>.openai.azure.com/openai/v1` |\n| `AZURE_OPENAI_DEPLOYMENT_NAME_MAP` | Optional mapping string: `modelId=deploymentName,model2=deployment2` |\n\nBase URL resolution: option `azureBaseUrl` → env `AZURE_OPENAI_BASE_URL` → option/env resource name → `model.baseUrl`.\n\n### Google Vertex AI\n\n| Variable | Required? | Notes |\n| -------------------------------- | ------------------------------ | ------------------------------------------------------------------------------------------------------------------------- |\n| `GOOGLE_CLOUD_PROJECT` | Yes (unless passed in options) | Primary project ID source |\n| `GCP_PROJECT` | Fallback | Alternate project ID source |\n| `GCLOUD_PROJECT` | Fallback | Alternate project ID source |\n| `GOOGLE_CLOUD_PROJECT_ID` | OAuth login helper only | Used by Gemini CLI OAuth project discovery |\n| `GOOGLE_VERTEX_LOCATION` | Yes (unless passed in options) | Primary Vertex location source |\n| `GOOGLE_CLOUD_LOCATION` | Fallback | Alternate Vertex location source |\n| `VERTEX_LOCATION` | Fallback | Alternate Vertex location source |\n| `GOOGLE_CLOUD_API_KEY` | Conditional | Direct Vertex API-key auth; otherwise ADC fallback can authenticate when project and location are set |\n| `GOOGLE_APPLICATION_CREDENTIALS` | Conditional | If set, file must exist; otherwise ADC fallback path is checked (`~/.config/gcloud/application_default_credentials.json`) |\n\n### Kimi\n\n| Variable | Default / behavior |\n| ---------------------- | -------------------------------------------------------- |\n| `KIMI_CODE_OAUTH_HOST` | Primary OAuth host override |\n| `KIMI_OAUTH_HOST` | Fallback OAuth host override |\n| `KIMI_CODE_BASE_URL` | Overrides Kimi usage endpoint base URL (`usage/kimi.ts`) |\n\nOAuth host chain: `KIMI_CODE_OAUTH_HOST` → `KIMI_OAUTH_HOST` → `https://auth.kimi.com`.\n\n### Gemini CLI compatibility\n\n| Variable 
| Default / behavior |\n| -------------------------- | --------------------------------------------------------------- |\n| `PI_AI_GEMINI_CLI_VERSION` | Overrides Gemini CLI user-agent version tag (`0.35.3` if unset) |\n\n### OpenAI Codex responses (feature/debug controls)\n\n| Variable | Behavior |\n| ------------------------------------------ | ---------------------------------------------------- |\n| `PI_CODEX_DEBUG` | `1`/`true` enables Codex provider debug logging |\n| `PI_CODEX_WEBSOCKET` | `1`/`true` enables websocket transport preference |\n| `PI_OPENAI_STATEFUL` | Overrides the stateful-chaining default for the platform OpenAI Responses API (`previous_response_id`, forces `store: true`): on by default against api.openai.com, off elsewhere |\n| `PI_CODEX_WEBSOCKET_IDLE_TIMEOUT_MS` | Positive integer override (default 300000) |\n| `PI_CODEX_WEBSOCKET_RETRY_BUDGET` | Non-negative integer override (default 5) |\n| `PI_CODEX_WEBSOCKET_RETRY_DELAY_MS` | Positive integer base backoff override (default 500) |\n| `PI_OPENAI_STREAM_FIRST_EVENT_TIMEOUT_MS` | Positive integer OpenAI first-event timeout override |\n| `PI_OPENAI_STREAM_IDLE_TIMEOUT_MS` | Positive integer OpenAI stream idle timeout override |\n\n### Cursor provider debug\n\n| Variable | Behavior |\n| ------------------ | ------------------------------------------------------------------------ |\n| `DEBUG_CURSOR` | Enables provider debug logs; `2`/`verbose` for detailed payload snippets |\n| `DEBUG_CURSOR_LOG` | Optional file path for JSONL debug log output |\n\n### Prompt cache compatibility switch\n\n| Variable | Behavior |\n| -------------------- | ----------------------------------------------------------------------------------------------------------------- |\n| `PI_CACHE_RETENTION` | If `long`, enables long retention where supported (`anthropic`, `openai-responses`, Bedrock retention resolution) |\n\n---\n\n## 3) Web search subsystem\n\n### Search provider credentials\n\n| Variable | Used by |\n| 
--------------------------------------------------- | ------------------------------------------------------------- |\n| `EXA_API_KEY` | Exa search provider and Exa MCP tools |\n| `BRAVE_API_KEY` | Brave search provider |\n| `PERPLEXITY_API_KEY` | Perplexity search provider API-key mode |\n| `PERPLEXITY_COOKIES` | Perplexity cookie-auth search mode |\n| `TAVILY_API_KEY` | Tavily search provider |\n| `ZAI_API_KEY` | z.ai search provider (also checks stored OAuth in `agent.db`) |\n| `OPENAI_API_KEY` / Codex OAuth in DB | Codex search provider availability/auth |\n| `PI_CODEX_WEB_SEARCH_MODEL` | Codex search provider model override |\n| `MOONSHOT_SEARCH_API_KEY` / `KIMI_SEARCH_API_KEY` | Kimi/Moonshot search provider env auth |\n| `MOONSHOT_SEARCH_BASE_URL` / `KIMI_SEARCH_BASE_URL` | Kimi/Moonshot search endpoint override |\n| `KAGI_API_KEY` | Kagi search provider |\n| `JINA_API_KEY` | Jina search provider |\n| `PARALLEL_API_KEY` | Parallel search provider |\n| `SEARXNG_ENDPOINT`, `SEARXNG_TOKEN` | SearXNG endpoint and optional bearer token |\n| `SEARXNG_BASIC_USERNAME`, `SEARXNG_BASIC_PASSWORD` | SearXNG HTTP Basic Auth credentials |\n\nSearXNG also reads the equivalent `searxng.endpoint`, `searxng.token`, `searxng.basicUsername`, and `searxng.basicPassword` settings from `~/.omp/agent/config.yml`; environment variables are fallbacks.\n\n### Anthropic web search auth chain\n\n`searchAnthropic()` resolves credentials in this order:\n\n1. `ANTHROPIC_SEARCH_API_KEY`\n2. `authStorage.getApiKey(\"anthropic\")` fallback credentials (runtime/config overrides, stored API-key credentials, stored OAuth credentials, then generic Anthropic env fallback: `ANTHROPIC_FOUNDRY_API_KEY` in Foundry mode, otherwise `ANTHROPIC_OAUTH_TOKEN` / `ANTHROPIC_API_KEY`)\n\nFor either credential path, base URL resolution is:\n\n1. `ANTHROPIC_SEARCH_BASE_URL`\n2. `FOUNDRY_BASE_URL` when `CLAUDE_CODE_USE_FOUNDRY` is enabled\n3. `ANTHROPIC_BASE_URL`\n4. 
`https://api.anthropic.com`\n\nRelated vars:\n\n| Variable | Default / behavior |\n| --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| `ANTHROPIC_SEARCH_API_KEY` | API key used exclusively for the Anthropic web search provider. Highest-priority search auth; overrides `ANTHROPIC_API_KEY` / OAuth / Foundry for search calls without affecting chat completions. |\n| `ANTHROPIC_SEARCH_BASE_URL` | Base URL used exclusively for the Anthropic web search provider. Applied to either `ANTHROPIC_SEARCH_API_KEY` or fallback Anthropic credentials; overrides `ANTHROPIC_BASE_URL` (and `FOUNDRY_BASE_URL` in Foundry mode) for search calls. |\n| `ANTHROPIC_SEARCH_MODEL` | Search model override. Defaults to `claude-haiku-4-5`. |\n| `ANTHROPIC_BASE_URL` | Generic fallback base URL for Anthropic requests when no search-specific base URL is set. 
|\n\nUse `ANTHROPIC_SEARCH_BASE_URL` (optionally with `ANTHROPIC_SEARCH_API_KEY`) to keep chat routed through an enterprise gateway (`ANTHROPIC_BASE_URL` or `CLAUDE_CODE_USE_FOUNDRY=true`) while pointing web search at a direct Anthropic endpoint, or vice versa.\n\n### Perplexity OAuth flow behavior flag\n\n| Variable | Behavior |\n| ------------------- | ------------------------------------------------------------------------------- |\n| `PI_AUTH_NO_BORROW` | If set, disables macOS native-app token borrowing path in Perplexity login flow |\n\n---\n\n## 4) Python tooling and kernel runtime\n\n| Variable | Default / behavior |\n| ----------------------- | ------------------------------------------------------------------------------------------------------------------- |\n| `PI_PY` | Boolean-like override for the Python eval backend: truthy (`1`/`true`/`yes`/`on`) enables, any other value disables; unset defers to the `eval.py` setting (default enabled) |\n| `PI_JS` | Same boolean-like override for the JavaScript eval backend; unset defers to the `eval.js` setting (default enabled) |\n| `PI_PYTHON_SKIP_CHECK` | If `1`, skips Python interpreter availability checks (subprocess runner still starts on demand) |\n| `PI_PYTHON_INTEGRATION` | If `1`, opts gated integration tests in (e.g. 
`python-runner.integration.test.ts`) into running against real Python |\n| `PI_PYTHON_IPC_TRACE` | If `1`, logs NDJSON frames exchanged with the Python runner subprocess |\n| `VIRTUAL_ENV` | Highest-priority venv path for Python runtime resolution |\n\nExtra conditional behavior:\n\n- If `BUN_ENV=test` or `NODE_ENV=test`, Python availability checks are treated as OK and warming is skipped.\n- Python env filtering denies common API keys and allows safe base vars + `LC_`, `XDG_`, `PI_` prefixes.\n\n---\n\n## 5) Agent/runtime behavior toggles\n\n| Variable | Default / behavior |\n| ---------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| `PI_SMOL_MODEL` | Ephemeral model-role override for `smol` (CLI `--smol` takes precedence) |\n| `PI_SLOW_MODEL` | Ephemeral model-role override for `slow` (CLI `--slow` takes precedence) |\n| `PI_PLAN_MODEL` | Ephemeral model-role override for `plan` (CLI `--plan` takes precedence) |\n| `PI_NO_TITLE` | If set (any non-empty value), disables auto session title generation on first user message |\n| `PI_TINY_DEVICE` | ONNX execution provider for local tiny models; overrides the `providers.tinyModelDevice` setting (default: CPU; supports `cpu`, `gpu`, `metal`/`webgpu`, `auto`, `cuda`, `dml`, `coreml`, `wasm`, `webnn`, `webnn-gpu`, `webnn-cpu`, `webnn-npu`) |\n| `PI_TINY_DTYPE` | ONNX quantization/precision for local tiny models; overrides the `providers.tinyModelDtype` setting 
(default: each model's shipped dtype, currently `q4`; supports `auto`, `fp32`, `fp16`, `q8`, `int8`, `uint8`, `q4`, `bnb4`, `q4f16`, `q2`, `q2f16`, `q1`, `q1f16`) |\n| `PI_NO_INTERLEAVED_THINKING` | If `1`, disables Anthropic interleaved thinking budget behavior and uses output-token inflation for older thinking mode |\n| `NULL_PROMPT` | If `true`, system prompt builder returns empty string |\n| `PI_BLOCKED_AGENT` | Blocks a specific subagent type in task tool |\n| `PI_SUBPROCESS_CMD` | Overrides subagent spawn command (`omp` / `omp.cmd` resolution bypass) |\n| `PI_TASK_MAX_OUTPUT_BYTES` | Max captured output bytes per subagent (default `500000`) |\n| `PI_TASK_MAX_OUTPUT_LINES` | Max captured output lines per subagent (default `5000`) |\n| `PI_TIMING` | If set (any non-empty value), prints a hierarchical timing-span tree to **stderr** via `logger.printTimings()`. In interactive mode the tree prints once the agent is ready (before the TUI starts); in print mode it prints after the whole prompt batch completes. Print-mode prompts are wrapped in `print:prompt:initial` / `print:prompt:next` spans so each user message shows up as its own row. `PI_TIMING=x` exits the process with code 0 right after printing in interactive mode (use to measure cold startup only). `PI_TIMING=full` lists every module-load entry instead of just the top N. |\n| `PI_DEBUG_STARTUP` | If set (any non-empty value), streams one synchronous `[startup] <phase>:start` / `:done` marker line to **stderr** as each startup phase begins/ends — including command-module imports (`cli:load:<name>`) and the native addon extraction/`dlopen` (`native:*`). Unlike `PI_TIMING` (which prints only once startup completes), the markers survive a hard hang: the last line on stderr names the phase the process is stuck in. Combine with `PI_TIMING` freely; markers and the span tree share the same phase names. 
|\n| `PI_PACKAGE_DIR` | Overrides package asset base dir resolution (`docs/`, `examples/`, `CHANGELOG.md`) |\n| `PI_DISABLE_LSPMUX` | If `1`, disables lspmux detection/integration and forces direct LSP server spawning |\n| `PI_RPC_EMIT_TITLE` | Boolean-like flag enabling title events in RPC mode |\n| `SMITHERY_URL` | Smithery web URL override (default `https://smithery.ai`) |\n| `SMITHERY_API_URL` | Smithery API base URL override (default `https://api.smithery.ai`) |\n| `SMITHERY_API_KEY` | Smithery API key for managed MCP auth lookup |\n| `PUPPETEER_EXECUTABLE_PATH` | Browser tool Chromium executable override |\n| `LITELLM_BASE_URL` | LiteLLM proxy base URL fallback (`http://localhost:4000/v1` if unset); explicit `providers.litellm.baseUrl` / `models.yml` config wins |\n| `LM_STUDIO_BASE_URL` | Default implicit LM Studio discovery base URL override (`http://127.0.0.1:1234/v1` if unset) |\n| `OLLAMA_BASE_URL` | Default implicit Ollama discovery base URL override (`OLLAMA_HOST` if unset, then `http://127.0.0.1:11434`) |\n| `OLLAMA_HOST` | Ollama host used for implicit Ollama discovery when `OLLAMA_BASE_URL` is unset; accepts Ollama-style values such as `127.0.0.1:11434` or `http://host:11434` |\n| `OLLAMA_CONTEXT_LENGTH` | Positive integer context-window override for implicit Ollama discovery; affects OMP context budgeting only and does not change Ollama's runtime `num_ctx` |\n| `LLAMA_CPP_BASE_URL` | Default implicit Llama.cpp discovery base URL override (`http://127.0.0.1:8080` if unset) |\n| `PI_EDIT_VARIANT` | Forces edit tool variant when valid (`patch`, `replace`, `hashline`, `apply_patch`) |\n| `PI_FORCE_IMAGE_PROTOCOL` | Forces supported image protocol (`kitty`, `iterm2`/`iterm`, `sixel`, `none`) where used |\n| `PI_ALLOW_SIXEL_PASSTHROUGH` | Allows SIXEL passthrough when `PI_FORCE_IMAGE_PROTOCOL=sixel` |\n| `PI_NO_PTY` | If `1`, disables interactive PTY path for bash tool |\n| `OMP_MCP_TIMEOUT_MS` | Overrides MCP client request timeout (ms) for every MCP 
server. `0` disables client-side timeouts (`AbortSignal` never fires). Invalid (negative or non-numeric) values are ignored with a warning and the per-server config or default (`30000`) is used. |\n\n`PI_NO_PTY` is also set internally when CLI `--no-pty` is used.\n\n---\n\n## 6) Storage and config root paths\n\nThese are consumed via `@oh-my-pi/pi-utils/dirs` and affect where coding-agent stores data.\n\n| Variable | Default / behavior |\n| --------------------- | ----------------------------------------------------------------------------- |\n| `PI_CONFIG_DIR` | Config root dirname under home (default `.omp`) |\n| `PI_CODING_AGENT_DIR` | Full override for agent directory (default `~/<PI_CONFIG_DIR or .omp>/agent`) |\n| `PWD` | Used when matching canonical current working directory in path helpers |\n\n---\n\n## 7) Shell/tool execution environment\n\n(From `packages/utils/src/procmgr.ts` and coding-agent bash tool integration.)\n\n| Variable | Behavior |\n| -------------------------- | ------------------------------------------------------------------------------ |\n| `PI_BASH_NO_CI` | Suppresses automatic `CI=true` injection into spawned shell env |\n| `CLAUDE_BASH_NO_CI` | Legacy alias fallback for `PI_BASH_NO_CI` |\n| `PI_BASH_NO_LOGIN` | Disables login-shell mode; shell args become `['-c']` instead of `['-l','-c']` |\n| `CLAUDE_BASH_NO_LOGIN` | Legacy alias fallback for `PI_BASH_NO_LOGIN` |\n| `PI_SHELL_PREFIX` | Optional command prefix wrapper |\n| `CLAUDE_CODE_SHELL_PREFIX` | Legacy alias fallback for `PI_SHELL_PREFIX` |\n| `VISUAL` | Preferred external editor command |\n| `EDITOR` | Fallback external editor command |\n\nCurrent implementation: `PI_BASH_NO_LOGIN`/`CLAUDE_BASH_NO_LOGIN` are active; when either is set, `getShellArgs()` returns `['-c']`.\n\n---\n\n## 8) UI/theme/session detection (auto-detected env)\n\nThese are read as runtime signals; they are usually set by the terminal/OS rather than manually configured.\n\n| Variable | Used for |\n| 
------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------- |\n| `COLORTERM`, `TERM`, `WT_SESSION` | Color capability detection (theme color mode) |\n| `COLORFGBG` | Terminal background light/dark auto-detection |\n| `TERM_PROGRAM`, `TERM_PROGRAM_VERSION`, `TERMINAL_EMULATOR` | Terminal identity in system prompt/context |\n| `TMUX_PANE`, `CMUX_SURFACE_ID`, `KITTY_WINDOW_ID`, `TERM_SESSION_ID`, `WT_SESSION` | Stable per-terminal session breadcrumb IDs |\n| `SHELL`, `ComSpec`, `TERM_PROGRAM`, `TERM` | System info diagnostics |\n| `APPDATA`, `XDG_CONFIG_HOME` | lspmux config path resolution |\n| `HOME` | Path shortening in MCP command UI |\n\n---\n\n## 9) TUI runtime flags (shared package, affects coding-agent UX)\n\n| Variable | Behavior |\n| ------------------------- | ------------------------------------------------------------------------------------- |\n| `PI_NOTIFICATIONS` | `off` / `0` / `false` suppress desktop notifications |\n| `PI_TUI_WRITE_LOG` | If set, logs TUI writes to file |\n| `PI_HARDWARE_CURSOR` | If `1`, enables hardware cursor mode |\n| `PI_NO_SYNC_OUTPUT` | If set (any non-empty value), disables DEC 2026 synchronized-output wrappers while keeping TUI autowrap guards |\n| `PI_NO_DECCARA` | If set (truthy), disables Kitty DECCARA rectangular-SGR background fills (forces padded-string rendering) |\n| `PI_DEBUG_REDRAW` | If `1`, enables redraw debug logging |\n| `PI_FORCE_IMAGE_PROTOCOL` | Forces terminal image protocol detection (`kitty`, `iterm2`/`iterm`, `sixel`, `none`) |\n| `PI_TUI_RESIZE_IN_PLACE` | `1`/`true` force in-place resize (no alt-screen borrow, no ED3 rewrap); `0`/`false` force the alt-screen fast path. 
Default-on for Warp, which re-reports its size on alt-screen toggles |\n\n---\n\n## 10) Commit generation controls\n\n| Variable | Behavior |\n| ------------------------- | ------------------------------------------------------------------- |\n| `PI_COMMIT_TEST_FALLBACK` | If `true` (case-insensitive), force commit fallback generation path |\n| `PI_COMMIT_NO_FALLBACK` | If `true`, disables fallback when agent returns no proposal |\n| `PI_COMMIT_MAP_REDUCE` | If `false`, disables map-reduce commit analysis path |\n| `DEBUG` | If set, commit agent error stack traces are printed |\n\n---\n\n## Security-sensitive variables\n\nTreat these as secrets; do not log or commit them:\n\n- Provider/API keys and OAuth/bearer credentials (all `*_API_KEY`, `*_TOKEN`, OAuth access/refresh tokens)\n- Cloud credentials (`AWS_*`, `GOOGLE_APPLICATION_CREDENTIALS` path may expose service-account material)\n- Search/provider auth vars (`EXA_API_KEY`, `BRAVE_API_KEY`, `PERPLEXITY_API_KEY`, Anthropic search keys)\n- Foundry mTLS material (`CLAUDE_CODE_CLIENT_CERT`, `CLAUDE_CODE_CLIENT_KEY`, `NODE_EXTRA_CA_CERTS` when it points to private CA bundles)\n\nPython runtime also explicitly strips many common key vars before spawning kernel subprocesses (`packages/coding-agent/src/eval/py/runtime.ts`).\n",
20
21
  "extension-loading.md": "# Extension Loading (TypeScript/JavaScript Modules)\n\nThis document covers how the coding agent discovers and loads **extension modules** (`.ts`/`.js`) at startup.\n\nIt does **not** cover `gemini-extension.json` manifest extensions (documented separately).\n\n## What this subsystem does\n\nExtension loading builds a list of module entry files, imports each module with Bun, executes its factory, and returns:\n\n- loaded extension definitions\n- per-path load errors (without aborting the whole load)\n- a shared extension runtime object used later by `ExtensionRunner`\n\n## Primary implementation files\n\n- `src/extensibility/extensions/loader.ts` — path discovery + import/execution\n- `src/extensibility/extensions/index.ts` — public exports\n- `src/extensibility/extensions/runner.ts` — runtime/event execution after load\n- `src/discovery/builtin.ts` — native auto-discovery provider for extension modules\n- `src/config/settings.ts` — loads merged `extensions` / `disabledExtensions` settings\n\n---\n\n## Inputs to extension loading\n\n### 1) Auto-discovered native extension modules\n\n`discoverAndLoadExtensions()` first asks discovery providers for `extension-module` capability items, then keeps only provider `native` items.\n\nNative `extension-module` discovery comes from:\n\n- Project directory: `<cwd>/.omp/extensions`\n- User directory: `~/.omp/agent/extensions`\n- Native legacy/settings JSON entries: `<cwd>/.omp/settings.json#extensions` and `~/.omp/agent/settings.json#extensions`\n\nThe project root is the native provider's `.omp` directory (`SOURCE_PATHS.native.projectDir`), cwd-only; it does not walk ancestors. The user root is the active profile's agent directory via `getAgentDir()`, so under `omp --profile <name>` it becomes `~/.omp/profiles/<name>/agent/extensions` (and it honors `PI_CODING_AGENT_DIR`). 
See [Profiles](./config-usage.md#profiles).\n\nNotes:\n\n- Native auto-discovery is currently `.omp` based.\n- Legacy `.pi` is still accepted in package manifests (`pi.extensions`) and project override lookup, but `.pi/extensions` is not a native root here.\n\n### 2) Discovered JS/TS hook factories\n\nAfter native auto-discovery, `discoverAndLoadExtensions()` also appends JS/TS hook factories from the `hook` capability — any hook whose entry path is a `.ts`/`.js` file — so they load through the same module pipeline.\n\nHook-capability loading already applies its own hook-specific disabled ids, so these paths are not additionally filtered by `disabledExtensions` extension-module names.\n\n### 3) Installed plugin extension entries\n\nAfter hook discovery, `discoverAndLoadExtensions()` appends extension entry points from enabled installed plugins via `getAllPluginExtensionPaths(cwd)`.\n\nPlugin extension entries come from package `omp.extensions` / `pi.extensions` manifests, including enabled feature entries.\n\n### 4) Explicitly configured paths\n\nAfter plugin extension entries, configured paths are appended and resolved.\n\nConfigured path sources in the main session startup path (`sdk.ts`):\n\n1. CLI-provided paths (`--extension/-e`, and `--hook` is also treated as an extension path)\n2. 
Merged settings `extensions` array\n\nSettings files:\n\n- User: `~/.omp/agent/config.yml` (or custom agent dir via `PI_CODING_AGENT_DIR`)\n- Project/native settings capability: `<cwd>/.omp/config.yml` and `<cwd>/.omp/settings.json`\n\nNative extension-module discovery also reads legacy JSON extension lists from:\n\n- `~/.omp/agent/settings.json`\n- `<cwd>/.omp/settings.json`\n\nExamples:\n\n```yaml\n# ~/.omp/agent/config.yml\nextensions:\n - ~/my-exts/safety.ts\n - ./local/ext-pack\n```\n\n```json\n{\n \"extensions\": [\"./.omp/extensions/my-extra\"]\n}\n```\n\n---\n\n## Enable/disable controls\n\n### Disable discovery\n\n- CLI: `--no-extensions`\n- SDK option: `disableExtensionDiscovery`\n\nBehavior split:\n\n- SDK: when `disableExtensionDiscovery=true`, it still loads `additionalExtensionPaths` via `loadExtensions()`.\n- CLI path building (`main.ts`) currently clears CLI extension paths when `--no-extensions` is set, so explicit `-e/--hook` are not forwarded in that mode.\n\n### Disable specific extension modules\n\n`disabledExtensions` setting filters by extension id format:\n\n- `extension-module:<derivedName>`\n\n`derivedName` is based on entry path (`getExtensionNameFromPath`), for example:\n\n- `/x/foo.ts` -> `foo`\n- `/x/bar/index.ts` -> `bar`\n\nExample:\n\n```yaml\ndisabledExtensions:\n - extension-module:foo\n```\n\n---\n\n## Path and entry resolution\n\n### Path normalization\n\nFor configured paths:\n\n1. Normalize unicode spaces\n2. Expand `~`\n3. If relative, resolve against current `cwd`\n\n### If configured path is a file\n\nIt is used directly as a module entry candidate.\n\n### If configured path is a directory\n\nResolution order:\n\n1. `package.json` in that directory with `omp.extensions` (or legacy `pi.extensions`) -> use declared entries\n2. `index.ts`\n3. `index.js`\n4. 
Otherwise scan one level for extension entries:\n - direct `*.ts` / `*.js`\n - subdir `index.ts` / `index.js`\n - subdir `package.json` with `omp.extensions` / `pi.extensions`\n\nRules and constraints:\n\n- no recursive discovery beyond one subdirectory level\n- declared `extensions` manifest entries are resolved relative to that package directory\n- declared entries are included only if file exists/access is allowed\n- in `*/index.{ts,js}` pairs, TypeScript is preferred over JavaScript\n- symlinks are treated as eligible files/directories\n\n### Ignore behavior differs by source\n\n- Native auto-discovery (`discoverExtensionModulePaths` in discovery helpers) uses native glob with `gitignore: true` and `hidden: false`.\n- Explicit configured directory scanning in `loader.ts` uses `readdir` rules and does **not** apply gitignore filtering.\n\n---\n\n## Load order and precedence\n\n`discoverAndLoadExtensions()` builds one ordered list and then calls `loadExtensions()`.\n\nOrder:\n\n1. Native auto-discovered modules\n2. Discovered JS/TS hook factories\n3. Installed plugin extension entries\n4. Explicit configured paths (in provided order)\n\nIn `sdk.ts`, configured order is:\n\n1. CLI additional paths\n2. 
Settings `extensions`\n\nDe-duplication:\n\n- absolute path based\n- first seen path wins\n- later duplicates are ignored\n\nImplication: if the same module path is both auto-discovered and explicitly configured, it is loaded once at the first position (auto-discovered stage).\n\n---\n\n## Module import and factory contract\n\nEach candidate path is loaded via `loadLegacyPiModule()` (`src/extensibility/plugins/legacy-pi-compat.ts`):\n\n- the entry's realpath is resolved, then dynamically imported with an `?mtime` cache-buster so edited source reloads\n- a scoped Bun `onLoad` hook rewrites legacy pi-package specifiers (`@mariozechner/*`, `@earendil-works/*`) and bare `@sinclair/typebox` onto the host-bundled copies before evaluation\n- factory is selected by `getExtensionFactory(module)`: the module itself if it is a function, otherwise `module.default`\n- factory must be a function (`ExtensionFactory`)\n\nIf export is not a function, that path fails with a structured error and loading continues.\n\n---\n\n## Failure handling and isolation\n\n### During loading\n\nPer extension path, failures are captured as `{ path, error }` and do not stop other paths from loading.\n\nCommon cases:\n\n- import failure / missing file\n- invalid factory export (non-function)\n- exception thrown while executing factory\n\n### Runtime isolation model\n\n- Extensions are **not sandboxed** (same process/runtime).\n- They share one `EventBus` and one `ExtensionRuntime` instance.\n- During load, runtime action methods intentionally throw `ExtensionRuntimeNotInitializedError`; action wiring happens later in `ExtensionRunner.initialize()`.\n\n### After loading\n\nWhen events run through `ExtensionRunner`, handler exceptions are caught and emitted as extension errors instead of crashing the runner loop.\n\n---\n\n## Minimal user/project layout examples\n\n### User-level\n\n```text\n~/.omp/agent/\n config.yml\n extensions/\n guardrails.ts\n audit/\n index.ts\n```\n\n### 
Project-level\n\n```text\n<repo>/\n .omp/\n settings.json\n extensions/\n checks/\n package.json\n lint-gates.ts\n```\n\n`checks/package.json`:\n\n```json\n{\n \"omp\": {\n \"extensions\": [\"./src/check-a.ts\", \"./src/check-b.js\"]\n }\n}\n```\n\nLegacy manifest key still accepted:\n\n```json\n{\n \"pi\": {\n \"extensions\": [\"./index.ts\"]\n }\n}\n```\n",
21
- "extensions.md": "# Extensions\n\nPrimary guide for authoring runtime extensions in `packages/coding-agent`.\n\nThis document covers the current extension runtime in:\n\n- `src/extensibility/extensions/types.ts`\n- `src/extensibility/extensions/runner.ts`\n- `src/extensibility/extensions/wrapper.ts`\n- `src/extensibility/extensions/index.ts`\n- `src/modes/controllers/extension-ui-controller.ts`\n\nFor discovery paths and filesystem loading rules, see [`extension-loading.md`](./extension-loading.md).\n\n## What an extension is\n\nAn extension is a TS/JS module exporting a default factory:\n\n```ts\nimport type { ExtensionAPI } from \"@oh-my-pi/pi-coding-agent\";\n\nexport default function myExtension(pi: ExtensionAPI) {\n // register handlers/tools/commands/renderers\n}\n```\n\nExtensions can combine all of the following in one module:\n\n- event handlers (`pi.on(...)`)\n- LLM-callable tools (`pi.registerTool(...)`)\n- slash commands (`pi.registerCommand(...)`)\n- keyboard shortcuts and flags\n- custom message rendering\n- session/message injection APIs (`sendMessage`, `sendUserMessage`, `appendEntry`)\n\n## Runtime model\n\n1. Extensions are imported and their factory functions run.\n2. During that load phase, registration methods are valid; runtime action methods are not yet initialized.\n3. `ExtensionRunner.initialize(...)` wires live actions/contexts for the active mode.\n4. Session/agent/tool lifecycle events are emitted to handlers.\n5. 
Every tool execution is wrapped with extension interception (`tool_call` / `tool_result`).\n\n```text\nExtension lifecycle (simplified)\n\nload paths\n │\n ▼\nimport module + run factory (registration only)\n │\n ▼\nExtensionRunner.initialize(mode/session/tool registry)\n │\n ├─ emit session/agent events to handlers\n ├─ wrap tool execution (tool_call/tool_result)\n └─ expose runtime actions (sendMessage, setActiveTools, ...)\n```\n\nImportant constraint from `loader.ts`:\n\n- calling action methods like `pi.sendMessage()` during extension load throws `ExtensionRuntimeNotInitializedError`\n- register first; perform runtime behavior from events/commands/tools\n\n## Quick start\n\n```ts\nimport type { ExtensionAPI } from \"@oh-my-pi/pi-coding-agent\";\n\nexport default function (pi: ExtensionAPI) {\n const { z } = pi.zod;\n\n pi.setLabel(\"Safety + Utilities\");\n\n pi.on(\"session_start\", async (_event, ctx) => {\n ctx.ui.notify(`Extension loaded in ${ctx.cwd}`, \"info\");\n });\n\n pi.on(\"tool_call\", async (event) => {\n if (event.toolName === \"bash\" && event.input.command?.includes(\"rm -rf\")) {\n return { block: true, reason: \"Blocked by extension policy\" };\n }\n });\n\n pi.registerTool({\n name: \"hello_extension\",\n label: \"Hello Extension\",\n description: \"Return a greeting\",\n parameters: z.object({ name: z.string() }),\n async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {\n return {\n content: [{ type: \"text\", text: `Hello, ${params.name}` }],\n details: { greeted: params.name },\n };\n },\n });\n\n pi.registerCommand(\"hello-ext\", {\n description: \"Show queue state\",\n handler: async (_args, ctx) => {\n ctx.ui.notify(`pending=${ctx.hasPendingMessages()}`, \"info\");\n },\n });\n}\n```\n\n## Extension API surfaces\n\n## 1) Registration and actions (`ExtensionAPI`)\n\nCore methods:\n\n- `on(event, handler)`\n- `registerTool`, `registerCommand`, `registerShortcut`, `registerFlag`\n- `registerMessageRenderer`, 
`registerAssistantThinkingRenderer`\n- `setLabel`, `getFlag`\n- `sendMessage`, `sendUserMessage`, `appendEntry`, `exec`\n- `getActiveTools`, `getAllTools`, `setActiveTools`\n- `getCommands`\n- `getSessionName`, `setSessionName`\n- `setModel`, `getThinkingLevel`, `setThinkingLevel`\n- `registerProvider`\n- `events` (shared event bus)\n\nIn interactive mode, `input` handlers run before the built-in first-message auto-title check. Extensions that call `await pi.setSessionName(...)` from `input` can set the persisted session name and prevent the default auto-generated title from running for that session.\n\nAlso exposed:\n\n- `pi.logger`\n- `pi.typebox` (zod-backed compatibility shim for legacy TypeBox-style schemas)\n- `pi.zod` (injected `zod/v4` module — canonical for tool parameter schemas)\n- `pi.pi` (package exports)\n\n### Message delivery semantics\n\n`pi.sendMessage(message, options)` supports:\n\n- `deliverAs: \"steer\"` (default) — interrupts current run\n- `deliverAs: \"followUp\"` — queued to run after current run\n- `deliverAs: \"nextTurn\"` — stored and injected on the next user prompt\n- `triggerTurn: true` — starts a turn when idle (also honored with `deliverAs: \"nextTurn\"`: idle prompts immediately; while streaming the queued message schedules an internal continuation)\n\n`pi.sendUserMessage(content, { deliverAs })` always goes through prompt flow; while streaming it queues as steer/follow-up.\n\n## 2) Handler context (`ExtensionContext`)\n\nHandlers and tool `execute` receive `ctx` with:\n\n- `ui`\n- `hasUI`\n- `cwd`\n- `sessionManager` (read-only)\n- `modelRegistry`, `model`\n- `models` (read-only model query — see below)\n- `getContextUsage()`\n- `compact(...)`\n- `isIdle()`, `hasPendingMessages()`, `abort()`\n- `shutdown()`\n- `getSystemPrompt()`\n- `memory` (optional structured memory runtime — status/search/save across the configured backend)\n\n### Model selection (`ctx.models`)\n\n`ctx.models` is a read-only facade for picking and comparing 
models the same way core does:\n\n- `list()` — authenticated models available this session.\n- `current()` — the live session model (read lazily, so it reflects `/model` switches).\n- `resolve(spec)` — a model string (`provider/id`, bare id) or role alias (`pi/slow`, a configured role) → `Model`, honoring the same settings-backed aliases and match preferences as `--model`. Returns `undefined` when nothing matches.\n- `family(model)` — an opaque lineage token for \"same family?\" checks (Claude point releases share a token; Claude and GPT differ). Compare it; don't persist it (the vocabulary tracks new releases).\n\n```ts\n// Pick a model from a different family than the current one (e.g. a cross-family reviewer).\nconst current = ctx.models.current();\nconst contrasting = ctx.models\n .list()\n .find(m => current && ctx.models.family(m) !== ctx.models.family(current));\n```\n\n## 3) Command context (`ExtensionCommandContext`)\n\nCommand handlers additionally get:\n\n- `waitForIdle()`\n- `newSession(...)`\n- `switchSession(...)`\n- `branch(entryId)`\n- `navigateTree(targetId, { summarize })`\n- `reload()`\n\nUse command context for session-control flows; these methods are intentionally separated from general event handlers.\n\n## Event surface (current names and behavior)\n\nCanonical event unions and payload types are in `types.ts`.\n\n### Session lifecycle\n\n- `session_start`\n- `session_before_switch` / `session_switch`\n- `session_before_branch` / `session_branch`\n- `session_before_compact` / `session.compacting` / `session_compact`\n- `session_before_tree` / `session_tree`\n- `session_shutdown`\n\nCancelable pre-events:\n\n- `session_before_switch` → `{ cancel?: boolean }`\n- `session_before_branch` → `{ cancel?: boolean; skipConversationRestore?: boolean }`\n- `session_before_compact` → `{ cancel?: boolean; compaction?: CompactionResult }`\n- `session_before_tree` → `{ cancel?: boolean; summary?: { summary: string; details?: unknown } }`\n\n### Prompt and 
turn lifecycle\n\n- `input`\n- `before_agent_start`\n- `before_provider_request` (may replace provider request payload)\n- `after_provider_response`\n- `context`\n- `agent_start` / `agent_end`\n- `turn_start` / `turn_end`\n- `message_start` / `message_update` / `message_end`\n\n### Tool lifecycle\n\n- `tool_call` (pre-exec, may block)\n- `tool_result` (post-exec, may patch content/details/isError)\n- `tool_execution_start` / `tool_execution_update` / `tool_execution_end` (observability)\n- `tool_approval_requested` / `tool_approval_resolved` (observability; emitted by `wrapper.ts` only when a tool requires approval and an approval handler is registered)\n\n`tool_result` is middleware-style: handlers run in extension order and each sees prior modifications.\n\n### Reliability/runtime signals\n\n- `auto_compaction_start` / `auto_compaction_end`\n- `auto_retry_start` / `auto_retry_end`\n- `ttsr_triggered`\n- `todo_reminder`\n- `goal_updated`\n- `credential_disabled`\n\n### User command interception\n\n- `user_bash` (override with `{ result }`)\n- `user_python` (override with `{ result }`)\n\n### `resources_discover`\n\n`resources_discover` exists in extension types and `ExtensionRunner`.\nCurrent runtime note: `ExtensionRunner.emitResourcesDiscover(...)` is implemented, but there are no `AgentSession` callsites invoking it in the current codebase.\n\n## Tool authoring details\n\n`registerTool` uses `ToolDefinition` from `types.ts`.\n\nCurrent `execute` signature:\n\n```ts\nexecute(\n\ttoolCallId,\n\tparams,\n\tsignal,\n\tonUpdate,\n\tctx,\n): Promise<AgentToolResult>\n```\n\nTemplate:\n\n```ts\nconst { z } = pi.zod;\n\npi.registerTool({\n name: \"my_tool\",\n label: \"My Tool\",\n description: \"...\",\n parameters: z.object({}),\n hidden: false,\n defaultInactive: false,\n deferrable: false,\n async execute(_id, _params, signal, onUpdate, ctx) {\n if (signal?.aborted) {\n return { content: [{ type: \"text\", text: \"Cancelled\" }] };\n }\n onUpdate?.({ content: [{ 
type: \"text\", text: \"Working...\" }] });\n return { content: [{ type: \"text\", text: \"Done\" }], details: {} };\n },\n onSession(event, ctx) {\n // reason: start|switch|branch|tree|shutdown\n },\n renderCall(args, options, theme) {\n // optional TUI render\n },\n renderResult(result, options, theme, args) {\n // optional TUI render\n },\n});\n```\n\n`tool_call`/`tool_result` intercept all tools once the registry is wrapped in `sdk.ts`, including built-ins and extension/custom tools. `ToolDefinition` also supports optional `hidden`, `defaultInactive`, `deferrable`, `approval`, `mcpServerName`, `mcpToolName`, `renderCall`, and `renderResult` fields.\n\n## UI integration points\n\n`ctx.ui` implements the `ExtensionUIContext` interface. Support differs by mode.\n\n### Interactive mode (`extension-ui-controller.ts`)\n\nSupported:\n\n- dialogs: `select`, `confirm`, `input`, `editor`\n- input editing: `setEditorText`, `getEditorText`, `pasteToEditor`, `editor`\n- terminal title and working message (`setTitle`, `setWorkingMessage`)\n- notifications/status/editor text/terminal input/custom overlays\n- theme listing/loading by name (`setTheme` supports string names)\n- tools expanded toggle\n\nCurrent no-op methods in this controller:\n\n- `setFooter`\n- `setHeader`\n\n`setEditorComponent` is wired to the live editor (`ctx.setEditorComponent(factory)`). 
`setWidget` renders real widget components above or below the editor via `setHookWidget(...)` (`placement: \"aboveEditor\" | \"belowEditor\"`; string-array content capped at 10 lines).\n\n### RPC mode (`rpc-mode.ts`)\n\n`ctx.ui` is backed by RPC `extension_ui_request` events:\n\n- dialog methods (`select`, `confirm`, `input`, `editor`) round-trip to client responses\n- fire-and-forget methods emit requests (`notify`, `setStatus`, `setWidget` for string arrays, `setEditorText`; `setTitle` emits only when `PI_RPC_EMIT_TITLE=1`)\n\nUnsupported/no-op in RPC implementation:\n\n- `onTerminalInput`\n- `custom`\n- `setFooter`, `setHeader`, `setEditorComponent`\n- `setWorkingMessage`\n- theme switching/loading (`setTheme` returns failure)\n- tool expansion controls are inert\n\n### Print/headless/subagent paths\n\nWhen no UI context is supplied to runner init, `ctx.hasUI` is `false` and methods are no-op/default-returning.\n\n### ACP mode\n\nACP installs an elicitation-bridged UI context (`createAcpExtensionUiContext` in `acp-agent.ts`). `ctx.hasUI` is `true` while only `select`/`confirm`/`input` round-trip (as ACP elicitations; defaults are returned when the client lacks the `elicitation.form` capability). The non-elicitation surface (widgets, editor, theming, terminal input) is stubbed no-op.\n\n## Session and state patterns\n\nFor durable extension state:\n\n1. Persist with `pi.appendEntry(customType, data)`.\n2. Rebuild state from `ctx.sessionManager.getBranch()` on `session_start`, `session_branch`, `session_tree`.\n3. 
Keep tool result `details` structured when state should be visible/reconstructible from tool result history.\n\nExample reconstruction pattern:\n\n```ts\npi.on(\"session_start\", async (_event, ctx) => {\n let latest;\n for (const entry of ctx.sessionManager.getBranch()) {\n if (entry.type === \"custom\" && entry.customType === \"my-state\") {\n latest = entry.data;\n }\n }\n // restore from latest\n});\n```\n\n## Rendering extension points\n\n## Custom message renderer\n\n```ts\npi.registerMessageRenderer(\"my-type\", (message, { expanded }, theme) => {\n // return pi-tui Component\n});\n```\n\nUsed by interactive rendering when custom messages are displayed.\n\n## Assistant thinking renderer\n\n```ts\nimport { Container, Text } from \"@oh-my-pi/pi-tui\";\n\npi.registerAssistantThinkingRenderer((context, theme) => {\n const container = new Container();\n container.addChild(new Text(theme.fg(\"dim\", `thinking chars: ${context.text.length}`), 1, 0));\n return container;\n});\n```\n\nUsed by interactive rendering to add display-only supplemental UI below each visible assistant thinking block. The renderer receives the already-visible thinking text, content/thinking indexes, theme, and a `requestRender()` callback for async renderers. All registered renderers that return a component are appended in registration order. 
Renderers must not mutate messages; the original thinking block remains the provider/session source of truth.\n\n## Tool call/result renderer\n\nProvide `renderCall` / `renderResult` on `registerTool` definitions for custom tool visualization in TUI.\n\n## Constraints and pitfalls\n\n- Runtime actions are unavailable during extension load.\n- `tool_call` errors block execution (fail-closed).\n- Command name conflicts with built-ins are skipped with diagnostics.\n- Reserved shortcuts are ignored (`ctrl+c`, `ctrl+d`, `ctrl+z`, `ctrl+k`, `ctrl+p`, `ctrl+l`, `ctrl+o`, `ctrl+t`, `ctrl+g`, `ctrl+q`, `alt+m`, `shift+tab`, `shift+ctrl+p`, `alt+enter`, `escape`, `enter`).\n- Treat `ctx.reload()` as terminal for the current command handler frame.\n\n## Extensions vs hooks vs custom-tools\n\nUse the right surface:\n\n- **Extensions** (`src/extensibility/extensions/*`): unified system (events + tools + commands + renderers + provider registration).\n- **Hooks** (`src/extensibility/hooks/*`): separate legacy event API.\n- **Custom-tools** (`src/extensibility/custom-tools/*`): tool-focused modules; when loaded alongside extensions they are adapted and still pass through extension interception wrappers.\n\nIf you need one package that owns policy, tools, command UX, and rendering together, use extensions.\n",
22
+ "extensions.md": "# Extensions\n\nPrimary guide for authoring runtime extensions in `packages/coding-agent`.\n\nThis document covers the current extension runtime in:\n\n- `src/extensibility/extensions/types.ts`\n- `src/extensibility/extensions/runner.ts`\n- `src/extensibility/extensions/wrapper.ts`\n- `src/extensibility/extensions/index.ts`\n- `src/modes/controllers/extension-ui-controller.ts`\n\nFor discovery paths and filesystem loading rules, see [`extension-loading.md`](./extension-loading.md).\n\n## What an extension is\n\nAn extension is a TS/JS module exporting a default factory:\n\n```ts\nimport type { ExtensionAPI } from \"@oh-my-pi/pi-coding-agent\";\n\nexport default function myExtension(pi: ExtensionAPI) {\n // register handlers/tools/commands/renderers\n}\n```\n\nExtensions can combine all of the following in one module:\n\n- event handlers (`pi.on(...)`)\n- LLM-callable tools (`pi.registerTool(...)`)\n- slash commands (`pi.registerCommand(...)`)\n- keyboard shortcuts and flags\n- custom message rendering\n- session/message injection APIs (`sendMessage`, `sendUserMessage`, `appendEntry`)\n\n## Runtime model\n\n1. Extensions are imported and their factory functions run.\n2. During that load phase, registration methods are valid; runtime action methods are not yet initialized.\n3. `ExtensionRunner.initialize(...)` wires live actions/contexts for the active mode.\n4. Session/agent/tool lifecycle events are emitted to handlers.\n5. 
Every tool execution is wrapped with extension interception (`tool_call` / `tool_result`).\n\n```text\nExtension lifecycle (simplified)\n\nload paths\n │\n ▼\nimport module + run factory (registration only)\n │\n ▼\nExtensionRunner.initialize(mode/session/tool registry)\n │\n ├─ emit session/agent events to handlers\n ├─ wrap tool execution (tool_call/tool_result)\n └─ expose runtime actions (sendMessage, setActiveTools, ...)\n```\n\nImportant constraint from `loader.ts`:\n\n- calling action methods like `pi.sendMessage()` during extension load throws `ExtensionRuntimeNotInitializedError`\n- register first; perform runtime behavior from events/commands/tools\n\n## Quick start\n\n```ts\nimport type { ExtensionAPI } from \"@oh-my-pi/pi-coding-agent\";\n\nexport default function (pi: ExtensionAPI) {\n const { z } = pi.zod;\n\n pi.setLabel(\"Safety + Utilities\");\n\n pi.on(\"session_start\", async (_event, ctx) => {\n ctx.ui.notify(`Extension loaded in ${ctx.cwd}`, \"info\");\n });\n\n pi.on(\"tool_call\", async (event) => {\n if (event.toolName === \"bash\" && event.input.command?.includes(\"rm -rf\")) {\n return { block: true, reason: \"Blocked by extension policy\" };\n }\n });\n\n pi.registerTool({\n name: \"hello_extension\",\n label: \"Hello Extension\",\n description: \"Return a greeting\",\n parameters: z.object({ name: z.string() }),\n async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {\n return {\n content: [{ type: \"text\", text: `Hello, ${params.name}` }],\n details: { greeted: params.name },\n };\n },\n });\n\n pi.registerCommand(\"hello-ext\", {\n description: \"Show queue state\",\n handler: async (_args, ctx) => {\n ctx.ui.notify(`pending=${ctx.hasPendingMessages()}`, \"info\");\n },\n });\n}\n```\n\n## Extension API surfaces\n\n## 1) Registration and actions (`ExtensionAPI`)\n\nCore methods:\n\n- `on(event, handler)`\n- `registerTool`, `registerCommand`, `registerShortcut`, `registerFlag`\n- `registerMessageRenderer`, 
`registerAssistantThinkingRenderer`\n- `setLabel`, `getFlag`\n- `sendMessage`, `sendUserMessage`, `appendEntry`, `exec`\n- `getActiveTools`, `getAllTools`, `setActiveTools`\n- `getCommands`\n- `getSessionName`, `setSessionName`\n- `setModel`, `getThinkingLevel`, `setThinkingLevel`\n- `registerProvider`\n- `events` (shared event bus)\n\nIn interactive mode, `input` handlers run before the built-in first-message auto-title check. Extensions that call `await pi.setSessionName(...)` from `input` can set the persisted session name and prevent the default auto-generated title from running for that session.\n\nAlso exposed:\n\n- `pi.logger`\n- `pi.typebox` (zod-backed compatibility shim for legacy TypeBox-style schemas)\n- `pi.zod` (injected `zod/v4` module — canonical for tool parameter schemas)\n- `pi.pi` (package exports)\n\n### Message delivery semantics\n\n`pi.sendMessage(message, options)` supports:\n\n- `deliverAs: \"steer\"` (default) — interrupts current run\n- `deliverAs: \"followUp\"` — queued to run after current run\n- `deliverAs: \"nextTurn\"` — stored and injected on the next user prompt\n- `triggerTurn: true` — starts a turn when idle (also honored with `deliverAs: \"nextTurn\"`: idle prompts immediately; while streaming the queued message schedules an internal continuation)\n\n`pi.sendUserMessage(content, { deliverAs })` always goes through prompt flow; while streaming it queues as steer/follow-up.\n\n## 2) Handler context (`ExtensionContext`)\n\nHandlers and tool `execute` receive `ctx` with:\n\n- `ui`\n- `hasUI`\n- `cwd`\n- `sessionManager` (read-only)\n- `modelRegistry`, `model`\n- `models` (read-only model query — see below)\n- `getContextUsage()`\n- `compact(...)`\n- `isIdle()`, `hasPendingMessages()`, `abort()`\n- `shutdown()`\n- `getSystemPrompt()`\n- `memory` (optional structured memory runtime — status/search/save across the configured backend)\n\n### Model selection (`ctx.models`)\n\n`ctx.models` is a read-only facade for picking and comparing 
models the same way core does:\n\n- `list()` — authenticated models available this session.\n- `current()` — the live session model (read lazily, so it reflects `/model` switches).\n- `resolve(spec)` — a model string (`provider/id`, bare id) or role alias (`pi/slow`, a configured role) → `Model`, honoring the same settings-backed aliases and match preferences as `--model`. Returns `undefined` when nothing matches.\n- `family(model)` — an opaque lineage token for \"same family?\" checks (Claude point releases share a token; Claude and GPT differ). Compare it; don't persist it (the vocabulary tracks new releases).\n\n```ts\n// Pick a model from a different family than the current one (e.g. a cross-family reviewer).\nconst current = ctx.models.current();\nconst contrasting = ctx.models\n .list()\n .find(m => current && ctx.models.family(m) !== ctx.models.family(current));\n```\n\n## 3) Command context (`ExtensionCommandContext`)\n\nCommand handlers additionally get:\n\n- `waitForIdle()`\n- `newSession(...)`\n- `switchSession(...)`\n- `branch(entryId)`\n- `navigateTree(targetId, { summarize })`\n- `reload()`\n\nUse command context for session-control flows; these methods are intentionally separated from general event handlers.\n\n## Event surface (current names and behavior)\n\nCanonical event unions and payload types are in `types.ts`.\n\n### Session lifecycle\n\n- `session_start`\n- `session_before_switch` / `session_switch`\n- `session_before_branch` / `session_branch`\n- `session_before_compact` / `session.compacting` / `session_compact`\n- `session_before_tree` / `session_tree`\n- `session_shutdown`\n\nCancelable pre-events:\n\n- `session_before_switch` → `{ cancel?: boolean }`\n- `session_before_branch` → `{ cancel?: boolean; skipConversationRestore?: boolean }`\n- `session_before_compact` → `{ cancel?: boolean; compaction?: CompactionResult }`\n- `session_before_tree` → `{ cancel?: boolean; summary?: { summary: string; details?: unknown } }`\n\n### Prompt and 
turn lifecycle\n\n- `input`\n- `before_agent_start`\n- `before_provider_request` (may replace provider request payload)\n- `after_provider_response`\n- `context`\n- `agent_start` / `agent_end` — agent loop lifecycle notification; `agent_end` remains notification-only\n- `session_stop` — main-session stop hook, awaited before settle; may continue with `{ continue: true, additionalContext }` or `{ decision: \"block\", reason }`; capped at 8 consecutive continuations and never fires for task/subagent sessions\n- `turn_start` / `turn_end`\n- `message_start` / `message_update` / `message_end`\n\n### Tool lifecycle\n\n- `tool_call` (pre-exec, may block)\n- `tool_result` (post-exec, may patch content/details/isError)\n- `tool_execution_start` / `tool_execution_update` / `tool_execution_end` (observability)\n- `tool_approval_requested` / `tool_approval_resolved` (observability; emitted by `wrapper.ts` only when a tool requires approval and an approval handler is registered)\n\n`tool_result` is middleware-style: handlers run in extension order and each sees prior modifications.\n\n### Reliability/runtime signals\n\n- `auto_compaction_start` / `auto_compaction_end`\n- `auto_retry_start` / `auto_retry_end`\n- `ttsr_triggered`\n- `todo_reminder`\n- `goal_updated`\n- `credential_disabled`\n\n### User command interception\n\n- `user_bash` (override with `{ result }`)\n- `user_python` (override with `{ result }`)\n\n### `resources_discover`\n\n`resources_discover` exists in extension types and `ExtensionRunner`.\nCurrent runtime note: `ExtensionRunner.emitResourcesDiscover(...)` is implemented, but there are no `AgentSession` callsites invoking it in the current codebase.\n\n## Tool authoring details\n\n`registerTool` uses `ToolDefinition` from `types.ts`.\n\nCurrent `execute` signature:\n\n```ts\nexecute(\n\ttoolCallId,\n\tparams,\n\tsignal,\n\tonUpdate,\n\tctx,\n): Promise<AgentToolResult>\n```\n\nTemplate:\n\n```ts\nconst { z } = pi.zod;\n\npi.registerTool({\n name: 
\"my_tool\",\n label: \"My Tool\",\n description: \"...\",\n parameters: z.object({}),\n hidden: false,\n defaultInactive: false,\n deferrable: false,\n async execute(_id, _params, signal, onUpdate, ctx) {\n if (signal?.aborted) {\n return { content: [{ type: \"text\", text: \"Cancelled\" }] };\n }\n onUpdate?.({ content: [{ type: \"text\", text: \"Working...\" }] });\n return { content: [{ type: \"text\", text: \"Done\" }], details: {} };\n },\n onSession(event, ctx) {\n // reason: start|switch|branch|tree|shutdown\n },\n renderCall(args, options, theme) {\n // optional TUI render\n },\n renderResult(result, options, theme, args) {\n // optional TUI render\n },\n});\n```\n\n`tool_call`/`tool_result` intercept all tools once the registry is wrapped in `sdk.ts`, including built-ins and extension/custom tools. `ToolDefinition` also supports optional `hidden`, `defaultInactive`, `deferrable`, `approval`, `mcpServerName`, `mcpToolName`, `renderCall`, and `renderResult` fields.\n\n## UI integration points\n\n`ctx.ui` implements the `ExtensionUIContext` interface. Support differs by mode.\n\n### Interactive mode (`extension-ui-controller.ts`)\n\nSupported:\n\n- dialogs: `select`, `confirm`, `input`, `editor`\n- input editing: `setEditorText`, `getEditorText`, `pasteToEditor`, `editor`\n- terminal title and working message (`setTitle`, `setWorkingMessage`)\n- notifications/status/editor text/terminal input/custom overlays\n- theme listing/loading by name (`setTheme` supports string names)\n- tools expanded toggle\n\nCurrent no-op methods in this controller:\n\n- `setFooter`\n- `setHeader`\n\n`setEditorComponent` is wired to the live editor (`ctx.setEditorComponent(factory)`). 
`setWidget` renders real widget components above or below the editor via `setHookWidget(...)` (`placement: \"aboveEditor\" | \"belowEditor\"`; string-array content capped at 10 lines).\n\n### RPC mode (`rpc-mode.ts`)\n\n`ctx.ui` is backed by RPC `extension_ui_request` events:\n\n- dialog methods (`select`, `confirm`, `input`, `editor`) round-trip to client responses\n- fire-and-forget methods emit requests (`notify`, `setStatus`, `setWidget` for string arrays, `setEditorText`; `setTitle` emits only when `PI_RPC_EMIT_TITLE=1`)\n\nUnsupported/no-op in RPC implementation:\n\n- `onTerminalInput`\n- `custom`\n- `setFooter`, `setHeader`, `setEditorComponent`\n- `setWorkingMessage`\n- theme switching/loading (`setTheme` returns failure)\n- tool expansion controls are inert\n\n### Print/headless/subagent paths\n\nWhen no UI context is supplied to runner init, `ctx.hasUI` is `false` and methods are no-op/default-returning.\n\n### ACP mode\n\nACP installs an elicitation-bridged UI context (`createAcpExtensionUiContext` in `acp-agent.ts`). `ctx.hasUI` is `true` while only `select`/`confirm`/`input` round-trip (as ACP elicitations; defaults are returned when the client lacks the `elicitation.form` capability). The non-elicitation surface (widgets, editor, theming, terminal input) is stubbed no-op.\n\n## Session and state patterns\n\nFor durable extension state:\n\n1. Persist with `pi.appendEntry(customType, data)`.\n2. Rebuild state from `ctx.sessionManager.getBranch()` on `session_start`, `session_branch`, `session_tree`.\n3. 
Keep tool result `details` structured when state should be visible/reconstructible from tool result history.\n\nExample reconstruction pattern:\n\n```ts\npi.on(\"session_start\", async (_event, ctx) => {\n let latest;\n for (const entry of ctx.sessionManager.getBranch()) {\n if (entry.type === \"custom\" && entry.customType === \"my-state\") {\n latest = entry.data;\n }\n }\n // restore from latest\n});\n```\n\n## Rendering extension points\n\n## Custom message renderer\n\n```ts\npi.registerMessageRenderer(\"my-type\", (message, { expanded }, theme) => {\n // return pi-tui Component\n});\n```\n\nUsed by interactive rendering when custom messages are displayed.\n\n## Assistant thinking renderer\n\n```ts\nimport { Container, Text } from \"@oh-my-pi/pi-tui\";\n\npi.registerAssistantThinkingRenderer((context, theme) => {\n const container = new Container();\n container.addChild(new Text(theme.fg(\"dim\", `thinking chars: ${context.text.length}`), 1, 0));\n return container;\n});\n```\n\nUsed by interactive rendering to add display-only supplemental UI below each visible assistant thinking block. The renderer receives the already-visible thinking text, content/thinking indexes, theme, and a `requestRender()` callback for async renderers. All registered renderers that return a component are appended in registration order. 
Renderers must not mutate messages; the original thinking block remains the provider/session source of truth.\n\n## Tool call/result renderer\n\nProvide `renderCall` / `renderResult` on `registerTool` definitions for custom tool visualization in TUI.\n\n## Constraints and pitfalls\n\n- Runtime actions are unavailable during extension load.\n- `tool_call` errors block execution (fail-closed).\n- Command name conflicts with built-ins are skipped with diagnostics.\n- Reserved shortcuts are ignored (`ctrl+c`, `ctrl+d`, `ctrl+z`, `ctrl+k`, `ctrl+p`, `ctrl+l`, `ctrl+o`, `ctrl+t`, `ctrl+g`, `ctrl+q`, `alt+m`, `shift+tab`, `shift+ctrl+p`, `alt+enter`, `escape`, `enter`).\n- Treat `ctx.reload()` as terminal for the current command handler frame.\n\n## Extensions vs hooks vs custom-tools\n\nUse the right surface:\n\n- **Extensions** (`src/extensibility/extensions/*`): unified system (events + tools + commands + renderers + provider registration).\n- **Hooks** (`src/extensibility/hooks/*`): separate legacy event API.\n- **Custom-tools** (`src/extensibility/custom-tools/*`): tool-focused modules; when loaded alongside extensions they are adapted and still pass through extension interception wrappers.\n\nIf you need one package that owns policy, tools, command UX, and rendering together, use extensions.\n",
22
23
  "fs-scan-cache-architecture.md": "# Filesystem Scan Cache Architecture Contract\n\nThis document defines the current contract for the shared filesystem scan cache implemented in Rust (`crates/pi-natives/src/fs_cache.rs`) and consumed by native discovery/search APIs exposed to `packages/coding-agent`.\n\n## What this cache is\n\nThe cache stores full directory-scan entry lists (`GlobMatch[]`) keyed by scan scope, traversal policy, and requested metadata detail. Higher-level operations (`glob` filtering, `fuzzyFind` scoring, and cached `grep` candidate selection) run against those cached entries.\n\nPrimary goals:\n\n- avoid repeated filesystem walks for repeated discovery/search calls\n- keep consistency across native discovery/search flows when they share the same scan policy\n- allow explicit staleness recovery for empty results and explicit invalidation after file mutations\n\n## Ownership and public surface\n\n- Cache implementation and policy: `crates/pi-natives/src/fs_cache.rs`\n- Native consumers:\n - `crates/pi-natives/src/glob.rs`\n - `crates/pi-natives/src/fd.rs` (`fuzzyFind`)\n - `crates/pi-natives/src/grep.rs` (cached directory mode only)\n - `crates/pi-natives/src/ast.rs` (`astGrep`/`astEdit` file discovery; always cached)\n- JS binding/export:\n - `packages/natives/native/index.d.ts` (`invalidateFsScanCache`)\n - `packages/natives/native/index.js`\n- Coding-agent mutation invalidation helpers:\n - `packages/coding-agent/src/tools/fs-cache-invalidation.ts`\n\n## Cache key partitioning (hard contract)\n\nEach entry is keyed by:\n\n- canonicalized `root` directory path\n- `include_hidden` boolean\n- `use_gitignore` boolean\n- `skip_node_modules` boolean\n- `detail` (`ScanDetail::Minimal` or `ScanDetail::Full`)\n\nImplications:\n\n- Hidden and non-hidden scans do **not** share entries.\n- Gitignore-respecting and ignore-disabled scans do **not** share entries.\n- Scans that prune `node_modules` do **not** share entries with scans that include it.\n- 
Minimal scans (path + file type only) do **not** share entries with full scans (mtime + regular-file size metadata).\n- `follow_links` is part of `ScanOptions` used to build the walker, but is not currently part of `CacheKey`; calls that differ only by `follow_links` can share a cache entry.\n\nConsumers must pass stable semantics for hidden/gitignore/node_modules/detail behavior; changing any keyed flag creates a different cache partition.\n\n## Scan collection behavior\n\nCache population uses `ignore::WalkBuilder` configured by `include_hidden`, `use_gitignore`, `skip_node_modules`, and `follow_links`:\n\n- sorted by file path\n- `.git` is always pruned\n- `node_modules` is pruned at traversal time when `skip_node_modules=true`\n- cancellation is checked before the walk and every 128 visited entries per parallel visitor\n- `ScanDetail::Minimal` records normalized relative path and file type only\n- `ScanDetail::Full` also records mtime and regular-file size\n\nSearch roots for cache scans are resolved by `fs_cache::resolve_search_path`:\n\n- relative paths are resolved against current cwd\n- target must be an existing directory\n- root is canonicalized when possible\n\n## Freshness and eviction policy\n\nGlobal policy (environment-overridable):\n\n- `FS_SCAN_CACHE_TTL_MS` (default `1000`)\n- `FS_SCAN_EMPTY_RECHECK_MS` (default `200`)\n- `FS_SCAN_CACHE_MAX_ENTRIES` (default `16`)\n\nBehavior:\n\n- `get_or_scan(...)`\n - if TTL is `0`: bypass cache entirely, always fresh scan (`cache_age_ms = 0`)\n - on cache hit within TTL: return cloned cached entries + non-zero `cache_age_ms`\n - on expired hit: evict key, rescan, store fresh entry\n- `force_rescan(..., store=false)`: remove any matching key, scan fresh, and do not repopulate cache\n- `force_rescan(..., store=true)`: remove any matching key, scan fresh, then store the new entry\n- max entry enforcement is oldest-first eviction by `created_at` after insert\n\n## Empty-result fast recheck (separate from normal 
hits)\n\nNormal cache hit:\n\n- a cache hit inside TTL returns cached entries and does nothing else.\n\nEmpty-result fast recheck:\n\n- this is a **caller-side** policy using `ScanResult.cache_age_ms`\n- if filtered/query result is empty and cached scan age is at least `empty_recheck_ms()`, caller performs one `force_rescan(..., store=true)` and retries\n- intended to reduce stale-negative results when files were added while the cache is still inside TTL\n\nCurrent consumers:\n\n- `glob`: rechecks when filtered matches are empty and scan age is at least `empty_recheck_ms()`\n- `fuzzyFind` (`fd.rs`): rechecks only when query is non-empty and scored matches are empty\n- `grep`: rechecks when cached directory candidate file list is empty\n- `astGrep`/`astEdit` (`ast.rs`): recheck when the candidate file list is empty\n\n## Consumer defaults and cache usage\n\nCache is opt-in on `glob`/`fuzzyFind`/`grep` (`cache?: boolean`, default `false`). `astGrep`/`astEdit` file discovery always uses the cache (there is no opt-in flag).\n\nCurrent defaults in native APIs:\n\n- `glob`: `hidden=false`, `gitignore=true`, `cache=false`; `node_modules` is included only when `includeNodeModules=true` or the pattern mentions `node_modules`; full detail is used only when `sortByMtime=true`\n- `fuzzyFind`: `hidden=false`, `gitignore=true`, `cache=false`, `node_modules` is skipped, `follow_links=true`, minimal detail\n- `grep`: `hidden=true`, `gitignore=true`, `cache=false`; cached directory mode skips `node_modules` unless the glob mentions `node_modules`; minimal detail\n- `astGrep`/`astEdit` (file discovery): `hidden=true`, `gitignore=true`, always cached; `node_modules` is skipped unless the glob mentions `node_modules`; `follow_links=false`; minimal detail\n\nCurrent callers:\n\n- `@`-mention fuzzy file autocomplete enables cache (`fuzzyFind` with `cache: true`):\n - `packages/tui/src/autocomplete.ts`\n- Mutation flows invalidate through `packages/coding-agent/src/tools/fs-cache-invalidation.ts`.\n- 
Tool-level search integration (`packages/coding-agent/src/tools/search.ts`) currently calls native `grep` with `cache: false`.\n\n## Invalidation contract\n\nNative invalidation entrypoint:\n\n- `invalidateFsScanCache(path?: string)`\n - with `path`: remove cache entries whose root is a prefix of the target path\n - without path: clear all scan cache entries\n\nPath handling details:\n\n- relative invalidation paths are resolved against cwd\n- invalidation attempts canonicalization\n- if target does not exist (for example after delete), fallback canonicalizes the parent and reattaches the filename when possible\n- this preserves invalidation behavior for create/delete/rename where one side may not exist\n\n## Coding-agent mutation flow responsibilities\n\nCoding-agent code must invalidate after successful filesystem mutations.\n\nCentral helpers:\n\n- `invalidateFsScanAfterWrite(path)`\n- `invalidateFsScanAfterDelete(path)`\n- `invalidateFsScanAfterRename(oldPath, newPath)` (invalidates both sides when paths differ)\n\nCurrent mutation callsites include:\n\n- `packages/coding-agent/src/tools/write.ts`\n- `packages/coding-agent/src/edit/hashline/filesystem.ts`\n- `packages/coding-agent/src/edit/modes/patch.ts`\n- `packages/coding-agent/src/edit/modes/replace.ts`\n\nRule: if a flow mutates filesystem content or location and bypasses these helpers, cache staleness bugs are expected.\n\n## Adding a new cache consumer safely\n\nWhen introducing cache use in a new scanner/search path:\n\n1. **Use stable scan policy inputs**\n - decide hidden/gitignore/node_modules/detail semantics first\n - pass them consistently to `get_or_scan`/`force_rescan` so cache partitions are intentional\n\n2. **Treat cache data as pre-filtered only by traversal policy**\n - apply tool-specific filtering (glob patterns, type filters, scoring) after retrieval\n - never assume cached entries already reflect your higher-level filters\n\n3. 
**Implement empty-result fast recheck only for stale-negative risk**\n - use `scan.cache_age_ms >= empty_recheck_ms()`\n - retry once with `force_rescan(..., store=true, ...)`\n - keep this path separate from normal cache-hit logic\n\n4. **Respect no-cache mode explicitly**\n - when caller disables cache, call `force_rescan(..., store=false, ...)` or use an uncached streaming walker\n - do not populate shared cache in a no-cache request path\n\n5. **Wire mutation invalidation for any new write path**\n - after successful write/edit/delete/rename, call the coding-agent invalidation helper\n - for rename/move, invalidate both old and new paths\n\n6. **Do not add per-call TTL knobs**\n - current contract is global policy only (env-configured), no per-request TTL override\n\n## Known boundaries\n\n- Cache scope is process-local in-memory (`DashMap`), not persisted across process restarts.\n- Cache stores scan entries, not final tool results.\n- `glob`/`fuzzyFind`/cached `grep`/`astGrep` share scan entries only when key dimensions (`root`, `hidden`, `gitignore`, `skip_node_modules`, `detail`) match.\n- `.git` is always excluded at scan collection time regardless of caller options.\n",
23
24
  "gemini-manifest-extensions.md": "# Gemini Manifest Extensions (`gemini-extension.json`)\n\nThis document covers how the coding-agent discovers and parses Gemini-style manifest extensions (`gemini-extension.json`) into the `extensions` capability.\n\nIt does **not** cover TypeScript/JavaScript extension module loading (`extensions/*.ts`, `index.ts`, `package.json omp.extensions`), which is documented in `extension-loading.md`.\n\n## Implementation files\n\n- [`packages/coding-agent/src/discovery/gemini.ts`](../packages/coding-agent/src/discovery/gemini.ts)\n- [`packages/coding-agent/src/discovery/builtin.ts`](../packages/coding-agent/src/discovery/builtin.ts)\n- [`packages/coding-agent/src/discovery/helpers.ts`](../packages/coding-agent/src/discovery/helpers.ts)\n- [`packages/coding-agent/src/capability/extension.ts`](../packages/coding-agent/src/capability/extension.ts)\n- [`packages/coding-agent/src/capability/index.ts`](../packages/coding-agent/src/capability/index.ts)\n- [`packages/coding-agent/src/extensibility/extensions/loader.ts`](../packages/coding-agent/src/extensibility/extensions/loader.ts)\n\n---\n\n## What gets discovered\n\nThe Gemini provider (`id: gemini`, priority `60`) registers an `extensions` loader that scans two fixed roots:\n\n- User: `~/.gemini/extensions`\n- Project: `<cwd>/.gemini/extensions`\n\nPath resolution is direct from `ctx.home` and `ctx.cwd` via `getUserPath()` / `getProjectPath()`.\n\nImportant scope rule: project lookup is **cwd-only**. It does not walk parent directories.\n\n---\n\n## Directory scan rules\n\nFor each root (`~/.gemini/extensions` and `<cwd>/.gemini/extensions`), discovery does:\n\n1. `readDirEntries(root)`\n2. keep only direct child directories (`entry.isDirectory()`)\n3. 
for each child `<name>`, attempt to read exactly:\n - `<root>/<name>/gemini-extension.json`\n\nThere is no recursive scan beyond one directory level.\n\n### Hidden directories\n\nGemini manifest discovery does **not** filter out dot-prefixed directory names. If a hidden child directory exists and contains `gemini-extension.json`, it is considered.\n\n### Missing/unreadable files\n\nIf `gemini-extension.json` is missing or unreadable, that directory is skipped silently (no warning).\n\n---\n\n## Manifest shape (as implemented)\n\nThe capability type defines this manifest shape:\n\n```ts\ninterface ExtensionManifest {\n name?: string;\n description?: string;\n mcpServers?: Record<string, Omit<MCPServer, \"name\" | \"_source\">>;\n tools?: unknown[];\n context?: unknown;\n}\n```\n\nDiscovery-time behavior is intentionally loose:\n\n- JSON parse success is required.\n- There is no runtime schema validation for field types/content beyond JSON syntax.\n- The parsed object is stored as `manifest` on the capability item.\n\n### Name normalization\n\n`Extension.name` is set to:\n\n1. `manifest.name` if it is not `null`/`undefined`\n2. otherwise the extension directory name\n\nNo string-type enforcement is applied here.\n\n---\n\n## Materialization into capability items\n\nA valid parsed manifest creates one `Extension` capability item:\n\n```ts\n{\n\tname: manifest.name ?? 
<directory-name>,\n\tpath: <extension-directory>,\n\tmanifest: <parsed-json>,\n\tlevel: \"user\" | \"project\",\n\t_source: {\n\t\tprovider: \"gemini\",\n\t\tproviderName: \"Gemini CLI\", // attached by capability registry\n\t\tpath: <absolute-manifest-path>,\n\t\tlevel: \"user\" | \"project\"\n\t}\n}\n```\n\nNotes:\n\n- `_source.path` is normalized to an absolute path by `createSourceMeta()`.\n- Registry-level capability validation for `extensions` only checks presence of `name` and `path`.\n- Manifest internals (`mcpServers`, `tools`, `context`) are not validated during discovery.\n\n---\n\n## Error handling and warning semantics\n\n### Warned\n\n- Invalid JSON in a manifest file:\n - warning format: `Invalid JSON in <manifestPath>`\n\n### Not warned (silent skip)\n\n- `extensions` directory missing\n- child directory has no `gemini-extension.json`\n- unreadable manifest file\n- manifest JSON is syntactically valid but semantically odd/incomplete\n\nThis means partial validity is accepted: only syntactic JSON failure emits a warning.\n\n---\n\n## Precedence and deduplication with other sources\n\n`extensions` capability is aggregated across providers by the capability registry.\n\nCurrent providers for this capability:\n\n- `native` (`packages/coding-agent/src/discovery/builtin.ts`) priority `100`\n- `gemini` (`packages/coding-agent/src/discovery/gemini.ts`) priority `60`\n\nDedup key is `ext.name` (`extensionCapability.key = ext => ext.name`).\n\n### Cross-provider precedence\n\nHigher-priority provider wins on duplicate extension names.\n\n- If `native` and `gemini` both emit extension name `foo`, the native item is kept.\n- Lower-priority duplicate is retained only in `result.all` with `_shadowed = true`.\n\n### Intra-provider order effects\n\nBecause dedup is “first seen wins”, provider-local item order matters.\n\n- Gemini loader appends **user first**, then **project**.\n- Therefore, duplicate names between `~/.gemini/extensions` and 
`<cwd>/.gemini/extensions` keep the user entry and shadow the project entry.\n\nBy contrast, native provider builds config dir order differently (`project` then `user` in `getConfigDirs()`), so native intra-provider shadowing is the opposite direction.\n\n---\n\n## User vs project behavior summary\n\nFor Gemini manifests specifically:\n\n- Both user and project roots are scanned every load.\n- Project root is fixed to `<cwd>/.gemini/extensions` (no ancestor walk).\n- Duplicate names inside Gemini source resolve to user-first.\n- Duplicate names against higher-priority providers (notably native) lose by priority.\n\n---\n\n## Boundary: discovery metadata vs runtime extension loading\n\n`gemini-extension.json` discovery currently feeds capability metadata (`Extension` items). It does **not** directly load runnable TS/JS extension modules.\n\nRuntime module loading (`discoverAndLoadExtensions()` / `loadExtensions()`) uses the `extension-module` capability and explicit paths, and currently filters auto-discovered modules to provider `native` only.\n\nPractical implication:\n\n- Gemini manifest extensions are discoverable as capability records.\n- They are not, by themselves, executed as runtime extension modules by the extension loader pipeline.\n\nThis boundary is intentional in current implementation and explains why manifest discovery and executable module loading can diverge.\n",
24
25
  "handoff-generation-pipeline.md": "# `/handoff` generation pipeline\n\nThis document describes how the coding-agent implements `/handoff`: trigger path, oneshot generation, session switch, context reinjection, persistence, and UI behavior.\n\n## Scope\n\nCovers:\n\n- Interactive `/handoff` command dispatch\n- `AgentSession.handoff()` lifecycle and state transitions\n- `generateHandoff(...)` request shape\n- How old/new sessions persist handoff data differently\n- UI behavior for success, cancel, and failure\n\nDoes not cover:\n\n- Generic tree navigation/branch internals\n- Non-handoff session commands (`/new`, `/fork`, `/resume`)\n\n## Implementation files\n\n- [`../src/modes/controllers/input-controller.ts`](../packages/coding-agent/src/modes/controllers/input-controller.ts)\n- [`../src/modes/controllers/command-controller.ts`](../packages/coding-agent/src/modes/controllers/command-controller.ts)\n- [`../src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts)\n- [`packages/agent/src/compaction/compaction.ts`](../packages/agent/src/compaction/compaction.ts)\n- [`../src/session/session-manager.ts`](../packages/coding-agent/src/session/session-manager.ts)\n- [`../src/slash-commands/builtin-registry.ts`](../packages/coding-agent/src/slash-commands/builtin-registry.ts)\n\n## Trigger path\n\n1. `/handoff` is declared in builtin slash command metadata (`slash-commands/builtin-registry.ts`) with optional inline hint: `[focus instructions]`.\n2. In interactive input handling (`InputController`), submit text matching `/handoff` or `/handoff ...` is intercepted before normal prompt submission.\n3. The editor is cleared and `handleHandoffCommand(customInstructions?)` is called.\n4. 
`CommandController.handleHandoffCommand` performs a preflight guard using current entries:\n - Counts `type === \"message\"` entries.\n - If `< 2`, it warns: `Nothing to hand off (no messages yet)` and returns.\n\nThe same minimum-content guard exists again inside `AgentSession.handoff()` and throws if violated. This duplicates safety at both UI and session layers.\n\n## End-to-end lifecycle\n\n### 1) Start handoff generation\n\n`AgentSession.handoff(customInstructions?)`:\n\n- Reads current branch entries (`sessionManager.getBranch()`).\n- Validates minimum message count (`>= 2`).\n- Creates `#handoffAbortController` and links any caller-provided abort signal to it.\n- Resolves the current model API key through `ModelRegistry`.\n- Calls `generateHandoff(...)` with:\n - live agent messages (`agent.state.messages`),\n - the current model and API key,\n - the base system prompt (`#baseSystemPrompt`),\n - the live tool array (`agent.state.tools`),\n - optional focus instructions,\n - coding-agent message conversion (`convertToLlm`),\n - provider metadata, current thinking level, and `initiatorOverride: \"agent\"`.\n\n`generateHandoff(...)` lives in `packages/agent/src/compaction/compaction.ts` next to summarization. 
It renders `packages/agent/src/compaction/prompts/handoff-document.md` via `renderHandoffPrompt(...)` with optional `additionalFocus`.\n\n### 2) Generate and capture output\n\n`generateHandoff(...)` converts the existing `AgentMessage[]` history to real LLM `Message[]` history, then appends one trailing agent-attributed `user` message containing the rendered handoff prompt.\n\nThe request uses `instrumentedCompleteSimple(...)` (the OTEL-instrumented `completeSimple` oneshot wrapper) directly:\n\n```ts\nawait instrumentedCompleteSimple(\n model,\n {\n systemPrompt,\n messages: requestMessages,\n tools,\n },\n {\n apiKey,\n signal,\n reasoning: resolveCompactionEffort(model, options.thinkingLevel),\n toolChoice: \"none\",\n initiatorOverride,\n metadata,\n },\n { telemetry, oneshotKind: \"handoff\" },\n);\n```\n\nImportant generation properties:\n\n- The request preserves the live provider cache prefix by reusing the same system prompt, tool definitions, and real message history shape as the active agent.\n- The handoff instruction is a trailing `user` message, not a developer message, so the cached prefix remains aligned with the prior turn.\n- `toolChoice: \"none\"` prevents intentional tool dispatch.\n- The returned assistant content is filtered to text blocks and joined with `\\n`; stray tool-call blocks are ignored if a provider does not honor `toolChoice: \"none\"`.\n- `stopReason === \"error\"` throws a generation error.\n\nNo agent-loop events are used for capture. 
The handoff path no longer waits for `agent_end` and no longer scans the latest assistant message.\n\n### 3) Cancellation checks\n\nCancellation throws `Error(\"Handoff cancelled\")`; a completed generation with no text returns `undefined`.\n\n- caller signal aborts `#handoffAbortController`\n- `completeSimple(...)` receives the abort signal\n- aborted handoff signal or provider `AbortError` is normalized to `Error(\"Handoff cancelled\")`\n- empty generated text returns `undefined`\n\n`AgentSession.handoff()` always clears `#handoffAbortController` in `finally`.\n\n### 4) New session creation\n\nIf text was generated and not aborted:\n\n1. Flush current session writer (`sessionManager.flush()`).\n2. Cancel session-owned async jobs.\n3. Start a brand-new session with `parentSession` pointing at the previous session file when one exists.\n4. Reset in-memory agent state (`agent.reset()`).\n5. Rebind `agent.sessionId` to the new session id.\n6. Rekey/reset Hindsight and Mnemopi memory session tracking for the new session.\n7. Clear the queued next-turn context array (`#pendingNextTurnMessages`) and the scheduled hidden next-turn generation (`#scheduledHiddenNextTurnGeneration`). The agent's steering and follow-up queues are already cleared by `agent.reset()` in step 4.\n8. Reset todo reminder counter.\n\n### 5) Handoff-context injection\n\nThe generated handoff document is wrapped by coding-agent session glue and appended to the new session as a `custom_message` entry:\n\n```text\n<handoff-context>\n...handoff text...\n</handoff-context>\n\nThe above is a handoff document from a previous session. 
Use this context to continue the work seamlessly.\n```\n\nInsertion call:\n\n```ts\nthis.sessionManager.appendCustomMessageEntry(\n \"handoff\",\n handoffContent,\n true,\n undefined,\n \"agent\",\n);\n```\n\nSemantics:\n\n- `customType`: `\"handoff\"`\n- `display`: `true` (visible in TUI rebuild)\n- attribution: `\"agent\"`\n- Entry type: `custom_message` (participates in LLM context)\n\n### 6) Rebuild active agent context\n\nAfter injection:\n\n1. `buildDisplaySessionContext()` resolves message list for current leaf.\n2. `agent.replaceMessages(sessionContext.messages)` makes the injected handoff message active context.\n3. Todo phases are synchronized from the new branch.\n4. Method returns `{ document: handoffText, savedPath? }`.\n\nAt this point, the active LLM context in the new session contains the injected handoff message, not the old transcript.\n\n## Persistence model: old session vs new session\n\n### Old session\n\nHandoff generation is a oneshot request, not a visible agent turn. The generated handoff text is not appended to the old session as an assistant message.\n\nResult: the original session keeps its prior transcript unchanged except for data already persisted before handoff began.\n\n### New session\n\nAfter session reset, handoff is persisted as `custom_message` with `customType: \"handoff\"`.\n\n`buildSessionContext()` converts this entry into a runtime custom/user-context message via `createCustomMessage(...)`, so it is included in future prompts from the new session.\n\nAuto-triggered handoffs can additionally write a timestamped `handoff-*.md` artifact under the session artifacts directory when `compaction.handoffSaveToDisk` is enabled. 
Manual `/handoff` does not write that artifact.\n\n## Controller/UI behavior\n\n`CommandController.handleHandoffCommand` behavior:\n\n- Shows a status loader: `Generating handoff… (esc to cancel)`.\n- Calls `await session.handoff(customInstructions)`.\n- If result is `undefined`: `showError(\"Handoff cancelled\")`.\n- On success:\n - `rebuildChatFromMessages()` (loads new session context, including injected handoff)\n - invalidates status line and editor top border\n - reloads todos\n - appends success chat line: `New session started with handoff context`\n- On exception:\n - if message is `\"Handoff cancelled\"` or error name is `AbortError`: `showError(\"Handoff cancelled\")`\n - otherwise: `showError(\"Handoff failed: <message>\")`\n- Stops the loader, clears the status container, and requests render at end.\n\nManual `/handoff` no longer streams the generated document into chat. A cancellable loader remains visible while the oneshot request runs, and the chat is rebuilt after generation completes.\n\n## Cancellation semantics\n\n### Session-level cancellation primitive\n\n`AgentSession` exposes:\n\n- `abortHandoff()` → aborts `#handoffAbortController`\n- `isGeneratingHandoff` → true while controller exists\n\nWhen this abort path is used, the abort signal is passed to `completeSimple(...)`; `handoff()` normalizes the cancellation to `Error(\"Handoff cancelled\")`, and command controller maps it to cancellation UI.\n\n### Interactive `/handoff` path\n\n`InputController`'s global `editor.onEscape` handler dispatches on live session state instead of swapping handlers: while `isGeneratingHandoff` is true, pressing Escape calls `session.abortHandoff()`, which aborts the `completeSimple(...)` request through `#handoffAbortController`.\n\n## Aborted vs failed handoff\n\nCurrent UI classification:\n\n- **Aborted/cancelled**\n - `abortHandoff()` path triggers `\"Handoff cancelled\"`, or\n - thrown `AbortError`\n - UI shows `Handoff cancelled`\n- **Failed**\n - any other 
thrown error from `handoff()` / `generateHandoff()` / provider request path\n - UI shows `Handoff failed: ...`\n\nAdditional nuance: if generation completes but no text is returned, `handoff()` returns `undefined` and the controller currently reports **cancelled**, not **failed**.\n\n## Short-session and minimum-content guardrails\n\nTwo guards prevent low-signal handoffs:\n\n- UI layer (`handleHandoffCommand`): warns and returns early for `< 2` message entries\n- Session layer (`handoff()`): throws an error for the same condition\n\nThis avoids creating a new session with empty/near-empty handoff context.\n\n## State transition summary\n\nHigh-level state flow:\n\n1. Interactive slash command intercepted.\n2. Preflight message-count guard.\n3. `#handoffAbortController` created (`isGeneratingHandoff = true`).\n4. `generateHandoff(...)` issues one `instrumentedCompleteSimple(...)` request with live system prompt, tools, message history, current thinking level, and trailing handoff prompt.\n5. Assistant response text blocks are joined; tool-call blocks are discarded.\n6. If missing text → return `undefined`; if aborted → cancellation error path.\n7. If present:\n - flush old session\n - cancel async jobs\n - create new empty session with previous session as parent\n - reset runtime queues/counters\n - append `custom_message(handoff)`\n - optionally save an auto-triggered handoff document under the session artifacts directory when `compaction.handoffSaveToDisk` is enabled\n8. Controller rebuilds chat UI and announces success.\n9. 
`#handoffAbortController` cleared (`isGeneratingHandoff = false`).\n\n## Known assumptions and limitations\n\n- No structural validation checks that generated markdown follows the requested section format.\n- Missing generated text is reported as cancellation in controller UX.\n- Manual handoff has no streaming visibility; a cancellable loader is shown until the UI updates after generation completes.\n- Auto-triggered handoffs can write a timestamped `handoff-*.md` artifact when `compaction.handoffSaveToDisk` is enabled; write failure is logged and does not fail the handoff.\n",
25
26
  "hooks.md": "# Hooks\n\nThis document describes the **current hook subsystem code** in `src/extensibility/hooks/*`.\n\n## Current status in runtime\n\nThe default CLI runtime initializes the **extension runner** path. In current startup flow:\n\n- `--hook` is treated as an alias for `--extension` (CLI paths are merged into `additionalExtensionPaths`)\n- JS/TS hook factories discovered through `hookCapability` (for example `.omp/hooks/pre/*.ts`) are loaded as extension modules so their `pi.on(...)` handlers bind to the runtime event bus\n- tools are wrapped by `ExtensionToolWrapper`, not `HookToolWrapper`\n- context transforms and lifecycle emissions go through `ExtensionRunner`\n\nSo this file documents the legacy hook subsystem implementation itself (types/loader/runner/wrapper), plus the factory shape still accepted when a discovered hook path is loaded by the extension runner.\n\n## Key files\n\n- `src/extensibility/hooks/types.ts` — hook context, event types, and result contracts\n- `src/extensibility/hooks/loader.ts` — module loading and hook discovery bridge\n- `src/extensibility/hooks/runner.ts` — event dispatch, command lookup, error signaling\n- `src/extensibility/hooks/tool-wrapper.ts` — pre/post tool interception wrapper\n- `src/extensibility/hooks/index.ts` — exports/re-exports\n\n## What a hook module is\n\nA hook module must default-export a factory:\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\nexport default function hook(pi: HookAPI): void {\n pi.on(\"tool_call\", async (event, ctx) => {\n if (\n event.toolName === \"bash\" &&\n String(event.input.command ?? 
\"\").includes(\"rm -rf\")\n ) {\n return { block: true, reason: \"blocked by policy\" };\n }\n });\n}\n```\n\nThe factory can:\n\n- register event handlers with `pi.on(...)`\n- send persistent custom messages with `pi.sendMessage(...)`\n- persist non-LLM state with `pi.appendEntry(...)`\n- register slash commands via `pi.registerCommand(...)`\n- register custom message renderers via `pi.registerMessageRenderer(...)`\n- run shell commands via `pi.exec(...)`\n- author schemas/helpers with injected `pi.zod`, `pi.typebox`, and package exports via `pi.pi`\n\n## Discovery and loading\n\nDefault sessions load JS/TS hook factories discovered by `hookCapability` through the extension runner. `discoverExtensionPaths(configuredPaths, cwd)` does:\n\n1. Load native extension modules from the capability registry\n2. Load importable `.ts`/`.js` hook factories from the hook capability registry\n3. Append plugin extension entry points\n4. Append explicitly configured paths\n\nThe legacy `discoverAndLoadHooks(configuredPaths, cwd)` helper still exists and does:\n\n1. Load discovered hooks from capability registry (`loadCapability(\"hooks\")`)\n2. Append explicitly configured paths (deduped by absolute path)\n3. 
Call `loadHooks(allPaths, cwd)`\n\n`loadHooks` then imports each path and expects a `default` function.\n\n### Path resolution\n\n`loader.ts` resolves hook paths as:\n\n- absolute path: used as-is\n- `~` path: expanded\n- relative path: resolved against `cwd`\n\n## Event surfaces\n\nHook events are strongly typed in `types.ts`.\n\n### Session events\n\n- `session_start`\n- `session_before_switch` → can return `{ cancel?: boolean }`\n- `session_switch`\n- `session_before_branch` → can return `{ cancel?: boolean; skipConversationRestore?: boolean }`\n- `session_branch`\n- `session_before_compact` → can return `{ cancel?: boolean; compaction?: CompactionResult }`\n- `session.compacting` → can return `{ context?: string[]; prompt?: string; preserveData?: Record<string, unknown> }`\n- `session_compact`\n- `session_before_tree` → can return `{ cancel?: boolean; summary?: { summary: string; details?: unknown } }`\n- `session_tree`\n- `session_shutdown`\n\n### Agent/context events\n\n- `context` → can return `{ messages?: Message[] }`\n- `before_agent_start` → can return `{ message?: { customType; content; display; details; attribution } }`\n- `agent_start`\n- `agent_end`\n- `turn_start`\n- `turn_end`\n- `auto_compaction_start`\n- `auto_compaction_end`\n- `auto_retry_start`\n- `auto_retry_end`\n- `ttsr_triggered`\n- `todo_reminder`\n\n### Tool events (pre/post model)\n\n- `tool_call` (pre-execution) → can return `{ block?: boolean; reason?: string }`\n- `tool_result` (post-execution) → can return `{ content?; details?; isError? }`\n\nThis is the hook subsystem’s core pre/post interception model.\n\n```text\nHook tool interception flow\n\ntool_call handlers\n │\n ├─ any { block: true }? 
── yes ──> throw (tool blocked)\n │\n └─ no\n │\n ▼\n execute underlying tool\n │\n ├─ success ──> tool_result handlers can override { content, details }\n │\n └─ error ──> emit tool_result(isError=true) then rethrow original error\n```\n\n## Execution model and mutation semantics\n\n### 1) Pre-execution: `tool_call`\n\n`HookToolWrapper.execute()` emits `tool_call` before tool execution.\n\n- if any handler returns `{ block: true }`, execution stops\n- if handler throws, wrapper fails closed and blocks execution\n- returned `reason` becomes the thrown error text\n\n### 2) Tool execution\n\nUnderlying tool executes normally if not blocked.\n\n### 3) Post-execution: `tool_result`\n\nAfter success, wrapper emits `tool_result` with:\n\n- `toolName`, `toolCallId`, `input`\n- `content`\n- `details`\n- `isError: false`\n\nIf handler returns overrides:\n\n- `content` can replace result content\n- `details` can replace result details\n\nOn tool failure, wrapper emits `tool_result` with `isError: true` and error text content, then rethrows original error.\n\n### What hooks can mutate\n\n- LLM context for a single call via `context` (`messages` replacement chain)\n- tool output content/details on successful tool calls (`tool_result` path)\n- pre-agent injected message via `before_agent_start`\n- cancellation/custom compaction/tree behavior via `session_before_*` and `session.compacting`\n\n### What hooks cannot mutate in this implementation\n\n- raw tool input parameters in-place (only block/allow on `tool_call`)\n- execution continuation after thrown tool errors (error path rethrows)\n- final success/error status in wrapper behavior (returned `isError` is typed but not applied by `HookToolWrapper`)\n\n## Ordering and conflict behavior\n\n### Discovery-level ordering\n\nCapability providers are priority-sorted (higher first). Dedupe is by capability key, first wins.\n\nFor `hooks`, capability key is `${type}:${tool}:${name}`. 
Shadowed duplicates from lower-priority providers are marked and excluded from effective discovered list.\n\n### Load order\n\n`discoverAndLoadHooks` builds a flat `allPaths` list, deduped by resolved absolute path, then `loadHooks` iterates in that order.\nFile order within each discovered directory depends on `readdir` output; the hook loader does not perform an additional sort.\n\n### Runtime handler order\n\nInside `HookRunner`, order is deterministic by registration sequence:\n\n1. hooks array order\n2. handler registration order per hook/event\n\nConflict behavior by event type:\n\n- `tool_call`: last returned result wins unless a handler blocks; first block short-circuits\n- `tool_result`: last returned override wins (no short-circuit)\n- `context`: chained; each handler receives prior handler’s message output\n- `before_agent_start`: first returned message is kept; later messages ignored\n- `session_before_*`: latest returned result is tracked; `cancel: true` short-circuits immediately\n- `session.compacting`: latest returned result wins\n\nCommand/renderer conflicts:\n\n- `getCommand(name)` returns first match across hooks (first loaded wins)\n- `getMessageRenderer(customType)` returns first match\n- `getRegisteredCommands()` returns all commands (no dedupe)\n\n## UI interactions (`HookContext.ui`)\n\n`HookUIContext` includes:\n\n- `select`, `confirm`, `input`, `editor`\n- `notify`\n- `setStatus`\n- `custom`\n- `setEditorText`, `getEditorText`\n- `theme` getter\n\n`ctx` includes `hasUI`, `cwd`, `sessionManager`, `modelRegistry`, current `model`, `isIdle()`, `abort()`, and `hasQueuedMessages()`.\n\nWhen running with no UI, the default no-op context behavior is:\n\n- `select/input/editor` return `undefined`\n- `confirm` returns `false`\n- `notify`, `setStatus`, `setEditorText` are no-ops\n- `getEditorText` returns `\"\"`\n\n### Status line behavior\n\nHook status text set via `ctx.ui.setStatus(key, text)` is:\n\n- stored per key\n- sorted by key name\n- 
sanitized (ANSI/VT escape sequences stripped; control characters mapped to spaces; repeated spaces collapsed; trimmed)\n- joined and width-truncated for display\n\n## Error propagation and fallback\n\n### Load-time\n\n- invalid module or missing default export → captured in `LoadHooksResult.errors`\n- loading continues for other hooks\n\n### Event-time\n\n`HookRunner.emit(...)` catches handler errors for most events and emits `HookError` to listeners (`hookPath`, `event`, `error`), then continues.\n\n`emitToolCall(...)` is stricter: handler errors are not swallowed there; they propagate to the caller. In `HookToolWrapper`, this blocks the tool call (fail-closed).\n\n## Realistic API examples\n\n### Block unsafe bash commands\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\nexport default function (pi: HookAPI): void {\n pi.on(\"tool_call\", async (event, ctx) => {\n if (event.toolName !== \"bash\") return;\n const cmd = String(event.input.command ?? 
\"\");\n if (!cmd.includes(\"rm -rf\")) return;\n\n if (!ctx.hasUI) return { block: true, reason: \"rm -rf blocked (no UI)\" };\n const ok = await ctx.ui.confirm(\"Dangerous command\", `Allow: ${cmd}`);\n if (!ok) return { block: true, reason: \"user denied command\" };\n });\n}\n```\n\n### Redact tool output on post-execution\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\nexport default function (pi: HookAPI): void {\n pi.on(\"tool_result\", async (event) => {\n if (event.toolName !== \"read\" || event.isError) return;\n\n const redacted = event.content.map((chunk) => {\n if (chunk.type !== \"text\") return chunk;\n return {\n ...chunk,\n text: chunk.text.replaceAll(/API_KEY=\\S+/g, \"API_KEY=[REDACTED]\"),\n };\n });\n\n return { content: redacted };\n });\n}\n```\n\n### Modify model context per LLM call\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\nexport default function (pi: HookAPI): void {\n pi.on(\"context\", async (event) => {\n const filtered = event.messages.filter(\n (msg) => !(msg.role === \"custom\" && msg.customType === \"debug-only\"),\n );\n return { messages: filtered };\n });\n}\n```\n\n### Register slash command with command-safe context methods\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\nexport default function (pi: HookAPI): void {\n pi.registerCommand(\"handoff\", {\n description: \"Create a new session with setup message\",\n handler: async (_args, ctx) => {\n await ctx.waitForIdle();\n await ctx.newSession({\n parentSession: ctx.sessionManager.getSessionFile(),\n setup: async (sm) => {\n sm.appendMessage({\n role: \"user\",\n content: [\n { type: \"text\", text: \"Continue from prior session summary.\" },\n ],\n timestamp: Date.now(),\n });\n },\n });\n },\n });\n}\n```\n\n## Export surface\n\n`src/extensibility/hooks/index.ts` and the package subpath `@oh-my-pi/pi-coding-agent/extensibility/hooks` 
export:\n\n- loading APIs (`discoverAndLoadHooks`, `loadHooks`)\n- runner and wrapper (`HookRunner`, `HookToolWrapper`)\n- all hook types\n- `execCommand` re-export\n\nThe package root (`@oh-my-pi/pi-coding-agent`) does not re-export `HookAPI`; import legacy hook types from the hooks subpath.\n",
26
27
  "install-id.md": "# Install ID\n\nA persistent per-install UUID that identifies a single oh-my-pi installation across sessions. Used as a stable correlation key for server-side dedup of telemetry-style pushes (currently the auto-QA grievance flush from `report_tool_issue`).\n\n## API\n\nExported from `@oh-my-pi/pi-utils` (`packages/utils/src/dirs.ts`):\n\n| Symbol | Purpose |\n| --------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------- |\n| `getInstallId(): string` | Returns the install ID, generating and persisting one on first call. Result is cached in-process for the lifetime of the runtime. |\n| `__resetInstallIdCacheForTests(): void` | Clears the in-process cache. Test-only — MUST NOT be called from production code. |\n\nGenerated IDs are lowercase RFC 4122 UUIDs. Existing persisted values are accepted case-insensitively when they match `^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$` with the regex `i` flag, and are returned exactly as stored.\n\n## Storage\n\n- Path: `<base-config-root>/install-id` — i.e. `~/.omp/install-id` by default, respecting `PI_CONFIG_DIR`. Resolved against the base config root (`getBaseConfigRoot()`) regardless of the active profile, so every profile on a host shares one install ID (install identity is per-install, not per-profile).\n- Format: a single UUID line (trailing `\\n`).\n- Permissions: file is created with mode `0o600`.\n- Lifecycle: independent of `~/.omp/agent/`. Wiping agent state (sessions, settings, DB) does NOT regenerate the install ID; only deleting the `install-id` file itself does.\n\n## Generation and lifecycle\n\n1. First call to `getInstallId()` reads the file. If contents parse as a valid UUID, that value is cached and returned.\n2. Otherwise the helper calls `crypto.randomUUID()` (Node's CSPRNG-backed UUID v4) to mint a new ID.\n3. 
The new value is written via `open(O_WRONLY | O_CREAT | O_EXCL, 0o600)`. The exclusive-create guard means two processes hitting first-call simultaneously cannot both succeed — the loser sees `EEXIST`, re-reads the winner's file, and adopts that ID.\n4. If the existing file contained non-empty garbage (failed UUID regex), it is `unlink`ed before the exclusive create so `O_EXCL` does not trip on stale data.\n5. Any other write failure (read-only FS, permission error) is swallowed: the freshly generated UUID is still cached in-memory so the rest of the process sees a stable value, and subsequent process launches will retry persistence.\n6. Subsequent in-process calls return the cached value without touching disk. Mutating the file on disk after the first call has no effect until the process restarts (or tests call `__resetInstallIdCacheForTests`).\n\n## Consumers\n\n- `packages/coding-agent/src/tools/report-tool-issue.ts` — included as `installId` in the auto-QA grievance push body so the backend can deduplicate repeated reports from the same install. See `dev.autoqaPush.*` settings and `PI_AUTO_QA_PUSH_*` env vars.\n\nNew consumers MUST treat the value as opaque and MUST NOT derive PII from it; the helper does not mix in hostname, username, or any other host-identifying entropy.\n\n## See also\n\n- [environment-variables.md](environment-variables.md) — `PI_CONFIG_DIR` controls where `install-id` lives.\n- [config-usage.md](config-usage.md) — broader config-root layout.\n",
27
- "keybindings.md": "# Keybindings\n\nRun `/hotkeys` inside an `omp` session to see the active chords for your current build. The list reflects any remaps loaded from disk and any bindings added by extensions.\n\n## Customize keybindings\n\nUser remaps live in `~/.omp/agent/keybindings.yml`. The file is a YAML mapping whose keys are keybinding action IDs and whose values are either one chord string or an array of chord strings. It is not read from `~/.omp/agent/config.yml`, and there is no nested `keybindings` object.\n\n```yaml\napp.model.cycleForward: Ctrl+P\napp.model.selectTemporary: Alt+P\napp.plan.toggle: Alt+Shift+P\n```\n\nChord names are case-insensitive and use the same notation shown in the UI, such as `Ctrl+P`, `Alt+Shift+P`, `Shift+Enter`, and `Ctrl+Backspace`.\n\nSet an action to an empty array to disable it:\n\n```yaml\napp.history.search: []\n```\n\n## Common action IDs\n\n| Action ID | Default | Meaning |\n| --------------------------- | -------------------------------------- | --------------------------------------------- |\n| `app.model.cycleForward` | `Ctrl+P` | Cycle role models forward |\n| `app.model.cycleBackward` | `Shift+Ctrl+P` | Cycle role models backward |\n| `app.model.selectTemporary` | `Alt+P` | Pick a model temporarily for this session |\n| `app.model.select` | `Alt+M` | Open the model selector and set roles |\n| `app.plan.toggle` | `Alt+Shift+P` | Toggle plan mode |\n| `app.history.search` | `Ctrl+R` | Search prompt history |\n| `app.tools.expand` | `Ctrl+O` | Toggle tool-output expansion |\n| `app.thinking.toggle` | `Ctrl+T` | Toggle thinking-block visibility |\n| `app.thinking.cycle` | `Shift+Tab` | Cycle thinking level |\n| `app.editor.external` | `Ctrl+G` | Edit the draft in `$VISUAL` / `$EDITOR` |\n| `app.message.followUp` | `Ctrl+Q`, `Ctrl+Enter` | Queue a follow-up message |\n| `app.message.dequeue` | `Alt+Up` | Dequeue a queued message back into the editor |\n| `app.display.reset` | `Ctrl+L` | Reset terminal display |\n| 
`app.clipboard.copyLine` | `Alt+Shift+L` | Copy the current line |\n| `app.clipboard.copyPrompt` | `Alt+Shift+C` | Copy the whole prompt |\n| `app.clipboard.pasteImage` | `Ctrl+V` (`Alt+V` fallback on Windows) | Paste from the clipboard (image preferred, text fallback) |\n| `app.stt.toggle` | Unbound (hold `Space`) | Toggle speech-to-text. By default there is no key chord — hold the space bar to record (push-to-talk) and release to transcribe; bind a chord here for a press-to-toggle alternative |\n\nOn Windows Terminal, `Ctrl+V` may be handled by the terminal paste command before `omp` sees it; use the `Alt+V` fallback when clipboard image paste appears to do nothing. When the clipboard holds no image, `app.clipboard.pasteImage` pastes the clipboard text instead, so hosts that deliver only this chord (VS Code's integrated terminal when configured to forward `Ctrl+V`, Windows clipboard history via `Win+V`) work for both payload kinds. Windows Terminal also swallows `Ctrl+Enter`, so the follow-up shortcut also binds `Ctrl+Q` — the same chord GitHub Copilot CLI uses. If your existing `keybindings.yml` already assigns `Ctrl+Q` to another action, that user remap wins and follow-up keeps `Ctrl+Enter` unless you explicitly bind `app.message.followUp`.\n\nTerminals that implement OSC 5522 enhanced paste can send clipboard MIME data directly to `omp`; image pastes are attached as `[Image #N]`, while text/plain paste events keep normal paste behavior. When OSC 5522 is unavailable, bracketed paste still handles text, and a pasted single image-file path is loaded as an image when the file is readable from the `omp` host.\n\nOlder unqualified action names are migrated when `keybindings.yml` is loaded, but new docs and new configs should use the namespaced action IDs above. Existing `keybindings.json` files are still accepted and migrated to `keybindings.yml`; `keybindings.yaml` is also accepted.\n",
28
+ "keybindings.md": "# Keybindings\n\nRun `/hotkeys` inside an `omp` session to see the active chords for your current build. The list reflects any remaps loaded from disk and any bindings added by extensions.\n\n## Customize keybindings\n\nUser remaps live in `~/.omp/agent/keybindings.yml`. The file is a YAML mapping whose keys are keybinding action IDs and whose values are either one chord string or an array of chord strings. It is not read from `~/.omp/agent/config.yml`, and there is no nested `keybindings` object.\n\n```yaml\napp.model.cycleForward: Ctrl+P\napp.model.selectTemporary: Alt+P\napp.plan.toggle: Alt+Shift+P\n```\n\nChord names are case-insensitive and use the same notation shown in the UI, such as `Ctrl+P`, `Alt+Shift+P`, `Shift+Enter`, and `Ctrl+Backspace`.\n\nSet an action to an empty array to disable it:\n\n```yaml\napp.history.search: []\n```\n\n## Common action IDs\n\n| Action ID | Default | Meaning |\n| --------------------------- | -------------------------------------- | --------------------------------------------- |\n| `app.model.cycleForward` | `Ctrl+P` | Cycle role models forward |\n| `app.model.cycleBackward` | `Shift+Ctrl+P` | Cycle role models backward |\n| `app.model.selectTemporary` | `Alt+P` | Pick a model temporarily for this session |\n| `app.model.select` | `Alt+M` | Open the model selector and set roles |\n| `app.plan.toggle` | `Alt+Shift+P` | Toggle plan mode |\n| `app.history.search` | `Ctrl+R` | Search prompt history |\n| `app.tools.expand` | `Ctrl+O` | Toggle tool-output expansion |\n| `app.thinking.toggle` | `Ctrl+T` | Toggle thinking-block visibility |\n| `app.thinking.cycle` | `Shift+Tab` | Cycle thinking level |\n| `app.editor.external` | `Ctrl+G` | Edit the draft in `$VISUAL` / `$EDITOR` |\n| `app.message.followUp` | `Ctrl+Q`, `Ctrl+Enter` | Queue a follow-up message |\n| `app.message.dequeue` | `Alt+Up` | Dequeue a queued message back into the editor |\n| `app.retry` | `Alt+R` | Retry the last failed assistant turn 
|\n| `app.display.reset` | `Ctrl+L` | Reset terminal display |\n| `app.clipboard.copyLine` | `Alt+Shift+L` | Copy the current line |\n| `app.clipboard.copyPrompt` | `Alt+Shift+C` | Copy the whole prompt |\n| `app.clipboard.pasteImage` | `Ctrl+V` (`Alt+V` fallback on Windows) | Paste from the clipboard (image preferred, text fallback) |\n| `app.stt.toggle` | Unbound (hold `Space`) | Toggle speech-to-text. By default there is no key chord — hold the space bar to record (push-to-talk) and release to transcribe; bind a chord here for a press-to-toggle alternative |\n\nOn Windows Terminal, `Ctrl+V` may be handled by the terminal paste command before `omp` sees it; use the `Alt+V` fallback when clipboard image paste appears to do nothing. When the clipboard holds no image, `app.clipboard.pasteImage` pastes the clipboard text instead, so hosts that deliver only this chord (VS Code's integrated terminal when configured to forward `Ctrl+V`, Windows clipboard history via `Win+V`) work for both payload kinds. Windows Terminal also swallows `Ctrl+Enter`, so the follow-up shortcut also binds `Ctrl+Q` — the same chord GitHub Copilot CLI uses. If your existing `keybindings.yml` already assigns `Ctrl+Q` to another action, that user remap wins and follow-up keeps `Ctrl+Enter` unless you explicitly bind `app.message.followUp`.\n\nTerminals that implement OSC 5522 enhanced paste can send clipboard MIME data directly to `omp`; image pastes are attached as `[Image #N]`, while text/plain paste events keep normal paste behavior. When OSC 5522 is unavailable, bracketed paste still handles text, and a pasted single image-file path is loaded as an image when the file is readable from the `omp` host.\n\nOlder unqualified action names are migrated when `keybindings.yml` is loaded, but new docs and new configs should use the namespaced action IDs above. Existing `keybindings.json` files are still accepted and migrated to `keybindings.yml`; `keybindings.yaml` is also accepted.\n",
28
29
  "local-models.md": "# Embedded Local Tiny-Model Experiments\n\nThis document summarizes the experiments behind the optional **local** tiny-model paths for\nsession-title generation (`providers.tinyModel`), Mnemopi memory extraction/consolidation\n(`providers.memoryModel`), and the `auto` thinking-level difficulty classifier\n(`providers.autoThinkingModel`, which reuses the memory-model registry). It is a factual engineering\nrecord for maintainers: what we measured, which recipes won, and which models we shipped. All three\nsettings default to `online`, so existing users incur no downloads or on-device inference cost unless\nthey opt in.\n\n## Runtime / environment findings\n\n- **Stack**: `@huggingface/transformers` (transformers.js) v4 running under Bun. In Bun the library\n loads the **native `onnxruntime-node` backend** (not the WASM build).\n- **Device policy**: local tiny models default to CPU-only inference and retry once on CPU if an\n explicit accelerated provider cannot initialize.\n - Pick a provider persistently with the `providers.tinyModelDevice` setting (`default` keeps CPU),\n or per-run with the `PI_TINY_DEVICE` env var (which overrides the setting).\n - Accepted values are `cpu`, `gpu`, `metal`/`webgpu`, `auto`, `cuda`, `dml`, `coreml`, `wasm`,\n `webnn`, `webnn-gpu`, `webnn-cpu`, and `webnn-npu`.\n - Direct `coreml` remains opt-in via `PI_TINY_DEVICE=coreml`; it is not part of the default because\n cached decoder-LLM ONNX loads can fail during session initialization.\n - WebGPU/Metal works for the single-process eval harness, but the production worker forces\n Darwin `gpu`/`webgpu`/`auto` requests back to CPU because ONNX Runtime/Bun currently\n hard-crashes on worker teardown after WebGPU inference.\n - Use `providers.tinyModelDevice` or `PI_TINY_DEVICE` only when explicitly opting out of the CPU\n default.\n- **Quantization: q4 is the sweet spot** — smaller on disk, faster to load, and fast at inference.\n q8/int8 loads slower _and_ infers 
slower on CPU. Every shipped model defaults to `q4`; override the\n precision persistently with the `providers.tinyModelDtype` setting (`default` keeps `q4`, e.g. `fp16`\n for higher fidelity), or per-run with `PI_TINY_DTYPE` (which overrides the setting). Accepts `auto`,\n `fp32`, `fp16`, `q8`, `int8`, `uint8`, `q4`, `bnb4`, `q4f16`, `q2`, `q2f16`, `q1`, `q1f16`; an\n unrecognized value fails loudly at worker startup.\n- **Load-time correction (important).** An earlier belief that \"q4 >=1B models take minutes to load\"\n was a **measurement artifact** caused by running ~5 multi-GB HuggingFace downloads in parallel\n (I/O saturation). Clean, isolated **warm** loads are all sub-3s:\n - TinyLlama-1.1B q4: ~0.5s\n - Llama-3.2-1B q4: ~2.8s (`graphOpt=all`) / ~0.5s (`disabled`)\n - LFM2-1.2B q4: ~0.36s\n - Qwen2.5-1.5B q4: ~1.5s\n - Qwen3-1.7B q4: ~1.6s\n - gemma-3-1b q4: ~1.1s\n - Conclusion: **1B–1.7B models are viable on CPU.**\n- **`session_options.graphOptimizationLevel`** trades load vs inference speed: `disabled` = fastest\n load, slightly slower inference; `all` = default.\n- **First run** downloads weights from the HF Hub to a cache dir (q4 weights ~200MB–1.1GB depending\n on model); subsequent **warm** loads are sub-second to ~3s. Inference is async and\n background-friendly for memory tasks; titles are semi-interactive.\n\n## Task 1: Session title generation (`providers.tinyModel`)\n\n**Task**: turn the first user message into a 3–6 word title. 
Tiny models (sub-1B) suffice.\n\n**Winning recipe**:\n\n- Plain system prompt (no few-shot).\n- **Prefill** the assistant turn with `<title>` and **stop at `</title>`**, then take the first line.\n- Greedy decoding (`do_sample:false`), `enable_thinking:false` in the chat template.\n\n**What we learned**:\n\n- **Few-shot examples HURT sub-0.6B models** for titles; the tag-prefill rescues even 270M models.\n- **Token biasing (`bad_words_ids`) is a confirmed no-op** here — the prefill already controls the\n opener.\n\n**Leaderboard** (tag trick, CPU, warm):\n\n| Model | Verdict |\n| ------------- | ----------------------------------- |\n| LFM2-350M | Best speed/quality balance (~212MB) |\n| Qwen3-0.6B | Most robust |\n| gemma-3-270m | Smallest viable |\n| Qwen2.5-0.5B | Acceptable |\n| SmolLM2-135M | Too small |\n| flan-t5-small | Rejected — just echoes the input |\n\n**Shipped local options**: `lfm2-350m`, `qwen3-0.6b`, `gemma-270m`, `qwen2.5-0.5b`, `lfm2-700m`.\n**Default**: `online` (pi/smol).\n\n## Task 2: Mnemopi memory (`providers.memoryModel`)\n\nMnemopi runs two small-LLM tasks:\n\n1. **Extraction** — pull durable, structured items from a single message.\n2. **Consolidation** — summarize a list of memories into 1–3 faithful sentences.\n\nThese need **bigger models than titles: 1B–1.7B**. We tested LFM2-1.2B, Qwen2.5-1.5B, Qwen3-1.7B,\nand gemma-3-1b (q4, CPU) via four parallel agents each running 27–31 experiments.\n\n### Extraction findings\n\nThe stock 5-category JSON prompt fails on small models in two ways:\n\n1. The all-empty example `{\"facts\":[],...}` gets **copied verbatim** → 0 facts extracted.\n2. Capable models emit **JSON objects inside arrays**, which Mnemopi's `String(item)` coerces into\n the literal string `[object Object]`.\n\nThe robust fix is a **one-item-per-line output format** (consumed by Mnemopi's parser line-fallback)\nor a **flat JSON array of strings**. 
Every model also over-extracts pure small talk; an explicit\nchit-chat → NONE example is the best mitigation.\n\n### Technique polarity flips vs titles\n\n- At 1B+, **few-shot is the dominant quality lever**: e.g. Qwen2.5-1.5B extraction F1 0.52 → 0.83\n going 1 → 3 shots; gemma recall 0.65 → 0.92 with 2 shots.\n- **Prefill HURTS extraction** — it forces output on small talk, producing false positives.\n- **System-split** (instructions in the system role) helps models that have a system role.\n- **Greedy >= temperature** for both tasks.\n- **Token biasing** is again a no-op.\n\n### Per-model verdicts (head-to-head, 16-fixture set)\n\n- **Qwen3-1.7B** — most disciplined extraction: returns empty on small talk, no buried-fact leak,\n preserves language, clean flat JSON. Weaknesses: coarse granularity, missed a multi-turn value\n update.\n- **Qwen2.5-1.5B** — best extraction granularity (atomic facts), caught the value update, zero\n small-talk leakage. Weaknesses: weakest consolidation (run-on, no dedup) and one degenerate\n buried-fact output.\n- **gemma-3-1b** — best consolidation (dedup works, faithful, clean single-memory). Weaknesses: leaks\n small talk and translated German.\n- **LFM2-1.2B** — solid and fastest to load. Weaknesses: `Label: value` noise, small-talk + buried\n leaks, a fluffy single-memory summary.\n\n### Recommendation\n\nExtraction favors **precision** (do not pollute long-term memory) → **Qwen3-1.7B is the best single\npick** (its consolidation is good enough). 
If running a second model for consolidation, **gemma-3-1b**\nwins that task.\n\n**Shipped local options**: `qwen3-1.7b` (recommended), `gemma-3-1b`, `qwen2.5-1.5b`, `lfm2-1.2b`.\n**Default**: `online` (the configured smol model).\n\n### Known Mnemopi parser bugs (surfaced by these experiments)\n\n- `String(item)` produces `[object Object]` on object array items.\n- The line-fallback drops items `<=10` chars, so a correct short fact like `Name: Can` is discarded.\n\n\n## Integration notes\n\n- `providers.tinyModel`, `providers.memoryModel`, and `providers.autoThinkingModel` default to\n `online`, so existing users get **no downloads or on-device inference cost** unless they opt in.\n- Local inference runs **in a worker** (off the main thread); models are cached on disk and\n downloaded on first use.\n- The memory local path applies the refined recipes (line-format + small-talk-guarded extraction\n prompt, hardened consolidation prompt) via Mnemopi prompt overrides; the **online path is\n unchanged**.\n- `providers.autoThinkingModel` uses the same shipped local options as `providers.memoryModel`.\n",
29
30
  "lsp-config.md": "# LSP configuration in OMP\n\nThis guide explains how to configure language servers for the OMP coding agent.\n\nSource of truth in code:\n\n- Server config type: `packages/coding-agent/src/lsp/types.ts` (`ServerConfig`)\n- Config loader: `packages/coding-agent/src/lsp/config.ts`\n- Built-in server definitions: `packages/coding-agent/src/lsp/defaults.json`\n\n## Auto-detection\n\nWhen no LSP config file is present, OMP auto-detects servers by intersecting two conditions:\n\n1. The project directory contains at least one of the server's `rootMarkers`.\n2. The server binary is available — checked in project-local bin directories first (e.g., `node_modules/.bin/`, `.venv/bin/`), then `$PATH`.\n\nNo configuration is required for common setups. The built-in server list covers most popular languages; see [`defaults.json`](../packages/coding-agent/src/lsp/defaults.json) for the full set.\n\n## Config file locations\n\nOMP merges LSP config from multiple files, lowest to highest priority:\n\n| Priority | Location |\n| ----------- | --------------------------------------------------------------------------------------------------------------------------- |\n| 5 (lowest) | `~/lsp.json`, `~/.lsp.json`, `~/lsp.yaml`, `~/.lsp.yaml`, `~/lsp.yml`, `~/.lsp.yml` |\n| 4 | Plugin LSP configs (marketplace / `--plugin-dir` roots) |\n| 3 | User config dirs: `~/.omp/agent/lsp.*`, `~/.claude/lsp.*`, `~/.codex/lsp.*`, `~/.gemini/lsp.*` |\n| 2 | Project config dirs: `<project>/.omp/lsp.*`, `<project>/.claude/lsp.*`, `<project>/.codex/lsp.*`, `<project>/.gemini/lsp.*` |\n| 1 (highest) | Project root: `<project>/lsp.*` and `<project>/.lsp.*` |\n\nEach location accepts `.json`, `.yaml`, and `.yml` variants, including hidden-file versions (`.lsp.json`, `.lsp.yaml`, `.lsp.yml`). Files are merged in order: higher-priority files override lower-priority fields for the same server. 
Servers not mentioned in any override file remain at their built-in defaults.\n\n**Recommended locations:**\n\n- User-wide preferences → `~/.omp/agent/lsp.json`\n- Project-specific overrides → `<project>/.omp/lsp.json`\n\n> **Note:** Auto-detection is skipped only when at least one config file contributes server overrides. A config file that only sets `idleTimeoutMs` still lets OMP auto-detect built-in servers. When server overrides exist, OMP merges them with defaults and then loads servers that have matching `rootMarkers`, an available binary, and are not explicitly `disabled`.\n\n## File shape\n\nBoth JSON and YAML are accepted. The top-level object can use either a `servers` wrapper key or a flat map directly:\n\n```json\n{\n \"servers\": {\n \"server-name\": { ... }\n },\n \"idleTimeoutMs\": 300000\n}\n```\n\nor (flat, without the `servers` wrapper):\n\n```json\n{\n \"server-name\": { ... },\n \"idleTimeoutMs\": 300000\n}\n```\n\nTop-level keys:\n\n- `servers` — map of server name to `ServerConfig` (optional wrapper; flat form is equivalent)\n- `idleTimeoutMs` — shut down idle language servers after this many milliseconds; disabled by default\n\n## ServerConfig fields\n\n| Field | Type | Required | Description |\n| ----------------- | ---------- | -------- | ---------------------------------------------------------------------------------------------------------------- |\n| `command` | `string` | yes | Binary name (resolved via PATH/local bins) or absolute path |\n| `args` | `string[]` | no | Arguments passed to the binary |\n| `fileTypes` | `string[]` | yes | File extensions this server handles, e.g. `[\".ts\", \".tsx\"]` |\n| `rootMarkers` | `string[]` | yes | Files/dirs that indicate a project root; glob patterns (e.g. 
`*.cabal`) are supported |\n| `initOptions` | `object` | no | Sent as `initializationOptions` during LSP handshake |\n| `settings` | `object` | no | Workspace settings pushed via `workspace/didChangeConfiguration` |\n| `disabled` | `boolean` | no | Set to `true` to disable this server entirely |\n| `warmupTimeoutMs` | `number` | no | Startup timeout in ms for this server (overrides the global default) |\n| `isLinter` | `boolean` | no | Mark server as linter/formatter only; excluded from type-intelligence operations (hover, go-to-definition, etc.) |\n| `capabilities` | `object` | no | Opt-in server-specific features; see [Capabilities](#capabilities) |\n\n`resolvedCommand` is populated automatically at runtime — do not set it manually.\n\n### Capabilities\n\nThe `capabilities` object enables optional server-specific features that OMP supports on a per-server basis:\n\n```json\n{\n \"capabilities\": {\n \"flycheck\": true,\n \"ssr\": true,\n \"expandMacro\": true,\n \"runnables\": true,\n \"relatedTests\": true\n }\n}\n```\n\nAll fields are boolean and optional. They are currently used by `rust-analyzer`.\n\n## Common recipes\n\n### Override a built-in server's settings\n\nPartial overrides are merged onto the built-in defaults. You only need to specify the fields you want to change.\n\n```json\n{\n \"servers\": {\n \"typescript-language-server\": {\n \"args\": [\"--stdio\", \"--log-level\", \"4\"]\n }\n }\n}\n```\n\n```yaml\nservers:\n gopls:\n settings:\n gopls:\n gofumpt: false\n staticcheck: false\n```\n\n### Disable a built-in server\n\n```json\n{\n \"servers\": {\n \"eslint\": {\n \"disabled\": true\n }\n }\n}\n```\n\n### Register a custom server\n\nNew servers require `command`, `fileTypes`, and `rootMarkers`. 
All other fields are optional.\n\n```json\n{\n \"servers\": {\n \"my-lsp\": {\n \"command\": \"my-lsp-server\",\n \"args\": [\"--stdio\"],\n \"fileTypes\": [\".xyz\"],\n \"rootMarkers\": [\".xyz-project\", \".git\"]\n }\n }\n}\n```\n\n### Set a global idle timeout\n\nShut down language servers that have been inactive for more than five minutes:\n\n```json\n{\n \"idleTimeoutMs\": 300000\n}\n```\n\n### Disable a server for one project, keep it globally\n\nPlace the override in `<project>/.omp/lsp.json`:\n\n```json\n{\n \"servers\": {\n \"pylsp\": {\n \"disabled\": true\n }\n }\n}\n```\n\nThe user-level config in `~/.omp/agent/lsp.json` is unaffected; pylsp is only suppressed in this project.\n\n## Built-in server list\n\nThe following servers ship in `defaults.json` and are eligible for auto-detection:\n\n| Server key | Language(s) | Binary |\n| ----------------------------- | ----------------------------- | --------------------------------- |\n| `rust-analyzer` | Rust | `rust-analyzer` |\n| `clangd` | C, C++, ObjC | `clangd` |\n| `zls` | Zig | `zls` |\n| `gopls` | Go | `gopls` |\n| `typescript-language-server` | TypeScript, JavaScript | `typescript-language-server` |\n| `denols` | TypeScript, JavaScript (Deno) | `deno` |\n| `biome` | TS/JS/JSON (linter) | `biome` |\n| `eslint` | TS/JS/Vue/Svelte (linter) | `vscode-eslint-language-server` |\n| `vscode-html-language-server` | HTML | `vscode-html-language-server` |\n| `vscode-css-language-server` | CSS, SCSS, Less | `vscode-css-language-server` |\n| `vscode-json-language-server` | JSON | `vscode-json-language-server` |\n| `tailwindcss` | HTML, CSS, TS/JS | `tailwindcss-language-server` |\n| `svelte` | Svelte | `svelteserver` |\n| `vue-language-server` | Vue | `vue-language-server` |\n| `astro` | Astro | `astro-ls` |\n| `pyright` | Python | `pyright-langserver` |\n| `basedpyright` | Python | `basedpyright-langserver` |\n| `pylsp` | Python | `pylsp` |\n| `ruff` | Python (linter) | `ruff` |\n| `jdtls` | Java | `jdtls` 
|\n| `kotlin-lsp` | Kotlin | `kotlin-lsp` |\n| `metals` | Scala | `metals` |\n| `hls` | Haskell | `haskell-language-server-wrapper` |\n| `ocamllsp` | OCaml | `ocamllsp` |\n| `elixirls` | Elixir | `elixir-ls` |\n| `expert` | Elixir | `expert` |\n| `erlangls` | Erlang | `erlang_ls` |\n| `gleam` | Gleam | `gleam` |\n| `solargraph` | Ruby | `solargraph` |\n| `ruby-lsp` | Ruby | `ruby-lsp` |\n| `rubocop` | Ruby (linter) | `rubocop` |\n| `bashls` | Bash, Zsh | `bash-language-server` |\n| `lua-language-server` | Lua | `lua-language-server` |\n| `intelephense` | PHP | `intelephense` |\n| `phpactor` | PHP | `phpactor` |\n| `omnisharp` | C# | `omnisharp` |\n| `yamlls` | YAML | `yaml-language-server` |\n| `terraformls` | Terraform | `terraform-ls` |\n| `dockerls` | Dockerfile | `docker-langserver` |\n| `helm-ls` | Helm | `helm_ls` |\n| `nixd` | Nix | `nixd` |\n| `nil` | Nix | `nil` |\n| `ols` | Odin | `ols` |\n| `dartls` | Dart | `dart` |\n| `marksman` | Markdown | `marksman` |\n| `texlab` | LaTeX | `texlab` |\n| `graphql` | GraphQL | `graphql-lsp` |\n| `prismals` | Prisma | `prisma-language-server` |\n| `vimls` | Vim script | `vim-language-server` |\n| `emmet-language-server` | HTML, CSS, JSX | `emmet-language-server` |\n| `sourcekit-lsp` | Swift | `sourcekit-lsp` |\n| `swiftlint` | Swift (linter) | `swiftlint` |\n| `tlaplus` | TLA+ | `tlapm_lsp` |\n",
30
31
  "macos-signing-notarization.md": "# macOS signing & notarization\n\nThe compiled macOS `omp` binaries shipped on GitHub Releases are signed with a\n**Developer ID Application** certificate and **notarized** by Apple. This makes\nthem Gatekeeper-acceptable and is the prerequisite for an official Homebrew\nsubmission (see [#776](https://github.com/can1357/oh-my-pi/issues/776)).\n\nSigning happens in CI, in the `release_binary` job's darwin matrix legs\n(`.github/workflows/ci.yml`), via `scripts/ci-macos-sign.sh`. It **auto-skips**\nuntil the `APPLE_*` repository secrets below are configured, so releases keep\nworking (ad-hoc signed, as before) in the meantime.\n\n## How it works\n\n1. `ci:release:build-binaries` builds and **ad-hoc** signs the binary (so it can\n run on the build runner).\n2. `scripts/ci-macos-sign.sh` then:\n - imports the Developer ID cert into a throwaway keychain;\n - re-signs with `--options runtime --timestamp` (hardened runtime + secure\n timestamp) and `--entitlements scripts/macos-entitlements.plist`;\n - runs `--version` and `--smoke-test` under the new signature to fail fast;\n - notarizes the binary via `notarytool submit --wait`.\n3. `release_github_verify` re-downloads the published arm64 asset and asserts it\n is **not** ad-hoc, passes `codesign --verify --strict`, and boots cleanly.\n\n### Why the entitlements are mandatory\n\nThe binary is a Bun single-file executable, so the hardened runtime needs:\n\n| Entitlement | Reason |\n| --- | --- |\n| `com.apple.security.cs.allow-jit` | JavaScriptCore JITs at runtime. |\n| `com.apple.security.cs.allow-unsigned-executable-memory` | JSC executable memory pages. |\n| `com.apple.security.cs.disable-library-validation` | omp extracts its native addon (`pi_natives.<triple>.node`) and other optional dylibs to a runtime cache and `dlopen()`s them. 
They do not share the main binary's Team ID, so without this the hardened runtime aborts with *\"mapping process and mapped file have different Team IDs\"* — breaking effectively every command. |\n\nWithout `disable-library-validation`, a signed+notarized binary signs and\nnotarizes fine but **fails at first real use**. `scripts/ci-macos-sign.sh` runs\n`--smoke-test` after signing specifically to catch this before notarizing.\n\n### Stapling limitation (important)\n\nA bare Mach-O executable **cannot be stapled** (`stapler` only supports\n`.app`/`.pkg`/`.dmg`). The binary is genuinely notarized — `notarytool` returns\n`Accepted` and the ticket exists on Apple's servers keyed to its cdhash — but\nbecause there is no *stapled* ticket, a direct `spctl -a -t exec` assessment\nreports `rejected / source=Unnotarized Developer ID`. This is expected and is\n**not** a signing or credential failure.\n\nWhat this means in practice:\n\n- `curl https://omp.sh/install | sh` — `curl` sets no quarantine bit, so\n Gatekeeper is never consulted; the binary just runs. ✅\n- Homebrew **formula** installs — Homebrew does not quarantine formula files, so\n Gatekeeper is never consulted. ✅\n- Anything that **quarantines** the binary (a browser download, or a Homebrew\n **cask**) and is assessed offline will be blocked, because there is no stapled\n ticket. For that route, wrap the binary in a stapleable, notarized **`.pkg` or\n `.dmg`** (`xcrun stapler staple` works on those). That is a follow-up and is\n **not** required for the `curl`/formula paths.\n\n## Required GitHub secrets\n\nAdd these under **Settings → Secrets and variables → Actions** (repo secrets).\nAll five secrets (cert, password, and API key trio) must be present for\nsigning to engage.\n\n| Secret | What it is |\n| --- | --- |\n| `APPLE_CERTIFICATE_P12` | base64 of the exported Developer ID Application `.p12` (cert + private key). |\n| `APPLE_CERTIFICATE_PASSWORD` | password you set when exporting the `.p12`. 
|\n| `APPLE_API_KEY_ID` | App Store Connect API **Key ID**. |\n| `APPLE_API_ISSUER_ID` | App Store Connect API **Issuer ID** (UUID). |\n| `APPLE_API_KEY` | base64 of the App Store Connect `.p8` private key. |\n\n### Producing the credential files\n\nDrop these into a working directory (default `~/omp-signing`):\n\n| File | How |\n| --- | --- |\n| `*.p12` | **Keychain Access** → right-click your *Developer ID Application: …* identity (the entry that expands to a cert **with** a private key) → **Export…** → save as `.p12` and set a password. |\n| `p12-password.txt` | the password you just set on the `.p12`. |\n| `AuthKey_<KEYID>.p8` | App Store Connect → **Users and Access → Integrations → App Store Connect API** → create a key (**Account Holder** role also allows API cert creation; **Developer** is enough for notarization) → **download once** (non-recoverable). |\n| `issuer-id.txt` | the **Issuer ID** (UUID) shown above the keys table. |\n| `key-id.txt` | *optional* — the Key ID; otherwise read from the `.p8` filename. |\n\nThe App Store Connect API key is the one credential that **cannot** be minted\nfrom a CLI — it is the bootstrap credential for the API itself, and the `.p8`\ndownloads exactly once. Everything else is local.\n\n### Uploading (no value leaves disk)\n\n`scripts/ci-macos-upload-secrets.sh` validates the files (opens the `.p12` with\nyour password, sanity-checks the `.p8`) and pipes each value to `gh secret set`\nover stdin — no secret is ever printed to the terminal, argv, or shell history:\n\n```sh\nscripts/ci-macos-upload-secrets.sh ~/omp-signing --dry-run # validate first\nscripts/ci-macos-upload-secrets.sh ~/omp-signing # upload all five\ngh secret list --repo can1357/oh-my-pi # confirm\n```\n\nRe-run it whenever the certificate is renewed.\n\n### Finding your signing identity / Team ID (sanity check)\n\n```sh\nsecurity find-identity -v -p codesigning\n# e.g. 
\"Developer ID Application: Your Name (TEAMID1234)\"\n```\n\nThe script selects the first `Developer ID Application` identity automatically;\nyou do not need to store the identity string or Team ID as a secret.\n\n## Local dry run\n\nYou can exercise the full sign+notarize path locally (real cert + API key) by\nexporting the five env vars and running:\n\n```sh\nRELEASE_TARGETS=darwin-arm64 bun run ci:release:build-binaries\nAPPLE_CERTIFICATE_P12=… APPLE_CERTIFICATE_PASSWORD=… \\\nAPPLE_API_KEY_ID=… APPLE_API_ISSUER_ID=… APPLE_API_KEY=… \\\n bash scripts/ci-macos-sign.sh packages/coding-agent/binaries/omp-darwin-arm64\n```\n",
@@ -35,7 +36,7 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
35
36
  "mcp-server-tool-authoring.md": "# MCP server and tool authoring\n\nThis document explains how MCP server definitions become callable `mcp__*` tools in coding-agent, and what operators should expect when configs are invalid, duplicated, disabled, or auth-gated.\n\n## Architecture at a glance\n\n```text\nConfig sources (.omp/.claude/.cursor/.vscode/mcp.json, mcp.json, etc.)\n -> discovery providers normalize to canonical MCPServer\n -> capability loader dedupes by server name (higher provider priority wins)\n -> loadAllMCPConfigs converts to MCPServerConfig + skips enabled:false\n -> MCPManager connects/listTools (with auth/header/env resolution)\n -> manager best-effort loads resources/prompts and subscribes to resource updates when enabled\n -> MCPTool/DeferredMCPTool bridge exposes tools as mcp__<server>_<tool>\n -> AgentSession.refreshMCPTools replaces live MCP tools immediately\n```\n\n## 1) Server config model and validation\n\n`src/mcp/types.ts` defines the authoring shape used by MCP config writers and runtime:\n\n- `stdio` (default when `type` missing): requires `command`, optional `args`, `env`, `cwd`\n- `http`: requires `url`, optional `headers`\n- `sse`: requires `url`, optional `headers` (kept for compatibility)\n- shared fields: `enabled`, `timeout`, `auth`, `oauth`\n\n`validateServerConfig()` (`src/mcp/config.ts`) enforces transport basics:\n\n- rejects configs that set both `command` and `url`\n- requires `command` for stdio\n- requires `url` for http/sse\n- rejects unknown `type`\n\n`config-writer.ts` applies this validation for add/update operations and also validates server names:\n\n- non-empty\n- max 100 chars\n- only `[a-zA-Z0-9_.:-]` (colon allows namespaced plugin server names, e.g. `cloudflare:cloudflare-api`)\n\n### Transport pitfalls\n\n- `type` omitted means stdio. 
If you intended HTTP/SSE but omitted `type`, `command` becomes mandatory.\n- `sse` is still accepted but treated as HTTP transport internally (`createHttpTransport`).\n- Validation is structural, not reachability: a syntactically valid URL can still fail at connect time.\n\n## 2) Discovery, normalization, and precedence\n\n### Capability-based discovery\n\n`loadAllMCPConfigs()` (`src/mcp/config.ts`) loads canonical `MCPServer` items via `loadCapability(mcpCapability.id)`.\n\nThe capability layer (`src/capability/index.ts`) then:\n\n1. loads providers in priority order\n2. dedupes by `server.name` (first win = highest priority)\n3. validates deduped items\n\nResult: duplicate server names across sources are not merged. One definition wins; lower-priority duplicates are shadowed.\n\n### `.mcp.json` and related files\n\nThe dedicated fallback provider in `src/discovery/mcp-json.ts` reads project-root `mcp.json` and `.mcp.json` (low priority).\n\nIn practice MCP servers also come from higher-priority providers (for example native `.omp/...` and tool-specific config dirs). Authoring guidance:\n\n- Prefer `.omp/mcp.json` (project) or `~/.omp/agent/mcp.json` (user) for explicit control.\n- Use root `mcp.json` / `.mcp.json` when you need fallback compatibility.\n- Reusing the same server name in multiple sources causes precedence shadowing, not merge.\n\n### Normalization behavior\n\n`convertToLegacyConfig()` (`src/mcp/config.ts`) maps canonical `MCPServer` to runtime `MCPServerConfig`.\n\nKey behavior:\n\n- transport inferred as `server.transport ?? (command ? \"stdio\" : url ? 
\"http\" : \"stdio\")`\n- disabled servers (`enabled === false`) and names in the user `disabledServers` list are dropped before connection\n- optional fields are preserved when present\n\n### Environment expansion during discovery\n\nOMP-native MCP config (`.omp/mcp.json`, `~/.omp/agent/mcp.json`, plus their `.mcp.json` variants) expands `${VAR}` and `${VAR:-default}` placeholders recursively before converting to runtime config. It also accepts boolean/string forms for `enabled` (`true`, `false`, `1`, `0`) and numeric strings for `timeout`.\n\nThe standalone fallback provider in `src/discovery/mcp-json.ts` reads project-root `mcp.json` and `.mcp.json`, expands the same `${...}` placeholders, and type-checks `enabled`/`timeout` without coercing string values.\n\nInvalid `enabled`/`timeout` values are ignored with warnings rather than failing the whole file.\n\n## 3) Auth and runtime value resolution\n\n`MCPManager.prepareConfig()`/`#resolveAuthConfig()` (`src/mcp/manager.ts`) is the final pre-connect pass.\n\n### OAuth credential injection\n\nIf config has:\n\n```ts\nauth: { type: \"oauth\", credentialId: \"...\" }\n```\n\nand credential exists in auth storage:\n\n- `http`/`sse`: injects `Authorization: Bearer <access_token>` header\n- `stdio`: injects `OAUTH_ACCESS_TOKEN` env var\n\nIf credential lookup fails, manager logs a warning and continues with unresolved auth.\n\n### Header/env value resolution\n\nBefore connect, manager resolves stdio `env` values and HTTP/SSE `headers` values via `resolveConfigValue()` (`src/config/resolve-config-value.ts`):\n\n- value starting with `!` => execute shell command, use trimmed stdout (cached)\n- failed, timed-out, or whitespace-only commands produce `undefined`, so that entry is omitted\n- otherwise, treat value as environment variable name first (`process.env[name]`), fallback to literal value\n\nOperational caveat: a mistyped `!` secret command can silently remove that header/env entry, producing downstream 401/403 or 
server startup failures. A mistyped environment variable name is sent literally unless that literal happens to be meaningful to the server.\n\n## 4) Tool bridge: MCP -> agent-callable tools\n\n`src/mcp/tool-bridge.ts` converts MCP tool definitions into `CustomTool`s.\n\n### Naming and collision domain\n\nTool names are generated as:\n\n```text\nmcp__<sanitized_server_name>_<sanitized_tool_name>\n```\n\nRules:\n\n- lowercases\n- non-`[a-z_]` chars become `_`\n- repeated underscores collapse\n- redundant `<server>_` prefix in tool name is stripped once\n\nThis avoids many collisions, but not all. Different raw names can still sanitize to the same identifier (for example `my-server` and `my.server` both sanitize similarly), and registry insertion is last-write-wins.\n\n### Schema mapping\n\n`tool-bridge.ts` passes each MCP `inputSchema` through `normalizeSchemaForMCP()` before registering it as a `CustomTool` schema.\n\n### Execution mapping\n\n`MCPTool.execute()` / `DeferredMCPTool.execute()`:\n\n- calls MCP `tools/call`\n- flattens MCP content into displayable text\n- returns structured details (`serverName`, `mcpToolName`, provider metadata)\n- maps server-reported `isError` to `Error: ...` text result\n- attempts reconnect + one retry for retriable connection errors\n- maps remaining thrown transport/runtime failures to `MCP error: ...`\n- preserves abort semantics by translating AbortError into `ToolAbortError`\n\n## 5) Operator lifecycle: add/edit/remove and live updates\n\nInteractive mode exposes `/mcp` in `src/modes/controllers/mcp-command-controller.ts`.\n\nSupported operations:\n\n- `add` (wizard or quick-add)\n- `remove` / `rm`\n- `enable` / `disable`\n- `test`\n- `reauth` / `unauth`\n- `reconnect`\n- `reload`\n- `resources`, `prompts`, `notifications`\n- Smithery search/login/logout flows\n\nConfig writes are atomic (`writeMCPConfigFile`: temp file + rename).\n\nAfter changes, controller calls `#reloadMCP()`:\n\n1. `mcpManager.disconnectAll()`\n2. 
`mcpManager.discoverAndConnect()`\n3. `session.refreshMCPTools(mcpManager.getTools())`\n\n`refreshMCPTools()` replaces all `mcp__` registry entries and immediately re-activates the latest MCP tool set, so changes take effect without restarting the session.\n\n### Mode differences\n\n- **Interactive/TUI mode**: `/mcp` gives in-app UX (wizard, OAuth flow, connection status text, immediate runtime rebinding).\n- **SDK/headless integration**: `discoverAndLoadMCPTools()` (`src/mcp/loader.ts`) returns loaded tools + per-server errors; no `/mcp` command UX.\n\n## 6) User-visible error surfaces\n\nCommon error strings users/operators see:\n\n- add/update validation failures:\n - `Invalid server config: ...`\n - `Server \"<name>\" already exists in <path>`\n- quick-add argument issues:\n - `Use either --url or -- <command...>, not both.`\n - `--token requires --url (HTTP/SSE transport).`\n- connect/test failures:\n - `Failed to connect to \"<name>\": <message>`\n - timeout help text suggests increasing timeout\n - auth help text for `401/403`\n- auth/OAuth flows:\n - `Authentication required ... OAuth endpoints could not be discovered`\n - `OAuth flow timed out. Please try again.`\n - `OAuth authentication failed: ...`\n- disabled server usage:\n - `Server \"<name>\" is disabled. Run /mcp enable <name> first.`\n\nBad source JSON in discovery is generally handled as warnings/logs; config-writer paths throw explicit errors.\n\n## 7) Practical authoring guidance\n\nFor robust MCP authoring in this codebase:\n\n1. Keep server names globally unique across all MCP-capable config sources.\n2. Prefer names that remain distinct after MCP tool-name sanitization to avoid generated `mcp__` collisions.\n3. Use explicit `type` to avoid accidental stdio defaults.\n4. Treat `enabled: false` as hard-off: server is omitted from runtime connect set.\n5. For OAuth configs, store a valid `credentialId`; otherwise auth injection is skipped.\n6. 
If using command-based secret resolution (`!cmd`), verify command output is stable and non-empty.\n\n## Implementation files\n\n- [`src/mcp/types.ts`](../packages/coding-agent/src/mcp/types.ts)\n- [`src/mcp/config.ts`](../packages/coding-agent/src/mcp/config.ts)\n- [`src/mcp/config-writer.ts`](../packages/coding-agent/src/mcp/config-writer.ts)\n- [`src/mcp/tool-bridge.ts`](../packages/coding-agent/src/mcp/tool-bridge.ts)\n- [`src/discovery/mcp-json.ts`](../packages/coding-agent/src/discovery/mcp-json.ts)\n- [`src/modes/controllers/mcp-command-controller.ts`](../packages/coding-agent/src/modes/controllers/mcp-command-controller.ts)\n- [`src/mcp/manager.ts`](../packages/coding-agent/src/mcp/manager.ts)\n- [`src/capability/index.ts`](../packages/coding-agent/src/capability/index.ts)\n- [`src/config/resolve-config-value.ts`](../packages/coding-agent/src/config/resolve-config-value.ts)\n- [`src/mcp/loader.ts`](../packages/coding-agent/src/mcp/loader.ts)\n",
36
37
  "memory.md": "# Autonomous Memory\n\nWhen the local memory backend is enabled, the agent automatically extracts durable knowledge from past sessions and injects a compact summary into future sessions for the same project. Over time it builds a project-scoped memory store — technical decisions, recurring workflows, pitfalls — that carries forward without manual effort.\n\nDisabled by default. Enable the local summary pipeline via `/settings` or `config.yml`:\n\n```yaml\nmemory:\n backend: local\n```\n\n## Usage\n\n### What gets injected\n\nAt session start, if a memory summary exists for the current project, it is injected into the system prompt as a **Memory Guidance** block. The agent is instructed to:\n\n- Treat memory as heuristic context — useful for process and prior decisions, not authoritative on current repo state.\n- Cite the memory artifact path when memory changes the plan, and pair it with current-repo evidence before acting.\n- Prefer repo state and user instruction when they conflict with memory; treat conflicting memory as stale.\n\n### Reading memory artifacts\n\nThe agent can read memory files directly using `memory://` URLs with the `read` tool:\n\n| URL | Content |\n| -------------------------------------- | ----------------------------------- |\n| `memory://root` | Compact summary injected at startup |\n| `memory://root/MEMORY.md` | Full long-term memory document |\n| `memory://root/skills/<name>/SKILL.md` | A generated skill playbook |\n\n### `/memory` slash command\n\n| Subcommand | Effect |\n| --------------------- | --------------------------------------------------------- |\n| `view` | Show the current backend injection payload |\n| `stats` | Show backend-specific memory statistics, when supported |\n| `diagnose` | Show backend-specific diagnostics, when supported |\n| `clear` / `reset` | Delete active backend memory data/artifacts |\n| `enqueue` / `rebuild` | Force consolidation/retention work for the active backend |\n\n## How it 
works\n\nLocal summary memories are built by a background pipeline that runs at startup; `/memory enqueue` marks consolidation work that the next startup picks up. The pipeline is skipped for subagents and for sessions that are not persisted to a session file.\n\n**Phase 1 — per-session extraction:** For each past session that has changed since it was last processed, a model reads the session history and extracts durable signal: technical decisions, constraints, resolved failures, recurring workflows. Sessions that are too recent, too old, currently active, or beyond the configured scan/age limits are skipped. Each extraction produces a raw memory block and a short synopsis for that session.\n\n**Phase 2 — consolidation:** After extraction, a second model pass reads all per-session extractions and produces three outputs written to disk:\n\n- `MEMORY.md` — a curated long-term memory document\n- `memory_summary.md` — the compact text injected at session start\n- `skills/` — reusable procedural playbooks, each in its own subdirectory\n\nPhase 2 uses a lease and heartbeat to prevent double-running when multiple processes start simultaneously. 
Stale skill directories from prior runs are pruned automatically.\n\nConsolidated output is redacted for common secret/token patterns before `MEMORY.md`, `memory_summary.md`, or generated skills are written to disk.\n\n### Extraction behavior\n\nMemory extraction and consolidation behavior is driven by static prompt files in `packages/coding-agent/src/prompts/memories/`.\n\n| File | Purpose | Variables |\n| ------------------------ | -------------------------------------------- | ------------------------------------------- |\n| `stage_one_system.md` | System prompt for per-session extraction | — |\n| `stage_one_input.md` | User-turn template wrapping session content | `{{thread_id}}`, `{{response_items_json}}` |\n| `consolidation_system.md`| System prompt for cross-session consolidation | — |\n| `consolidation.md` | User-turn prompt for cross-session consolidation | `{{raw_memories}}`, `{{rollout_summaries}}` |\n| `read-path.md` | Memory guidance injected into live sessions | `{{memory_summary}}`, `{{learned}}` |\n\n### Model selection\n\nMemory piggybacks on the model role system.\n\n| Phase | Role | Purpose |\n| ----------------------- | ------------------------------------------------------------------- | -------------------------------- |\n| Phase 1 (extraction) | `default` | Per-session knowledge extraction |\n| Phase 2 (consolidation) | `smol` (falls back to `default`, then current/first registry model) | Cross-session synthesis |\n\nIf the requested memory role is not configured, memory model resolution falls back to the `default` role, then the active session model, then the first model in the registry.\n\n## Configuration\n\n| Setting | Default | Description |\n| ------------------------------------- | ------- | ---------------------------------------------------------------------------------------------------------------------------------------- |\n| `memory.backend` | `off` | Select `local` for this pipeline; legacy `memories.enabled: true` is migrated 
to `memory.backend: local` when no explicit backend is set |\n| `memories.maxRolloutAgeDays` | `30` | Sessions older than this are not processed |\n| `memories.minRolloutIdleHours` | `12` | Sessions active more recently than this are skipped |\n| `memories.maxRolloutsPerStartup` | `64` | Cap on sessions processed in a single startup |\n| `memories.summaryInjectionTokenLimit` | `5000` | Max tokens of the summary injected into the system prompt |\n\nAdditional tuning knobs (concurrency, lease durations, token budgets) are available in config for advanced use.\n\n## Key files\n\n- `packages/coding-agent/src/memories/index.ts` — pipeline orchestration, injection, clear/enqueue entry points (the `/memory` command routes here via `packages/coding-agent/src/memory-backend/local-backend.ts`)\n- `packages/coding-agent/src/memories/storage.ts` — SQLite-backed job queue and thread registry\n- `packages/coding-agent/src/prompts/memories/` — memory prompt templates\n- `packages/coding-agent/src/internal-urls/memory-protocol.ts` — `memory://` URL handler\n",
37
38
  "mnemosyne-memory-backend.md": "# Mnemopi memory backend\n\nOh My Pi can use `@oh-my-pi/pi-mnemopi` as a local long-term memory backend.\n\nSet:\n\n```yaml\nmemory:\n backend: mnemopi\n```\n\nExample:\n\n```yaml\nmemory:\n backend: mnemopi\nmnemopi:\n scoping: per-project-tagged\n```\n\nWith this backend enabled, the coding agent:\n\n1. Opens one or more local Mnemopi SQLite databases according to the configured bank scoping.\n2. Recalls relevant memories into a `<memories>` block for the first model turn of a session and refreshes the base prompt if recall happens from the `agent_start` listener.\n3. Retains completed conversation turns into the retain bank after agent turns, no more often than `mnemopi.retainEveryNTurns`.\n4. Adds recalled memory as extra compaction context when compaction asks the memory backend for `preCompactionContext`.\n5. Uses the normal `/memory view`, `/memory stats`, `/memory diagnose`, `/memory clear`, and `/memory enqueue` commands through the shared memory backend interface.\n\nRecalled memory is background context, not instructions. Current user messages and tool output take precedence when they conflict.\n\n## Settings\n\n| Setting | Default | Description |\n| ------------------------------- | ---------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| `memory.backend` | `off` | Set to `mnemopi` to enable this backend. |\n| `mnemopi.dbPath` | agent memories dir | Optional SQLite database path. |\n| `mnemopi.bank` | unset | Optional shared bank base name passed to `Mnemopi`; the coding-agent wrapper scopes from this base according to `mnemopi.scoping`. Unset → shared bank `default`; per-project modes derive a project bank from the working-directory basename plus a stable hash of its absolute path. 
|\n| `mnemopi.scoping` | `per-project` | Memory visibility mode: `global` = one shared bank, `per-project` = isolated project memory, `per-project-tagged` = project-local writes plus global recall visibility. |\n| `mnemopi.autoRecall` | `true` | Recall memory on the first turn of a session. |\n| `mnemopi.autoRetain` | `true` | Retain completed turns automatically. |\n| `mnemopi.polyphonicRecall` | `false` | Enable 4-voice polyphonic recall (vector, graph, fact, temporal) with reciprocal rank fusion; `MNEMOPI_POLYPHONIC_RECALL` overrides when set. |\n| `mnemopi.enhancedRecall` | `false` | Enable the tiered query result cache for repeated/similar recall queries; `MNEMOPI_ENHANCED_RECALL` overrides when set. |\n| `mnemopi.retainEveryNTurns` | `4` | Minimum user turns between automatic retain writes. |\n| `mnemopi.recallLimit` | `8` | Maximum recalled memories in the prompt block. |\n| `mnemopi.recallContextTurns` | `3` | Prior user-bounded turns included in recall queries. |\n| `mnemopi.recallMaxQueryChars` | `4000` | Maximum composed recall query length. |\n| `mnemopi.injectionTokenLimit` | `5000` | Approximate token budget for memory prompt injection. |\n| `mnemopi.debug` | `false` | Enable debug logging for backend failures. |\n| `mnemopi.noEmbeddings` | `false` | Pass `noEmbeddings` to `Mnemopi` and force FTS-only recall. |\n| `mnemopi.embeddingVariant` | `en` | Local embedding model variant: `en` = `BAAI/bge-base-en-v1.5` (768d), `multilingual` = `intfloat/multilingual-e5-large` (1024d). `mnemopi.embeddingModel`/`MNEMOPI_EMBEDDING_MODEL` override it; changing it rebuilds stored embeddings on the next writable start. |\n| `mnemopi.embeddingModel` | variant default | Explicit embedding model id; overrides `mnemopi.embeddingVariant`. Precedence: this setting > `MNEMOPI_EMBEDDING_MODEL` env > variant default. |\n| `mnemopi.embeddingApiUrl` | env/default | OpenAI-compatible embedding endpoint passed to `Mnemopi`. 
|\n| `mnemopi.embeddingApiKey` | env/default | Embedding API key passed to `Mnemopi`. |\n| `mnemopi.llmMode` | `smol` | `smol` uses the configured pi-ai smol model, `remote` uses the settings below, and `none` disables LLM calls. |\n| `mnemopi.llmBaseUrl` | env/default | OpenAI-compatible LLM endpoint for `llmMode: remote`. |\n| `mnemopi.llmApiKey` | env/default | LLM API key for `llmMode: remote`. |\n| `mnemopi.llmModel` | env/default | LLM model id for `llmMode: remote`. |\n\n## Scoping\n\nThe coding-agent wrapper applies scoping on top of the underlying `Mnemopi` package:\n\n- `global` uses one shared bank for recall and writes.\n- `per-project` writes to and recalls from a bank derived from the current working directory alone — its basename plus a stable hash of its absolute path, independent of the surrounding git layout.\n- `per-project-tagged` writes to the project-local bank and recalls from both the project-local bank and the shared global bank, with duplicate recall results merged.\n\nThe combined project-plus-global behavior lives in the wrapper. The `@oh-my-pi/pi-mnemopi` package itself still exposes banks and constructor options directly, including `bank` for selecting a bank name. Project-local banks other than the shared bank are stored as sibling bank databases managed by Mnemopi's `BankManager`.\n\n## LLM and embeddings\n\nThe backend passes these settings to the `Mnemopi` constructor; if a setting is omitted, Mnemopi falls back to its `MNEMOPI_*` environment defaults. The backend does not download or run a local GGUF LLM. 
LLM-dependent paths use a configured pi-ai model, an opt-in local on-device memory model (`providers.memoryModel`, ONNX — overrides `smol`/`remote` when set to a local model), a dynamic completion function, a remote OpenAI-compatible endpoint, or deterministic no-LLM fallbacks.\n\nFTS-only:\n\n```yaml\nmemory:\n backend: mnemopi\nmnemopi:\n noEmbeddings: true\n```\n\nEquivalent constructor shape:\n\n```ts\nnew Mnemopi({ noEmbeddings: true });\n```\n\nRemote embeddings:\n\n```yaml\nmnemopi:\n embeddingModel: text-embedding-3-small\n embeddingApiUrl: https://api.openai.com/v1\n embeddingApiKey: ${OPENAI_API_KEY}\n```\n\nEquivalent constructor shape:\n\n```ts\nnew Mnemopi({\n embeddingModel: \"text-embedding-3-small\",\n embeddingApiUrl: \"https://api.openai.com/v1\",\n embeddingApiKey,\n});\n```\n\nRemote LLM:\n\n```yaml\nmnemopi:\n llmMode: remote\n llmBaseUrl: https://api.openai.com/v1\n llmApiKey: ${OPENAI_API_KEY}\n llmModel: gpt-4.1-mini\n```\n\nEquivalent constructor shapes:\n\n```ts\nnew Mnemopi({ llm: { baseUrl, apiKey, model } });\nnew Mnemopi({ llmBaseUrl: baseUrl, llmApiKey: apiKey, llmModel: model });\n```\n\nDynamic function LLM for rotating OAuth tokens:\n\n```ts\nnew Mnemopi({\n llm: async (prompt, opts) => {\n const token = await getFreshOauthToken();\n return await completeWithPiAi(prompt, {\n token,\n maxTokens: opts?.maxTokens,\n temperature: opts?.temperature,\n });\n },\n});\n```\n\npi-ai smol model LLM:\n\n```yaml\nmnemopi:\n llmMode: smol\n```\n\nThe coding agent resolves its configured smol role and passes a dynamic completion function so every Mnemopi LLM call can fetch the current provider credentials at call time:\n\n```ts\nnew Mnemopi({\n llm: async (prompt, opts) => completeSmolWithCurrentAuth(prompt, opts),\n});\n```\n\n## Operational notes\n\n- The default shared database lives under the agent memories directory in `mnemopi/mnemopi.db`; project-scoped banks use sibling database paths under that Mnemopi directory.\n- `/memory clear` 
removes every scoped Mnemopi SQLite database and sidecar WAL/SHM files for the active configuration.\n- `/memory enqueue` forces retention of the current session, flushes pending fact extractions, and runs Mnemopi sleep/consolidation.\n- `/memory stats` and `/memory diagnose` render backend-specific bank statistics/diagnostics when the Mnemopi backend is active.\n- Subagents do not own separate Mnemopi retain loops; they alias the parent state when a parent Mnemopi state exists, and otherwise remain inert.\n",
38
- "models.md": "# Model and Provider Configuration (`models.yml`)\n\nThis document describes how the coding-agent currently loads models, applies overrides, resolves credentials, and chooses models at runtime.\n\n## What controls model behavior\n\nPrimary implementation files:\n\n- `src/config/model-registry.ts` — loads built-in + custom models, provider overrides, runtime discovery, auth integration\n- `src/config/model-resolver.ts` — parses model patterns and selects initial/smol/slow models\n- `src/config/settings-schema.ts` — model-related settings (`modelRoles`, provider transport preferences)\n- `src/session/auth-storage.ts` — re-exports `AuthStorage` from `@oh-my-pi/pi-ai` (`packages/ai/src/auth-storage.ts`); API key + OAuth resolution order\n- `packages/catalog/src/models.ts` and `packages/catalog/src/types.ts` — built-in providers/models (`getBundledModels` / `getBundledProviders`) and `Model`/`compat` types\n\n## Config file location and legacy behavior\n\nDefault config path:\n\n- `~/.omp/agent/models.yml`\n\nLegacy behavior still present:\n\n- If `models.yml` is missing and `models.json` exists at the same location, it is migrated to `models.yml`.\n- Explicit `.json` / `.jsonc` config paths are still supported when passed programmatically to `ModelRegistry`.\n\n## `models.yml` shape\n\n```yaml\nproviders:\n <provider-id>:\n # provider-level config\nequivalence:\n overrides:\n <provider-id>/<model-id>: <canonical-model-id>\n exclude:\n - <provider-id>/<model-id>\n```\n\n`provider-id` is the canonical provider key used across selection and auth lookup.\n\n`equivalence` is optional and configures canonical model grouping on top of concrete provider models:\n\n- `overrides` maps an exact concrete selector (`provider/modelId`) to an official upstream canonical id\n- `exclude` opts a concrete selector out of canonical grouping\n\n## Provider-level fields\n\n```yaml\nproviders:\n my-provider:\n baseUrl: https://api.example.com/v1\n apiKey: 
MY_PROVIDER_API_KEY\n api: openai-completions\n headers:\n X-Team: platform\n authHeader: true\n auth: apiKey\n disableStrictTools: false # set true for Anthropic-compatible endpoints that reject the strict field\n discovery:\n type: ollama\n modelOverrides:\n some-model-id:\n name: Renamed model\n models:\n - id: some-model-id\n name: Some Model\n api: openai-completions\n reasoning: false\n input: [text]\n cost:\n input: 0\n output: 0\n cacheRead: 0\n cacheWrite: 0\n contextWindow: 128000\n maxTokens: 16384\n headers:\n X-Model: value\n compat:\n supportsStore: true\n supportsDeveloperRole: true\n supportsReasoningEffort: true\n maxTokensField: max_completion_tokens\n openRouterRouting:\n only: [anthropic]\n vercelGatewayRouting:\n order: [anthropic, openai]\n extraBody:\n gateway: m1-01\n controller: mlx\n```\n\n### Allowed provider/model `api` values\n\n- `openai-completions`\n- `openai-responses`\n- `openai-codex-responses`\n- `azure-openai-responses`\n- `anthropic-messages`\n- `google-generative-ai`\n- `google-gemini-cli`\n- `google-vertex`\n\n### Allowed auth/discovery values\n\n- `auth`: `apiKey` (default), `none`, or `oauth`; for `models.yml` custom models, `oauth` is accepted by schema but does not waive the `apiKey` requirement\n- `discovery.type`: `ollama`, `llama.cpp`, `lm-studio`, `openai-models-list`, or `proxy`\n- `transport`: `pi-native` only. 
When set, every model under that provider is sent to an `omp auth-gateway` compatible `baseUrl` via `POST /v1/pi/stream`; `apiKey` is the gateway bearer.\n\n## Validation rules (current)\n\n### Full custom provider (`models` is non-empty)\n\nRequired:\n\n- `baseUrl`\n- `apiKey` unless `auth: none`\n- `api` at provider level or each model\n\n### Override-only provider (`models` missing or empty)\n\nMust define at least one of:\n\n- `baseUrl`\n- `apiKey`\n- `auth: none`\n- `headers`\n- `compat`\n- `disableStrictTools`\n- `modelOverrides`\n- `discovery`\n\n### Discovery\n\n- `discovery` requires provider-level `api`, except `discovery.type: proxy` (per-model wire auto-detected).\n\n### Model value checks\n\n- `id` required\n- `contextWindow` and `maxTokens` must be positive if provided\n\n### Command-resolved secrets\n\nProvider `apiKey` values and provider/model `headers` values may start with `!` to read a secret from command stdout. The command is run with a 10 s timeout, stdout is trimmed, and empty/failing commands are omitted:\n\n```yaml\nproviders:\n openai:\n apiKey: \"!op read op://dev/openai/api-key\"\n headers:\n X-Team-Key: \"!bw get password omp-team-key\"\n```\n\nSuccessful command outputs are cached for the process lifetime so the command is not re-run for every model.\n\n## Merge and override order\n\nModelRegistry pipeline (on refresh):\n\n1. Load built-in providers/models from `@oh-my-pi/pi-catalog` (`getBundledProviders` / `getBundledModels`).\n2. Load `models.yml` custom config.\n3. Apply provider overrides (`baseUrl`, `headers`, `disableStrictTools`) to built-in models.\n4. Apply `modelOverrides` (per provider + model id).\n5. Merge custom `models`:\n - same `provider + id` replaces existing\n - otherwise append\n6. 
Load cached/runtime-discovered models (Ollama, llama.cpp, LM Studio, plus built-in provider managers), then re-apply model overrides.\n\n### Provider-model cache and static fingerprint\n\nCached per-provider model lists are persisted in the model-cache SQLite\ndatabase (current schema version 6) with a `static_fingerprint` column that\nhashes the static catalog slice merged into the row. When `resolveProviderModels`\nskips the network fetch and the fingerprint of the in-memory static\ncatalog matches the cached one, the cached rows are returned verbatim —\nthe static + dynamic merge is bypassed entirely. The fingerprint is\nmemoized per process by tagging the static-models array with a symbol\nproperty, so repeated cold-start calls do not re-hash.\n\n## Canonical model equivalence and coalescing\n\nThe registry keeps every concrete provider model and then builds a canonical layer above them.\n\nCanonical ids are official upstream ids only, for example:\n\n- `claude-opus-4-6`\n- `claude-haiku-4-5`\n- `gpt-5.3-codex`\n\n### `models.yml` equivalence config\n\nExample:\n\n```yaml\nproviders:\n zenmux:\n baseUrl: https://api.zenmux.example/v1\n apiKey: ZENMUX_API_KEY\n api: openai-codex-responses\n models:\n - id: codex\n name: Zenmux Codex\n reasoning: true\n input: [text]\n cost:\n input: 0\n output: 0\n cacheRead: 0\n cacheWrite: 0\n contextWindow: 200000\n maxTokens: 32768\n\nequivalence:\n overrides:\n zenmux/codex: gpt-5.3-codex\n p-codex/codex: gpt-5.3-codex\n exclude:\n - demo/codex-preview\n```\n\nBuild order for canonical grouping:\n\n1. exact user override from `equivalence.overrides`\n2. bundled official-id matches from built-in model metadata\n3. conservative heuristic normalization for gateway/provider variants\n4. 
fallback to the concrete model's own id\n\nCurrent heuristics are intentionally narrow:\n\n- embedded upstream prefixes can be stripped when present, for example `anthropic/...` or `openai/...`\n- dotted and dashed version variants can normalize only when they map to an existing official id, for example `4.6 -> 4-6`\n- ambiguous families or versions are not merged without a bundled match or explicit override\n\n### Canonical resolution behavior\n\nWhen multiple concrete variants share a canonical id, resolution uses:\n\n1. availability and auth\n2. `config.yml` `modelProviderOrder`\n3. existing registry/provider order if `modelProviderOrder` is unset\n\nDisabled or unauthenticated providers are skipped.\n\nSession state and transcripts continue to record the concrete provider/model that actually executed the turn.\n\nProvider defaults vs per-model overrides:\n\n- Provider `headers` are baseline.\n- Model `headers` override provider header keys.\n- `modelOverrides` can override model metadata (`name`, `reasoning`, `thinking`, `input`, `supportsTools`, `cost`, `premiumMultiplier`, `contextWindow`, `maxTokens`, `omitMaxOutputTokens`, `headers`, `compat`, `contextPromotionTarget`).\n- `compat` is deep-merged for nested routing blocks (`openRouterRouting`, `vercelGatewayRouting`, `extraBody`).\n\n## Runtime discovery integration\n\n### Implicit Ollama discovery\n\nIf `ollama` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `ollama`\n- api: `openai-responses`\n- base URL: `OLLAMA_BASE_URL`, or `OLLAMA_HOST`, or `http://127.0.0.1:11434`\n- context window: `OLLAMA_CONTEXT_LENGTH` if set, otherwise Ollama `/api/show` metadata, otherwise `128000`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery calls Ollama endpoints and normalizes discovered OpenAI-compatible models to `openai-responses`.\n\n`OLLAMA_CONTEXT_LENGTH` does not configure Ollama's runtime `num_ctx`; set that in Ollama/model configuration 
separately.\n\n### Implicit llama.cpp discovery\n\nIf `llama.cpp` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `llama.cpp`\n- api: `openai-responses`\n- base URL: `LLAMA_CPP_BASE_URL` or `http://127.0.0.1:8080`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery calls llama.cpp model endpoints and synthesizes model entries with local defaults.\n\n### Implicit LM Studio discovery\n\nIf `lm-studio` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `lm-studio`\n- api: `openai-completions`\n- base URL: `LM_STUDIO_BASE_URL` or `http://127.0.0.1:1234/v1`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery fetches models (`GET /models`) and synthesizes model entries with local defaults.\n\nThis path also works for local OpenAI-compatible servers that are not LM Studio. For example, if oMLX is bound to Ollama's usual port, set `LM_STUDIO_BASE_URL=http://127.0.0.1:11434/v1` to discover it through the existing `/v1/models` flow. Running oMLX and Ollama side by side requires assigning a different port to one of them. Do not configure oMLX as `ollama`: Ollama discovery uses native `/api/tags` and `/api/show` endpoints, not OpenAI `/v1/models`.\n\n### Explicit provider discovery\n\nYou can configure discovery yourself:\n\n```yaml\nproviders:\n ollama:\n baseUrl: http://127.0.0.1:11434\n api: openai-responses\n auth: none\n discovery:\n type: ollama\n\n llama.cpp:\n baseUrl: http://127.0.0.1:8080\n api: openai-responses\n auth: none\n discovery:\n type: llama.cpp\n```\n\n### Proxy discovery (`discovery.type: proxy`)\n\nFor Anthropic+OpenAI-compatible proxies (new-api / one-api / similar)\nthat expose both `/v1/messages` and `/v1/chat/completions` behind the same\nhost. 
Discovery hits `GET /v1/models` (10s timeout, OpenAI-style payload) and\nderives each model's `api` from the entry's `supported_endpoint_types`:\n\n- contains `\"anthropic\"` -> `api: anthropic-messages` (routes via `/v1/messages`)\n- contains `\"openai\"` -> `api: openai-completions` (routes via `/v1/chat/completions`)\n- otherwise -> falls back to provider-level `api` if set, else dropped\n\nProvider-level `api` is **optional** with `discovery.type: proxy` because the\nper-model wire is auto-detected. The Anthropic SDK strips a trailing `/v1`\nfrom `baseUrl` before appending `/v1/messages`, so a single discovery `baseUrl`\n(ending in `/v1`) round-trips correctly to both wires.\n\n```yaml\nproviders:\n newapi-reseller:\n baseUrl: https://api.example.com/v1\n apiKey: xxxx\n authHeader: true # injects Authorization: Bearer for openai models\n disableStrictTools: true # most anthropic-fronted proxies reject `strict`\n discovery:\n type: proxy\n```\n\n### Extension provider registration\n\nExtensions can register providers at runtime (`pi.registerProvider(...)`), including:\n\n- model replacement/append for a provider\n- custom stream handler registration for new API IDs\n- custom OAuth provider registration\n\n## Auth and API key resolution order\n\nWhen requesting a key for a provider, effective order is:\n\n1. Runtime override (CLI `--api-key`)\n2. Stored API key credential in `agent.db`\n3. Stored OAuth credential in `agent.db` (with refresh)\n4. Environment variable mapping (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, etc.)\n5. 
ModelRegistry fallback resolver (provider `apiKey` from `models.yml`, env-name-or-literal semantics)\n\n`models.yml` `apiKey` behavior:\n\n- Value is first treated as an environment variable name.\n- If no env var exists, the literal string is used as the token.\n\nIf `authHeader: true` and provider `apiKey` is set, models get:\n\n- `Authorization: Bearer <resolved-key>` header injected.\n\nKeyless providers:\n\n- Providers marked `auth: none` are treated as available without credentials.\n- `getApiKey*` returns `kNoAuth` for them.\n\n### Broker mode\n\nWhen `OMP_AUTH_BROKER_URL` (or `auth.broker.url`) is set, the local SQLite credential store is replaced by `RemoteAuthCredentialStore`. Layers 2 and 3 above (stored API key / OAuth in `agent.db`) are served from a broker-supplied snapshot whose `refresh` tokens are redacted; expiry triggers `POST /v1/credential/:id/refresh` on the broker rather than a local refresh.\n\n`AuthStorage.setConfigApiKey` lets a `models.yml` `apiKey` win over a broker-resolved OAuth token without overriding a runtime `--api-key`. See [`auth-broker-gateway.md`](./auth-broker-gateway.md) for the full broker / gateway design and env surface (`OMP_AUTH_BROKER_URL`, `OMP_AUTH_BROKER_TOKEN`, `auth.broker.url`, `auth.broker.token`).\n\n## Model availability vs all models\n\n- `getAll()` returns the loaded model registry (built-in + merged custom + discovered).\n- `getAvailable()` filters to models that are keyless or have resolvable auth.\n\nSo a model can exist in registry but not be selectable until auth is available.\n\n## Runtime model resolution\n\n### CLI and pattern parsing\n\n`model-resolver.ts` supports:\n\n- exact `provider/modelId`\n- exact canonical model id\n- exact model id (provider inferred)\n- fuzzy/substring matching\n- glob scope patterns in `--models` (e.g. 
`openai/*`, `*sonnet*`)\n- optional `:thinkingLevel` suffix (`off|minimal|low|medium|high|xhigh`)\n\n`--provider` is legacy; `--model` is preferred.\n\nResolution precedence for exact selectors:\n\n1. exact `provider/modelId` bypasses coalescing\n2. exact canonical id resolves through the canonical index\n3. exact bare concrete id still works\n4. fuzzy and glob matching run after the exact paths\n\n### Initial model selection priority\n\n`findInitialModel(...)` uses this order:\n\n1. explicit CLI provider+model\n2. first scoped model (if not resuming)\n3. saved default provider/model\n4. known provider defaults (e.g. OpenAI/Anthropic/etc.) among available models\n5. first available model\n\n### Role aliases and settings\n\nSupported model roles:\n\n- `default`, `smol`, `slow`, `vision`, `plan`, `designer`, `commit`, `title`, `task`, `advisor`\n\nRole aliases like `pi/smol` expand through `settings.modelRoles`. Each role value can also append a thinking selector such as `:minimal`, `:low`, `:medium`, or `:high`.\n\nIf a role points at another role, the target model still inherits normally and any explicit suffix on the referring role wins for that role-specific use.\n\nRelated settings:\n\n- `modelRoles` (record)\n- `enabledModels` (scoped pattern list)\n- `modelProviderOrder` (global canonical-provider precedence)\n- `providers.kimiApiFormat` (`openai` or `anthropic` request format)\n- `providers.openaiWebsockets` (`auto|off|on` websocket preference for OpenAI Codex transport)\n\n`modelRoles` may store either:\n\n- `provider/modelId` to pin a concrete provider variant\n- a canonical id such as `gpt-5.3-codex` to allow provider coalescing\n\nFor `enabledModels` and CLI `--models`:\n\n- exact canonical ids expand to all concrete variants in that canonical group\n- explicit `provider/modelId` entries stay exact\n- globs and fuzzy matches still operate on concrete models\n\nGlobal `enabledModels` and `disabledProviders` entries may also be scoped to a path 
prefix:\n\n```yaml\nenabledModels:\n - claude-sonnet-4-5\n - path: ~/work\n models:\n - anthropic/claude-opus-4-5\ndisabledProviders:\n - ollama\n - path: ~/private\n providers:\n - anthropic\n```\n\nString entries apply everywhere. Scoped entries apply when the current working directory is the configured path or one of its subdirectories. Use `path`, `paths`, `pathPrefix`, or `pathPrefixes`; use `models` for `enabledModels`, `providers` for `disabledProviders`, or `values` for either.\n\n## `/model` and `omp models`\n\nBoth surfaces keep provider-prefixed models visible and selectable.\n\nThey now also expose canonical/coalesced models:\n\n- `/model` includes a canonical view alongside provider tabs\n- `omp models` prints provider-grouped tables of every concrete model, and `omp models canonical` prints the coalesced canonical view\n\nSelecting a canonical entry stores the canonical selector. Selecting a provider row stores the explicit `provider/modelId`.\n\n## Context promotion (model-level fallback chains)\n\nContext promotion is an overflow recovery mechanism for small-context variants (for example `*-spark`) that automatically promotes to a larger-context sibling when the API rejects a request with a context length error.\n\n### Trigger and order\n\nWhen a turn fails with a context overflow error (e.g. `context_length_exceeded`), `AgentSession` attempts promotion **before** falling back to compaction:\n\n1. If `contextPromotion.enabled` is true, resolve a promotion target (see below).\n2. If a target is found, switch to it and retry the request — no compaction needed.\n3. If no target is available, fall through to auto-compaction on the current model.\n\n### Target selection\n\nSelection is model-driven, not role-driven:\n\n1. `currentModel.contextPromotionTarget` (if configured)\n2. 
smallest larger-context model on the same provider + API\n\nCandidates are ignored unless credentials resolve (`ModelRegistry.getApiKey(...)`).\n\n### OpenAI Codex websocket handoff\n\nIf switching from/to `openai-codex-responses`, session provider state key `openai-codex-responses` is closed before model switch. This drops websocket transport state so the next turn starts clean on the promoted model.\n\n### Persistence behavior\n\nPromotion uses temporary switching (`setModelTemporary`):\n\n- recorded as a temporary `model_change` in session history\n- does not rewrite saved role mapping\n\n### Configuring explicit fallback chains\n\nConfigure fallback directly in model metadata via `contextPromotionTarget`.\n\n`contextPromotionTarget` accepts either:\n\n- `provider/model-id` (explicit)\n- `model-id` (resolved within current provider)\n\nExample (`models.yml`) for an explicit OpenAI fallback:\n\n```yaml\nproviders:\n openai-codex:\n modelOverrides:\n gpt-5.5:\n contextPromotionTarget: openai-codex/gpt-5.4\n```\n\nThe built-in model policy currently links OpenAI `codex-spark` variants to `gpt-5.5`, and `gpt-5.5` to `gpt-5.4`, when that target exists on the same provider/API.\n\n## Compatibility and routing fields\n\nThe `compat` block on a provider or model overrides the URL-based auto-detection in `packages/catalog/src/compat/openai.ts` (`buildOpenAICompat`). It is validated by `OpenAICompatSchema` in `packages/coding-agent/src/config/models-config-schema.ts` and consumed by every `openai-completions` transport (`packages/ai/src/providers/openai-completions.ts`). The canonical type is `OpenAICompat` in `packages/catalog/src/types.ts`.\n\n`models.yml` accepts the following keys (all optional; unset falls back to URL detection):\n\nRequest shaping:\n\n- `supportsStore` — emit `store: false` on requests. Default: auto (off for non-standard endpoints).\n- `supportsDeveloperRole` — use the `developer` system role for reasoning models instead of `system`. 
Default: auto.\n- `supportsMultipleSystemMessages` — preserve separate leading system/developer messages instead of coalescing them. Default: auto (known OpenAI-compatible hosted APIs preserve; strict-template/local hosts coalesce).\n- `supportsUsageInStreaming` — send `stream_options: { include_usage: true }` to receive token usage on streaming responses. Default: `true`.\n- `maxTokensField` — `\"max_completion_tokens\"` or `\"max_tokens\"`. Default: auto.\n- `supportsToolChoice` — emit the `tool_choice` parameter when the caller forces a specific tool. Default: `true`. Set `false` for endpoints that 400 on `tool_choice` (e.g. DeepSeek when reasoning is on).\n- `supportsForcedToolChoice` — accept a forced `tool_choice` that requires a specific tool. Default: `true`. When `false`, a forced selector is downgraded to `auto` so the tool stays available for endpoints that reject forced tool calls (e.g. some thinking-required OpenAI-compatible models).\n- `disableReasoningOnForcedToolChoice` — drop `reasoning_effort` / OpenRouter `reasoning` whenever `tool_choice` forces a call. Default: auto (Kimi/Anthropic-fronted endpoints).\n- `disableReasoningOnToolChoice` — drop reasoning fields whenever any `tool_choice` is sent. Default: auto (DeepSeek reasoning models).\n- `alwaysSendMaxTokens` — always send a max-token field when the caller did not provide one. Default: auto (Kimi-family models derive TPM limits from `max_tokens`).\n- `strictResponsesPairing` — Responses-API tool-call/result history must be strictly paired. Default: auto (Azure OpenAI, GitHub Copilot).\n- `streamIdleTimeoutMs` — stream-watchdog idle-timeout floor in ms for slow reasoning hosts. Default: auto (GLM coding-plan hosts, direct DeepSeek reasoning).\n- `cacheControlFormat` — `\"anthropic\"` to include Anthropic-style prompt-cache markers in chat-completions payloads. 
Default: auto (OpenRouter `anthropic/*` models).\n- `supportsLongPromptCacheRetention` — host honors `prompt_cache_retention: \"24h\"` on the Responses API. Default: auto (api.openai.com).\n- `extraBody` — extra top-level fields merged into every request body (gateway hints, controller selectors, etc.).\n\nReasoning / thinking:\n\n- `supportsReasoningEffort` — accept `reasoning_effort`. Default: auto (off for Grok, Z.ai/Zhipu, and Xiaomi MiMo).\n- `supportsReasoningParams` — whether request shaping may send reasoning params at all. Default: auto (off for GitHub Copilot chat-completions).\n- `reasoningEffortMap` — partial map from internal effort levels (`minimal|low|medium|high|xhigh`) to provider-specific strings (e.g. DeepSeek maps `xhigh -> \"max\"`).\n- `thinkingFormat` — request shape for thinking: `\"openai\"` (`reasoning_effort`), `\"openrouter\"` (`reasoning: { effort }`), `\"zai\"` (`thinking: { type: \"enabled\" }`), `\"qwen\"` (top-level `enable_thinking`), or `\"qwen-chat-template\"` (`chat_template_kwargs.enable_thinking`). Default: `\"openai\"`.\n- `reasoningContentField` — assistant field carrying chain-of-thought: `\"reasoning_content\"`, `\"reasoning\"`, or `\"reasoning_text\"`. Default: auto.\n- `requiresReasoningContentForToolCalls` — assistant tool-call turns must round-trip the reasoning field (DeepSeek-R1, Kimi, OpenRouter when reasoning is on). Default: `false`.\n- `allowsSyntheticReasoningContentForToolCalls` — allow a placeholder reasoning field when a prior assistant tool-call turn lacks provider reasoning content. Default: `true`; set `false` for providers that validate the exact reasoning value.\n- `requiresAssistantContentForToolCalls` — assistant tool-call turns must include non-empty text content (Kimi). 
Default: `false`.\n- `whenThinking` — partial compat overrides applied only when a request actually engages thinking mode (deep-merged over the baseline compat).\n\nTool / message normalization:\n\n- `requiresToolResultName` — tool-result messages need a `name` field (Mistral). Default: auto.\n- `requiresAssistantAfterToolResult` — a user message after a tool result needs an assistant turn in between. Default: auto.\n- `requiresThinkingAsText` — convert thinking blocks to text wrapped in `<thinking>` delimiters (Mistral). Default: auto.\n- `requiresMistralToolIds` — normalize tool-call ids to exactly 9 alphanumeric chars. Default: auto.\n- `supportsStrictMode` — accept the per-tool `strict` field on tool schemas. Default: conservative auto-detect per provider/baseUrl.\n- `toolStrictMode` — `\"all_strict\"` forces strict on every tool, `\"none\"` forces it off; unset keeps the existing per-tool mixed behavior.\n\nGateway routing (only applied when `baseUrl` matches the gateway):\n\n- `openRouterRouting.only` / `openRouterRouting.order` — provider routing on `openrouter.ai` (see <https://openrouter.ai/docs/provider-routing>).\n- `vercelGatewayRouting.only` / `vercelGatewayRouting.order` — provider routing on `ai-gateway.vercel.sh` (see <https://vercel.com/docs/ai-gateway/models-and-providers/provider-options>).\n\nProvider-level `compat` is the baseline; per-model `compat` is deep-merged on top, with `openRouterRouting`, `vercelGatewayRouting`, and `extraBody` merged as nested objects.\n\n### Anthropic compatibility (`anthropic-messages`)\n\nFor `anthropic-messages` models the runtime uses a separate `AnthropicCompat` shape (`packages/catalog/src/types.ts`). 
The `models.yml` schema exposes the strict-tools opt-out as a top-level provider field (see below) plus two Anthropic-side flags in the same `compat` slot — `requiresToolResultId` (non-standard `id` alias on `tool_result` blocks for Z.AI-style proxies) and `replayUnsignedThinking` (replay unsigned thinking blocks as native thinking instead of demoting them to text); the remaining Anthropic-side knobs (`disableAdaptiveThinking`, `supportsEagerToolInputStreaming`, `supportsLongCacheRetention`, `supportsMidConversationSystem`, `supportsForcedToolChoice`, `supportsSamplingParams`, `escapeBuiltinToolNames`) are set by built-in catalog metadata and are not user-configurable from `models.yml`.\n\n### Strict tool schemas (`disableStrictTools`)\n\nAnthropic's API supports a `strict` field on tool definitions that forces the model to always follow the provided schema exactly. OMP enables it by default for a small allowlist of high-frequency built-in `anthropic-messages` tools (`bash`, `python`, `edit`, and `find`) whose schemas fit Anthropic's strict grammar limits; other tools still send normalized schemas but omit `strict`.\n\nThird-party providers that front the Anthropic API (AWS Bedrock, Azure, self-hosted proxies) do not always implement this field and will reject requests that include it. Set `disableStrictTools: true` at the provider level to opt out of strict mode for the allowlisted tools:\n\n```yaml\nproviders:\n bedrock-anthropic:\n baseUrl: https://bedrock-runtime.us-east-1.amazonaws.com/anthropic\n apiKey: AWS_BEARER_TOKEN\n api: anthropic-messages\n disableStrictTools: true\n models:\n - id: claude-sonnet-4-20250514\n name: Claude Sonnet 4 (Bedrock)\n input: [text, image]\n contextWindow: 200000\n maxTokens: 16384\n cost:\n input: 3.00\n output: 15.00\n cacheRead: 0.30\n cacheWrite: 3.75\n```\n\n`disableStrictTools` is a provider-level flag that applies to all models in the provider. 
It disables the Anthropic `strict` marker only for tools that OMP would otherwise mark strict; it does not change runtime tool argument validation. OMP can automatically retry without strict tools after Anthropic reports a strict-grammar-too-large error before the first streamed token, but proxies that reject the `strict` field for other reasons should set this flag explicitly.\n\nTool schemas going on the wire are normalized by the unified flow in\n`packages/ai/src/utils/schema/normalize.ts` (Google/CCA/MCP dispatchers\nplus the OpenAI strict-mode sanitize+enforce pipeline). See\n[`ai-schema-normalize.md`](./ai-schema-normalize.md) for the strict-mode\nedge cases (local `$ref` inlining, single-item `allOf` collapse,\n`anyOf`-wrapper description hoist, enum/const primitive-type inference)\nand the per-provider dispatcher mapping.\n\n## Practical examples\n\n### Local OpenAI-compatible endpoint (no auth)\n\n```yaml\nproviders:\n local-openai:\n baseUrl: http://127.0.0.1:8000/v1\n auth: none\n api: openai-completions\n models:\n - id: Qwen/Qwen2.5-Coder-32B-Instruct\n name: Qwen 2.5 Coder 32B (local)\n```\n\nFor oMLX or another local OpenAI-compatible server with a discoverable `/v1/models` endpoint, prefer discovery instead of listing models by hand. Set `api` to the endpoint family your server actually exposes: `openai-completions` uses `/v1/chat/completions`; servers that expose `/v1/responses` need `openai-responses` instead.\n\n```yaml\nproviders:\n omlx:\n baseUrl: http://127.0.0.1:11434/v1\n auth: none\n api: openai-completions\n discovery:\n type: openai-models-list\n```\n\nThe built-in vLLM provider can be pointed at a non-default endpoint without declaring a custom discovery type. 
OMP uses vLLM's `/v1/models` metadata and preserves vLLM's `max_model_len` field as the discovered context window.\n\n```yaml\nproviders:\n vllm:\n baseUrl: http://192.168.5.3:8085/v1\n auth: none\n```\n\nFor multiple vLLM endpoints, use arbitrary provider IDs with the generic OpenAI-compatible discovery path. Set `auth: none` for local no-auth servers or `apiKey` for authenticated ones. Generic discovery reads `max_model_len` first and then `context_length` as a generic OpenAI-compatible fallback.\n\n```yaml\nproviders:\n vllm-fast:\n baseUrl: http://host-a:8000/v1\n auth: none\n api: openai-completions\n discovery:\n type: openai-models-list\n vllm-long:\n baseUrl: http://host-b:8000/v1\n auth: none\n api: openai-completions\n discovery:\n type: openai-models-list\n```\n\n### Hosted proxy with env-based key\n\n```yaml\nproviders:\n anthropic-proxy:\n baseUrl: https://proxy.example.com/anthropic\n apiKey: ANTHROPIC_PROXY_API_KEY\n api: anthropic-messages\n authHeader: true\n disableStrictTools: true # if the proxy doesn't support strict tool schemas\n models:\n - id: claude-sonnet-4-20250514\n name: Claude Sonnet 4 (Proxy)\n reasoning: true\n input: [text, image]\n```\n\n### Override built-in provider route + model metadata\n\n```yaml\nproviders:\n openrouter:\n baseUrl: https://my-proxy.example.com/v1\n headers:\n X-Team: platform\n modelOverrides:\n anthropic/claude-sonnet-4:\n name: Sonnet 4 (Corp)\n compat:\n openRouterRouting:\n only: [anthropic]\n```\n\n## Legacy consumer caveat\n\nMost model configuration now flows through `models.yml` via `ModelRegistry`. Explicit `.json` / `.jsonc` paths remain supported only when passed programmatically to `ModelRegistry`; the default user config is `~/.omp/agent/models.yml`.\n\n## Failure mode\n\nIf `models.yml` fails schema or validation checks:\n\n- registry keeps operating with built-in models\n- error is exposed via `ModelRegistry.getError()` and surfaced in UI/notifications\n",
39
+ "models.md": "# Model and Provider Configuration (`models.yml`)\n\nThis document describes how the coding-agent currently loads models, applies overrides, resolves credentials, and chooses models at runtime.\n\n## What controls model behavior\n\nPrimary implementation files:\n\n- `src/config/model-registry.ts` — loads built-in + custom models, provider overrides, runtime discovery, auth integration\n- `src/config/model-resolver.ts` — parses model patterns and selects initial/smol/slow models\n- `src/config/settings-schema.ts` — model-related settings (`modelRoles`, provider transport preferences)\n- `src/session/auth-storage.ts` — re-exports `AuthStorage` from `@oh-my-pi/pi-ai` (`packages/ai/src/auth-storage.ts`); API key + OAuth resolution order\n- `packages/catalog/src/models.ts` and `packages/catalog/src/types.ts` — built-in providers/models (`getBundledModels` / `getBundledProviders`) and `Model`/`compat` types\n\n## Config file location and legacy behavior\n\nDefault config path:\n\n- `~/.omp/agent/models.yml`\n\nLegacy behavior still present:\n\n- If `models.yml` is missing and `models.json` exists at the same location, it is migrated to `models.yml`.\n- Explicit `.json` / `.jsonc` config paths are still supported when passed programmatically to `ModelRegistry`.\n\n## `models.yml` shape\n\n```yaml\nproviders:\n <provider-id>:\n # provider-level config\nequivalence:\n overrides:\n <provider-id>/<model-id>: <canonical-model-id>\n exclude:\n - <provider-id>/<model-id>\n```\n\n`provider-id` is the canonical provider key used across selection and auth lookup.\n\n`equivalence` is optional and configures canonical model grouping on top of concrete provider models:\n\n- `overrides` maps an exact concrete selector (`provider/modelId`) to an official upstream canonical id\n- `exclude` opts a concrete selector out of canonical grouping\n\n## Provider-level fields\n\n```yaml\nproviders:\n my-provider:\n baseUrl: https://api.example.com/v1\n apiKey: 
MY_PROVIDER_API_KEY\n api: openai-completions\n headers:\n X-Team: platform\n authHeader: true\n auth: apiKey\n disableStrictTools: false # set true for Anthropic-compatible endpoints that reject the strict field\n discovery:\n type: ollama\n modelOverrides:\n some-model-id:\n name: Renamed model\n models:\n - id: some-model-id\n name: Some Model\n api: openai-completions\n reasoning: false\n input: [text]\n cost:\n input: 0\n output: 0\n cacheRead: 0\n cacheWrite: 0\n contextWindow: 128000\n maxTokens: 16384\n headers:\n X-Model: value\n compat:\n supportsStore: true\n supportsDeveloperRole: true\n supportsReasoningEffort: true\n maxTokensField: max_completion_tokens\n openRouterRouting:\n only: [anthropic]\n vercelGatewayRouting:\n order: [anthropic, openai]\n extraBody:\n gateway: m1-01\n controller: mlx\n```\n\n### Allowed provider/model `api` values\n\n- `openai-completions`\n- `openai-responses`\n- `openai-codex-responses`\n- `azure-openai-responses`\n- `anthropic-messages`\n- `google-generative-ai`\n- `google-gemini-cli`\n- `google-vertex`\n\n### Allowed auth/discovery values\n\n- `auth`: `apiKey` (default), `none`, or `oauth`; for `models.yml` custom models, `oauth` is accepted by schema but does not waive the `apiKey` requirement\n- `discovery.type`: `ollama`, `llama.cpp`, `lm-studio`, `openai-models-list`, or `proxy`\n- `transport`: `pi-native` only. 
When set, every model under that provider is sent to an `omp auth-gateway` compatible `baseUrl` via `POST /v1/pi/stream`; `apiKey` is the gateway bearer.\n\n## Validation rules (current)\n\n### Full custom provider (`models` is non-empty)\n\nRequired:\n\n- `baseUrl`\n- `apiKey` unless `auth: none`\n- `api` at provider level or each model\n\n### Override-only provider (`models` missing or empty)\n\nMust define at least one of:\n\n- `baseUrl`\n- `apiKey`\n- `auth: none`\n- `headers`\n- `compat`\n- `disableStrictTools`\n- `modelOverrides`\n- `discovery`\n\n### Discovery\n\n- `discovery` requires provider-level `api`, except `discovery.type: proxy` (per-model wire auto-detected).\n\n### Model value checks\n\n- `id` required\n- `contextWindow` and `maxTokens` must be positive if provided\n\n### Command-resolved secrets\n\nProvider `apiKey` values and provider/model `headers` values may start with `!` to read a secret from command stdout. The command is run with a 10 s timeout, stdout is trimmed, and empty/failing commands are omitted:\n\n```yaml\nproviders:\n openai:\n apiKey: \"!op read op://dev/openai/api-key\"\n headers:\n X-Team-Key: \"!bw get password omp-team-key\"\n```\n\nSuccessful command outputs are cached for the process lifetime so the command is not re-run for every model.\n\n## Merge and override order\n\nModelRegistry pipeline (on refresh):\n\n1. Load built-in providers/models from `@oh-my-pi/pi-catalog` (`getBundledProviders` / `getBundledModels`).\n2. Load `models.yml` custom config.\n3. Apply provider overrides (`baseUrl`, `headers`, `disableStrictTools`) to built-in models.\n4. Apply `modelOverrides` (per provider + model id).\n5. Merge custom `models`:\n - same `provider + id` replaces existing\n - otherwise append\n6. 
Load cached/runtime-discovered models (Ollama, llama.cpp, LM Studio, plus built-in provider managers), then re-apply model overrides.\n\n### Provider-model cache and static fingerprint\n\nCached per-provider model lists are persisted in the model-cache SQLite\ndatabase (current schema version 6) with a `static_fingerprint` column that\nhashes the static catalog slice merged into the row. When `resolveProviderModels`\nskips the network fetch and the fingerprint of the in-memory static\ncatalog matches the cached one, the cached rows are returned verbatim —\nthe static + dynamic merge is bypassed entirely. The fingerprint is\nmemoized per process by tagging the static-models array with a symbol\nproperty, so repeated cold-start calls do not re-hash.\n\n## Canonical model equivalence and coalescing\n\nThe registry keeps every concrete provider model and then builds a canonical layer above them.\n\nCanonical ids are official upstream ids only, for example:\n\n- `claude-opus-4-6`\n- `claude-haiku-4-5`\n- `gpt-5.3-codex`\n\n### `models.yml` equivalence config\n\nExample:\n\n```yaml\nproviders:\n zenmux:\n baseUrl: https://api.zenmux.example/v1\n apiKey: ZENMUX_API_KEY\n api: openai-codex-responses\n models:\n - id: codex\n name: Zenmux Codex\n reasoning: true\n input: [text]\n cost:\n input: 0\n output: 0\n cacheRead: 0\n cacheWrite: 0\n contextWindow: 200000\n maxTokens: 32768\n\nequivalence:\n overrides:\n zenmux/codex: gpt-5.3-codex\n p-codex/codex: gpt-5.3-codex\n exclude:\n - demo/codex-preview\n```\n\nBuild order for canonical grouping:\n\n1. exact user override from `equivalence.overrides`\n2. bundled official-id matches from built-in model metadata\n3. conservative heuristic normalization for gateway/provider variants\n4. 
fallback to the concrete model's own id\n\nCurrent heuristics are intentionally narrow:\n\n- embedded upstream prefixes can be stripped when present, for example `anthropic/...` or `openai/...`\n- dotted and dashed version variants can normalize only when they map to an existing official id, for example `4.6 -> 4-6`\n- ambiguous families or versions are not merged without a bundled match or explicit override\n\n### Canonical resolution behavior\n\nWhen multiple concrete variants share a canonical id, resolution uses:\n\n1. availability and auth\n2. `config.yml` `modelProviderOrder`\n3. existing registry/provider order if `modelProviderOrder` is unset\n\nDisabled or unauthenticated providers are skipped.\n\nSession state and transcripts continue to record the concrete provider/model that actually executed the turn.\n\nProvider defaults vs per-model overrides:\n\n- Provider `headers` are baseline.\n- Model `headers` override provider header keys.\n- `modelOverrides` can override model metadata (`name`, `reasoning`, `thinking`, `input`, `supportsTools`, `cost`, `premiumMultiplier`, `contextWindow`, `maxTokens`, `omitMaxOutputTokens`, `headers`, `compat`, `contextPromotionTarget`).\n- `compat` is deep-merged for nested routing blocks (`openRouterRouting`, `vercelGatewayRouting`, `extraBody`).\n\n## Runtime discovery integration\n\n### Implicit Ollama discovery\n\nIf `ollama` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `ollama`\n- api: `openai-responses`\n- base URL: `OLLAMA_BASE_URL`, or `OLLAMA_HOST`, or `http://127.0.0.1:11434`\n- context window: `OLLAMA_CONTEXT_LENGTH` if set, otherwise Ollama `/api/show` metadata, otherwise `128000`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery calls Ollama endpoints and normalizes discovered OpenAI-compatible models to `openai-responses`.\n\n`OLLAMA_CONTEXT_LENGTH` does not configure Ollama's runtime `num_ctx`; set that in Ollama/model configuration 
separately.\n\n### Implicit llama.cpp discovery\n\nIf `llama.cpp` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `llama.cpp`\n- api: `openai-responses`\n- base URL: `LLAMA_CPP_BASE_URL` or `http://127.0.0.1:8080`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery calls llama.cpp model endpoints and synthesizes model entries with local defaults.\n\n### Implicit LM Studio discovery\n\nIf `lm-studio` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `lm-studio`\n- api: `openai-completions`\n- base URL: `LM_STUDIO_BASE_URL` or `http://127.0.0.1:1234/v1`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery fetches models (`GET /models`) and synthesizes model entries with local defaults.\n\nThis path also works for local OpenAI-compatible servers that are not LM Studio. For example, if oMLX is bound to Ollama's usual port, set `LM_STUDIO_BASE_URL=http://127.0.0.1:11434/v1` to discover it through the existing `/v1/models` flow. Running oMLX and Ollama side by side requires assigning a different port to one of them. 
Do not configure oMLX as `ollama`: Ollama discovery uses native `/api/tags` and `/api/show` endpoints, not OpenAI `/v1/models`.\n\n### LiteLLM provider discovery\n\nWhen `litellm` is active (for example through `LITELLM_API_KEY` or stored auth), runtime discovery uses the LiteLLM proxy:\n\n- provider: `litellm`\n- api: `openai-completions`\n- base URL: explicit provider `baseUrl` / `models.yml` config, otherwise `LITELLM_BASE_URL`, otherwise `http://localhost:4000/v1`\n- auth mode: `LITELLM_API_KEY` or stored LiteLLM auth when the proxy requires a key\n\nRuntime discovery fetches models (`GET /models`) from the proxy and enriches bare LiteLLM model ids against bundled reference metadata when available.\n\n### Explicit provider discovery\n\nYou can configure discovery yourself:\n\n```yaml\nproviders:\n ollama:\n baseUrl: http://127.0.0.1:11434\n api: openai-responses\n auth: none\n discovery:\n type: ollama\n\n llama.cpp:\n baseUrl: http://127.0.0.1:8080\n api: openai-responses\n auth: none\n discovery:\n type: llama.cpp\n```\n\n### Proxy discovery (`discovery.type: proxy`)\n\nFor Anthropic+OpenAI-compatible proxies (new-api / one-api / similar)\nthat expose both `/v1/messages` and `/v1/chat/completions` behind the same\nhost. Discovery hits `GET /v1/models` (10s timeout, OpenAI-style payload) and\nderives each model's `api` from the entry's `supported_endpoint_types`:\n\n- contains `\"anthropic\"` -> `api: anthropic-messages` (routes via `/v1/messages`)\n- contains `\"openai\"` -> `api: openai-completions` (routes via `/v1/chat/completions`)\n- otherwise -> falls back to provider-level `api` if set, else dropped\n\nProvider-level `api` is **optional** with `discovery.type: proxy` because the\nper-model wire is auto-detected. 
The Anthropic SDK strips a trailing `/v1`\nfrom `baseUrl` before appending `/v1/messages`, so a single discovery `baseUrl`\n(ending in `/v1`) round-trips correctly to both wires.\n\n```yaml\nproviders:\n newapi-reseller:\n baseUrl: https://api.example.com/v1\n apiKey: xxxx\n authHeader: true # injects Authorization: Bearer for openai models\n disableStrictTools: true # most anthropic-fronted proxies reject `strict`\n discovery:\n type: proxy\n```\n\n### Extension provider registration\n\nExtensions can register providers at runtime (`pi.registerProvider(...)`), including:\n\n- model replacement/append for a provider\n- custom stream handler registration for new API IDs\n- custom OAuth provider registration\n\n## Auth and API key resolution order\n\nWhen requesting a key for a provider, effective order is:\n\n1. Runtime override (CLI `--api-key`)\n2. Stored API key credential in `agent.db`\n3. Stored OAuth credential in `agent.db` (with refresh)\n4. Environment variable mapping (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, etc.)\n5. ModelRegistry fallback resolver (provider `apiKey` from `models.yml`, env-name-or-literal semantics)\n\n`models.yml` `apiKey` behavior:\n\n- Value is first treated as an environment variable name.\n- If no env var exists, the literal string is used as the token.\n\nIf `authHeader: true` and provider `apiKey` is set, models get:\n\n- `Authorization: Bearer <resolved-key>` header injected.\n\nKeyless providers:\n\n- Providers marked `auth: none` are treated as available without credentials.\n- `getApiKey*` returns `kNoAuth` for them.\n\n### Broker mode\n\nWhen `OMP_AUTH_BROKER_URL` (or `auth.broker.url`) is set, the local SQLite credential store is replaced by `RemoteAuthCredentialStore`. 
Layers 2 and 3 above (stored API key / OAuth in `agent.db`) are served from a broker-supplied snapshot whose `refresh` tokens are redacted; expiry triggers `POST /v1/credential/:id/refresh` on the broker rather than a local refresh.\n\n`AuthStorage.setConfigApiKey` lets a `models.yml` `apiKey` win over a broker-resolved OAuth token without overriding a runtime `--api-key`. See [`auth-broker-gateway.md`](./auth-broker-gateway.md) for the full broker / gateway design and env surface (`OMP_AUTH_BROKER_URL`, `OMP_AUTH_BROKER_TOKEN`, `auth.broker.url`, `auth.broker.token`).\n\n## Model availability vs all models\n\n- `getAll()` returns the loaded model registry (built-in + merged custom + discovered).\n- `getAvailable()` filters to models that are keyless or have resolvable auth.\n\nSo a model can exist in registry but not be selectable until auth is available.\n\n## Runtime model resolution\n\n### CLI and pattern parsing\n\n`model-resolver.ts` supports:\n\n- exact `provider/modelId`\n- exact canonical model id\n- exact model id (provider inferred)\n- fuzzy/substring matching\n- glob scope patterns in `--models` (e.g. `openai/*`, `*sonnet*`)\n- optional `:thinkingLevel` suffix (`off|minimal|low|medium|high|xhigh`)\n\n`--provider` is legacy; `--model` is preferred.\n\nResolution precedence for exact selectors:\n\n1. exact `provider/modelId` bypasses coalescing\n2. exact canonical id resolves through the canonical index\n3. exact bare concrete id still works\n4. fuzzy and glob matching run after the exact paths\n\n### Initial model selection priority\n\n`findInitialModel(...)` uses this order:\n\n1. explicit CLI provider+model\n2. first scoped model (if not resuming)\n3. saved default provider/model\n4. known provider defaults (e.g. OpenAI/Anthropic/etc.) among available models\n5. 
first available model\n\n### Role aliases and settings\n\nSupported model roles:\n\n- `default`, `smol`, `slow`, `vision`, `plan`, `designer`, `commit`, `title`, `task`, `advisor`\n\nRole aliases like `pi/smol` expand through `settings.modelRoles`. Each role value can also append a thinking selector such as `:minimal`, `:low`, `:medium`, or `:high`.\n\nIf a role points at another role, the target model still inherits normally and any explicit suffix on the referring role wins for that role-specific use.\n\nRelated settings:\n\n- `modelRoles` (record)\n- `enabledModels` (scoped pattern list)\n- `modelProviderOrder` (global canonical-provider precedence)\n- `providers.kimiApiFormat` (`openai` or `anthropic` request format)\n- `providers.openaiWebsockets` (`auto|off|on` websocket preference for OpenAI Codex transport)\n\n`modelRoles` may store either:\n\n- `provider/modelId` to pin a concrete provider variant\n- a canonical id such as `gpt-5.3-codex` to allow provider coalescing\n\nFor `enabledModels` and CLI `--models`:\n\n- exact canonical ids expand to all concrete variants in that canonical group\n- explicit `provider/modelId` entries stay exact\n- globs and fuzzy matches still operate on concrete models\n\nGlobal `enabledModels` and `disabledProviders` entries may also be scoped to a path prefix:\n\n```yaml\nenabledModels:\n - claude-sonnet-4-5\n - path: ~/work\n models:\n - anthropic/claude-opus-4-5\ndisabledProviders:\n - ollama\n - path: ~/private\n providers:\n - anthropic\n```\n\nString entries apply everywhere. Scoped entries apply when the current working directory is the configured path or one of its subdirectories. 
Use `path`, `paths`, `pathPrefix`, or `pathPrefixes`; use `models` for `enabledModels`, `providers` for `disabledProviders`, or `values` for either.\n\n## `/model` and `omp models`\n\nBoth surfaces keep provider-prefixed models visible and selectable.\n\nThey now also expose canonical/coalesced models:\n\n- `/model` includes a canonical view alongside provider tabs\n- `omp models` prints provider-grouped tables of every concrete model, and `omp models canonical` prints the coalesced canonical view\n\nSelecting a canonical entry stores the canonical selector. Selecting a provider row stores the explicit `provider/modelId`.\n\n## Context promotion (model-level fallback chains)\n\nContext promotion is an overflow recovery mechanism for small-context variants (for example `*-spark`) that automatically promotes to a larger-context sibling when the API rejects a request with a context length error.\n\n### Trigger and order\n\nWhen a turn fails with a context overflow error (e.g. `context_length_exceeded`), `AgentSession` attempts promotion **before** falling back to compaction:\n\n1. If `contextPromotion.enabled` is true, resolve a promotion target (see below).\n2. If a target is found, switch to it and retry the request — no compaction needed.\n3. If no target is available, fall through to auto-compaction on the current model.\n\n### Target selection\n\nSelection is model-driven, not role-driven:\n\n1. `currentModel.contextPromotionTarget` (if configured)\n2. smallest larger-context model on the same provider + API\n\nCandidates are ignored unless credentials resolve (`ModelRegistry.getApiKey(...)`).\n\n### OpenAI Codex websocket handoff\n\nIf switching from/to `openai-codex-responses`, session provider state key `openai-codex-responses` is closed before model switch. 
This drops websocket transport state so the next turn starts clean on the promoted model.\n\n### Persistence behavior\n\nPromotion uses temporary switching (`setModelTemporary`):\n\n- recorded as a temporary `model_change` in session history\n- does not rewrite saved role mapping\n\n### Configuring explicit fallback chains\n\nConfigure fallback directly in model metadata via `contextPromotionTarget`.\n\n`contextPromotionTarget` accepts either:\n\n- `provider/model-id` (explicit)\n- `model-id` (resolved within current provider)\n\nExample (`models.yml`) for an explicit OpenAI fallback:\n\n```yaml\nproviders:\n openai-codex:\n modelOverrides:\n gpt-5.5:\n contextPromotionTarget: openai-codex/gpt-5.4\n```\n\nThe built-in model policy currently links OpenAI `codex-spark` variants to `gpt-5.5`, and `gpt-5.5` to `gpt-5.4`, when that target exists on the same provider/API.\n\n## Compatibility and routing fields\n\nThe `compat` block on a provider or model overrides the URL-based auto-detection in `packages/catalog/src/compat/openai.ts` (`buildOpenAICompat`). It is validated by `OpenAICompatSchema` in `packages/coding-agent/src/config/models-config-schema.ts` and consumed by every `openai-completions` transport (`packages/ai/src/providers/openai-completions.ts`). The canonical type is `OpenAICompat` in `packages/catalog/src/types.ts`.\n\nEndpoint-specific exceptions that interact with these fields are cataloged in [Provider endpoint constraints](./provider-endpoint-constraints.md).\n\n`models.yml` accepts the following keys (all optional; unset falls back to URL detection):\n\nRequest shaping:\n\n- `supportsStore` — emit `store: false` on requests. Default: auto (off for non-standard endpoints).\n- `supportsDeveloperRole` — use the `developer` system role for reasoning models instead of `system`. Default: auto.\n- `supportsMultipleSystemMessages` — preserve separate leading system/developer messages instead of coalescing them. 
Default: auto (known OpenAI-compatible hosted APIs preserve; strict-template/local hosts coalesce).\n- `supportsUsageInStreaming` — send `stream_options: { include_usage: true }` to receive token usage on streaming responses. Default: `true`.\n- `maxTokensField` — `\"max_completion_tokens\"` or `\"max_tokens\"`. Default: auto.\n- `supportsToolChoice` — emit the `tool_choice` parameter when the caller forces a specific tool. Default: `true`. Set `false` for endpoints that 400 on `tool_choice` (e.g. DeepSeek when reasoning is on).\n- `supportsForcedToolChoice` — accept a forced `tool_choice` that requires a specific tool. Default: `true`. When `false`, a forced selector is downgraded to `auto` so the tool stays available for endpoints that reject forced tool calls (e.g. some thinking-required OpenAI-compatible models).\n- `disableReasoningOnForcedToolChoice` — drop `reasoning_effort` / OpenRouter `reasoning` whenever `tool_choice` forces a call. Default: auto (Kimi/Anthropic-fronted endpoints).\n- `disableReasoningOnToolChoice` — drop reasoning fields whenever any `tool_choice` is sent. Default: auto (DeepSeek reasoning models).\n- `alwaysSendMaxTokens` — always send a max-token field when the caller did not provide one. Default: auto (Kimi-family models derive TPM limits from `max_tokens`).\n- `strictResponsesPairing` — Responses-API tool-call/result history must be strictly paired. Default: auto (Azure OpenAI, GitHub Copilot).\n- `streamIdleTimeoutMs` — stream-watchdog idle-timeout floor in ms for slow reasoning hosts. Default: auto (GLM coding-plan hosts, direct DeepSeek reasoning).\n- `cacheControlFormat` — `\"anthropic\"` to include Anthropic-style prompt-cache markers in chat-completions payloads. Default: auto (OpenRouter `anthropic/*` models).\n- `supportsLongPromptCacheRetention` — host honors `prompt_cache_retention: \"24h\"` on the Responses API. 
Default: auto (api.openai.com).\n- `extraBody` — extra top-level fields merged into every request body (gateway hints, controller selectors, etc.).\n\nReasoning / thinking:\n\n- `supportsReasoningEffort` — accept `reasoning_effort`. Default: auto (off for Grok, Z.ai/Zhipu, and Xiaomi MiMo).\n- `supportsReasoningParams` — whether request shaping may send reasoning params at all. Default: auto (off for GitHub Copilot chat-completions).\n- `reasoningEffortMap` — partial map from internal effort levels (`minimal|low|medium|high|xhigh`) to provider-specific strings (e.g. DeepSeek maps `xhigh -> \"max\"`).\n- `thinkingFormat` — request shape for thinking: `\"openai\"` (`reasoning_effort`), `\"openrouter\"` (`reasoning: { effort }`), `\"zai\"` (`thinking: { type: \"enabled\" }`), `\"qwen\"` (top-level `enable_thinking`), or `\"qwen-chat-template\"` (`chat_template_kwargs.enable_thinking`). Default: `\"openai\"`.\n- `reasoningContentField` — assistant field carrying chain-of-thought: `\"reasoning_content\"`, `\"reasoning\"`, or `\"reasoning_text\"`. Default: auto.\n- `requiresReasoningContentForToolCalls` — assistant tool-call turns must round-trip the reasoning field (DeepSeek-R1, Kimi, OpenRouter when reasoning is on). Default: `false`.\n- `allowsSyntheticReasoningContentForToolCalls` — allow a placeholder reasoning field when a prior assistant tool-call turn lacks provider reasoning content. Default: `true`; set `false` for providers that validate the exact reasoning value.\n- `requiresAssistantContentForToolCalls` — assistant tool-call turns must include non-empty text content (Kimi). Default: `false`.\n- `whenThinking` — partial compat overrides applied only when a request actually engages thinking mode (deep-merged over the baseline compat).\n\nTool / message normalization:\n\n- `requiresToolResultName` — tool-result messages need a `name` field (Mistral). 
Default: auto.\n- `requiresAssistantAfterToolResult` — a user message after a tool result needs an assistant turn in between. Default: auto.\n- `requiresThinkingAsText` — convert thinking blocks to text wrapped in `<thinking>` delimiters (Mistral). Default: auto.\n- `requiresMistralToolIds` — normalize tool-call ids to exactly 9 alphanumeric chars. Default: auto.\n- `supportsStrictMode` — accept the per-tool `strict` field on tool schemas. Default: conservative auto-detect per provider/baseUrl.\n- `toolStrictMode` — `\"all_strict\"` forces strict on every tool, `\"none\"` forces it off; unset keeps the existing per-tool mixed behavior.\n\nGateway routing (only applied when `baseUrl` matches the gateway):\n\n- `openRouterRouting.only` / `openRouterRouting.order` — provider routing on `openrouter.ai` (see <https://openrouter.ai/docs/provider-routing>).\n- `vercelGatewayRouting.only` / `vercelGatewayRouting.order` — provider routing on `ai-gateway.vercel.sh` (see <https://vercel.com/docs/ai-gateway/models-and-providers/provider-options>).\n\nProvider-level `compat` is the baseline; per-model `compat` is deep-merged on top, with `openRouterRouting`, `vercelGatewayRouting`, and `extraBody` merged as nested objects.\n\n### Anthropic compatibility (`anthropic-messages`)\n\nFor `anthropic-messages` models the runtime uses a separate `AnthropicCompat` shape (`packages/catalog/src/types.ts`). 
The `models.yml` schema exposes the strict-tools opt-out as a top-level provider field (see below) plus two Anthropic-side flags in the same `compat` slot — `requiresToolResultId` (non-standard `id` alias on `tool_result` blocks for Z.AI-style proxies) and `replayUnsignedThinking` (replay unsigned thinking blocks as native thinking instead of demoting them to text); the remaining Anthropic-side knobs (`disableAdaptiveThinking`, `supportsEagerToolInputStreaming`, `supportsLongCacheRetention`, `supportsMidConversationSystem`, `supportsForcedToolChoice`, `supportsSamplingParams`, `escapeBuiltinToolNames`) are set by built-in catalog metadata and are not user-configurable from `models.yml`.\n\n### Strict tool schemas (`disableStrictTools`)\n\nAnthropic's API supports a `strict` field on tool definitions that forces the model to always follow the provided schema exactly. OMP enables it by default for a small allowlist of high-frequency built-in `anthropic-messages` tools (`bash`, `python`, `edit`, and `find`) whose schemas fit Anthropic's strict grammar limits; other tools still send normalized schemas but omit `strict`.\n\nThird-party providers that front the Anthropic API (AWS Bedrock, Azure, self-hosted proxies) do not always implement this field and will reject requests that include it. Set `disableStrictTools: true` at the provider level to opt out of strict mode for the allowlisted tools:\n\n```yaml\nproviders:\n bedrock-anthropic:\n baseUrl: https://bedrock-runtime.us-east-1.amazonaws.com/anthropic\n apiKey: AWS_BEARER_TOKEN\n api: anthropic-messages\n disableStrictTools: true\n models:\n - id: claude-sonnet-4-20250514\n name: Claude Sonnet 4 (Bedrock)\n input: [text, image]\n contextWindow: 200000\n maxTokens: 16384\n cost:\n input: 3.00\n output: 15.00\n cacheRead: 0.30\n cacheWrite: 3.75\n```\n\n`disableStrictTools` is a provider-level flag that applies to all models in the provider. 
It disables the Anthropic `strict` marker only for tools that OMP would otherwise mark strict; it does not change runtime tool argument validation. OMP can automatically retry without strict tools after Anthropic reports a strict-grammar-too-large error before the first streamed token, but proxies that reject the `strict` field for other reasons should set this flag explicitly.\n\nTool schemas going on the wire are normalized by the unified flow in\n`packages/ai/src/utils/schema/normalize.ts` (Google/CCA/MCP dispatchers\nplus the OpenAI strict-mode sanitize+enforce pipeline). See\n[`ai-schema-normalize.md`](./ai-schema-normalize.md) for the strict-mode\nedge cases (local `$ref` inlining, single-item `allOf` collapse,\n`anyOf`-wrapper description hoist, enum/const primitive-type inference)\nand the per-provider dispatcher mapping.\n\n## Practical examples\n\n### Local OpenAI-compatible endpoint (no auth)\n\n```yaml\nproviders:\n local-openai:\n baseUrl: http://127.0.0.1:8000/v1\n auth: none\n api: openai-completions\n models:\n - id: Qwen/Qwen2.5-Coder-32B-Instruct\n name: Qwen 2.5 Coder 32B (local)\n```\n\nFor oMLX or another local OpenAI-compatible server with a discoverable `/v1/models` endpoint, prefer discovery instead of listing models by hand. Set `api` to the endpoint family your server actually exposes: `openai-completions` uses `/v1/chat/completions`; servers that expose `/v1/responses` need `openai-responses` instead.\n\n```yaml\nproviders:\n omlx:\n baseUrl: http://127.0.0.1:11434/v1\n auth: none\n api: openai-completions\n discovery:\n type: openai-models-list\n```\n\nThe built-in vLLM provider can be pointed at a non-default endpoint without declaring a custom discovery type. 
OMP uses vLLM's `/v1/models` metadata and preserves vLLM's `max_model_len` field as the discovered context window.\n\n```yaml\nproviders:\n vllm:\n baseUrl: http://192.168.5.3:8085/v1\n auth: none\n```\n\nFor multiple vLLM endpoints, use arbitrary provider IDs with the generic OpenAI-compatible discovery path. Set `auth: none` for local no-auth servers or `apiKey` for authenticated ones. Generic discovery reads `max_model_len` first and then `context_length` as a generic OpenAI-compatible fallback.\n\n```yaml\nproviders:\n vllm-fast:\n baseUrl: http://host-a:8000/v1\n auth: none\n api: openai-completions\n discovery:\n type: openai-models-list\n vllm-long:\n baseUrl: http://host-b:8000/v1\n auth: none\n api: openai-completions\n discovery:\n type: openai-models-list\n```\n\n### Hosted proxy with env-based key\n\n```yaml\nproviders:\n anthropic-proxy:\n baseUrl: https://proxy.example.com/anthropic\n apiKey: ANTHROPIC_PROXY_API_KEY\n api: anthropic-messages\n authHeader: true\n disableStrictTools: true # if the proxy doesn't support strict tool schemas\n models:\n - id: claude-sonnet-4-20250514\n name: Claude Sonnet 4 (Proxy)\n reasoning: true\n input: [text, image]\n```\n\n### Override built-in provider route + model metadata\n\n```yaml\nproviders:\n openrouter:\n baseUrl: https://my-proxy.example.com/v1\n headers:\n X-Team: platform\n modelOverrides:\n anthropic/claude-sonnet-4:\n name: Sonnet 4 (Corp)\n compat:\n openRouterRouting:\n only: [anthropic]\n```\n\n## Legacy consumer caveat\n\nMost model configuration now flows through `models.yml` via `ModelRegistry`. Explicit `.json` / `.jsonc` paths remain supported only when passed programmatically to `ModelRegistry`; the default user config is `~/.omp/agent/models.yml`.\n\n## Failure mode\n\nIf `models.yml` fails schema or validation checks:\n\n- registry keeps operating with built-in models\n- error is exposed via `ModelRegistry.getError()` and surfaced in UI/notifications\n",
39
40
  "natives-addon-loader-runtime.md": "# Natives Addon Loader Runtime\n\nThis document covers the runtime loader shipped by `@oh-my-pi/pi-natives`: how `native/index.js` decides which `.node` file to require, how compiled-binary embedded payloads are extracted, and what startup failures report.\n\n## Implementation files\n\n- `packages/natives/native/index.js`\n- `packages/natives/native/loader-state.js`\n- `packages/natives/native/embedded-addon.js`\n- `packages/natives/scripts/embed-native.ts`\n- `packages/natives/package.json`\n\n## Scope and responsibility\n\nThe loader is intentionally narrow:\n\n- Build a platform/CPU-aware candidate list for addon filenames and directories.\n- Treat an embedded-addon manifest as a compiled-binary signal when present.\n- Optionally materialize embedded addon archive contents into a versioned per-user cache directory.\n- On Windows `node_modules` installs, stage addon files into the versioned cache to avoid locked-DLL update failures.\n- Attempt candidates in deterministic order and return the first addon that `require(...)` loads and validates.\n\nFor install and compiled-binary paths, the loader verifies a release sentinel export named from `package.json#version` (for example `__piNativesV16_0_3`). Workspace-dev loads skip this validation so a local checkout can rebuild after a pull. The loader does not validate the full export surface; stale same-version or incomplete binaries still surface as missing members or native errors at use sites.\n\n## Runtime inputs and derived state\n\nAt module initialization, `native/index.js` computes:\n\n- **Platform tag**: `${process.platform}-${process.arch}` (for example `darwin-arm64`).\n- **Package version**: from `packages/natives/package.json`.\n- **Core directories**:\n - `leafPackageDir`: directory of the platform leaf package, resolved via `require.resolve(\"@oh-my-pi/pi-natives-<tag>/package.json\")`; `null` when no leaf is installed (e.g. 
local dev) and forced to `null` in compiled-binary mode.\n - `nativeDir`: package-local `packages/natives/native`.\n - `execDir`: directory containing `process.execPath`.\n - `versionedDir`: `<getNativesDir()>/<packageVersion>`.\n - `userDataDir` fallback:\n - Windows: `%LOCALAPPDATA%/omp` or `%USERPROFILE%/AppData/Local/omp`.\n - Non-Windows: `~/.local/bin`.\n- **Natives cache root** (`getNativesDir()`):\n - if `$XDG_DATA_HOME/omp` exists, `$XDG_DATA_HOME/omp/natives`;\n - otherwise `~/.omp/natives`.\n- **Compiled-binary mode** (`detectCompiledBinary`): true if any of:\n - embedded-addon manifest is non-null,\n - `PI_COMPILED` env var is set,\n - `import.meta.url` contains Bun embedded markers (`$bunfs`, `~BUN`, `%7EBUN`).\n- **Windows staging mode** (`shouldStageNodeModulesAddon`): true only on Windows, in non-compiled mode, when `nativeDir` is inside `node_modules`.\n- **Variant override**: `PI_NATIVE_VARIANT` (`modern`/`baseline` only; invalid values ignored).\n- **Selected variant**: explicit override, otherwise runtime AVX2 detection on x64 (`modern` if AVX2, else `baseline`).\n\n## Platform support and tag resolution\n\n`SUPPORTED_PLATFORMS` is fixed to:\n\n- `linux-x64`\n- `linux-arm64`\n- `darwin-x64`\n- `darwin-arm64`\n- `win32-x64`\n\nUnsupported platforms are not rejected before probing. The loader first tries the computed candidate paths. If all fail and `platformTag` is unsupported, it throws an unsupported-platform error listing supported tags.\n\n## Variant selection (`modern` / `baseline` / default)\n\n### x64 behavior\n\n1. `PI_NATIVE_VARIANT=modern|baseline` wins when valid.\n2. Otherwise AVX2 support is detected:\n - Linux: scan `/proc/cpuinfo` for `avx2`.\n - macOS: `sysctl -n machdep.cpu.leaf7_features`, then `machdep.cpu.features`.\n - Windows: PowerShell `[System.Runtime.Intrinsics.X86.Avx2]::IsSupported`.\n3. 
AVX2 selects `modern`; unavailable or undetectable AVX2 selects `baseline`.\n\n### Non-x64 behavior\n\nNo variant suffix is used; the filename is `pi_natives.<platform>-<arch>.node`.\n\n### Filename construction\n\n`loader-state.js#getAddonFilenames` returns:\n\n- Non-x64 or no variant: `pi_natives.<tag>.node`\n- x64 + `modern`:\n 1. `pi_natives.<tag>-modern.node`\n 2. `pi_natives.<tag>-baseline.node`\n 3. `pi_natives.<tag>.node`\n- x64 + `baseline`:\n 1. `pi_natives.<tag>-baseline.node`\n 2. `pi_natives.<tag>.node`\n\nThe default unsuffixed fallback remains part of the x64 candidate list.\n\n## Candidate path construction and fallback ordering\n\n`resolveLoaderCandidates(...)` expands every filename across directories, then de-duplicates while preserving first occurrence order.\n\n### Non-compiled runtime\n\nCandidates are grouped by directory class, in order:\n\n1. `<leafPackageDir>/<filename>` for every filename (omitted when `leafPackageDir` is `null`)\n2. `<nativeDir>/<filename>` then `<execDir>/<filename>`, per filename\n\nThe leaf package dir comes first so the optional-dependency binary published with the release is preferred over any `.node` left in the core package's `native/` (e.g. a stale local-dev build).\n\nOn Windows installs where `nativeDir` is inside a `node_modules` segment (`shouldStageNodeModulesAddon`), `<versionedDir>/<filename>` staging candidates are prepended ahead of the leaf candidates so a locked `node_modules` binary can be sidestepped during `bun install -g` updates. The staged file is copied from `leafPackageDir ?? nativeDir` before probing.\n\n### Compiled runtime\n\nCandidates are grouped, in order:\n\n1. `<versionedDir>/<filename>` then `<userDataDir>/<filename>`, per filename\n2. 
`<nativeDir>/<filename>` then `<execDir>/<filename>`, per filename\n\nAt load time, an extracted embedded candidate, or a staged Windows candidate when no embedded candidate exists, is prepended ahead of these de-duplicated candidates.\n\n## Embedded addon extraction lifecycle\n\n`embedded-addon.js` is generated by `scripts/embed-native.ts`. The reset stub exports `embeddedAddon = null`. A populated manifest has:\n\n- `platformTag`\n- `version`\n- `archive`: `{ format: \"tar.gz\", filename, filePath }`\n- `files[]` entries with `variant`, `filename`, and `size`\n\nExtraction (`maybeExtractEmbeddedAddon`) runs only when:\n\n1. compiled-binary mode is true,\n2. `embeddedAddon` is non-null,\n3. manifest `platformTag` equals the runtime platform tag,\n4. manifest `version` equals the package version,\n5. a variant-appropriate embedded file exists.\n\nVariant file selection:\n\n- Non-x64: prefer `default`, then first available file.\n- x64 + `modern`: prefer `modern`, fallback to `baseline`.\n- x64 + `baseline`: require `baseline`.\n\nMaterialization:\n\n1. Ensure `<versionedDir>` exists.\n2. Select `<versionedDir>/<selected filename>`.\n3. If the current cached file exists and its size matches manifest metadata, reuse it.\n4. Otherwise extract `embeddedAddon.archive.filePath` into `<versionedDir>` using the manifest `files[]` allowlist.\n5. 
Verify the selected target by size and return it as the first candidate.\n\nArchive, directory, or write failures are appended to the loader error list; probing continues through normal candidates.\n\n## Lifecycle and state transitions\n\n```text\nInit\n -> Load package metadata and embedded-addon manifest\n -> Compute platform/version/variant/filenames/candidate paths\n -> (compiled + embedded manifest matches?)\n yes -> extract archive to versionedDir when needed (record errors, continue)\n no -> skip extraction\n -> (Windows non-compiled node_modules install and no embedded candidate?)\n yes -> stage leaf/core addon to versionedDir (record errors, continue)\n no -> skip staging\n -> For each runtime candidate in order:\n require(candidate)\n -> sentinel validation passes or is workspace-dev: return addon exports (READY)\n -> failure: record error, continue\n -> none loaded:\n if unsupported platform tag -> throw Unsupported platform\n else -> throw Failed to load (tried-path diagnostics + hints)\n```\n\n## Failure behavior and diagnostics\n\n### Unsupported platform\n\nIf all candidates fail and `platformTag` is not supported, the loader throws:\n\n- `Unsupported platform: <tag>`\n- supported platform list\n- issue-reporting guidance\n\n### No loadable candidate\n\nIf the platform is supported but no candidate can be loaded, the final error includes:\n\n- `Failed to load pi_natives native addon for <platformTag>` or `<platformTag> (<variant>)`\n- every attempted path with the corresponding `require(...)` or sentinel-validation error\n- mode-specific remediation hints\n\n### Compiled-binary startup failures\n\nCompiled mode diagnostics include:\n\n- expected versioned cache target paths (`<versionedDir>/<filename>`),\n- remediation to delete the versioned cache and rerun,\n- direct release download `curl` commands for each expected filename,\n- release sentinel mismatch details when a loadable `.node` belongs to another `@oh-my-pi/pi-natives` version.\n\n### 
Non-compiled startup failures\n\nNormal package/runtime diagnostics include:\n\n- reinstall hint (`bun install @oh-my-pi/pi-natives`),\n- local rebuild command (`bun --cwd=packages/natives run build`),\n- optional x64 variant build hint (`TARGET_VARIANT=baseline|modern bun --cwd=packages/natives run build`).\n",
40
41
  "natives-architecture.md": "# Natives Architecture\n\n`@oh-my-pi/pi-natives` is a two-layer package around an ESM loader:\n\n1. **ESM loader/package entrypoint** resolves and loads the correct `.node` addon with `createRequire`, validates the release sentinel outside workspace-dev loads, and re-exports generated classes/functions plus enum runtime objects as explicit named ESM exports.\n2. **Rust N-API module layer** implements the exported functions/classes and emits the generated TypeScript declarations.\n\nThis document is the foundation for deeper module-level docs.\n\n## Implementation files\n\n- `packages/natives/native/index.js`\n- `packages/natives/native/index.d.ts`\n- `packages/natives/native/loader-state.js`\n- `packages/natives/native/embedded-addon.js`\n- `packages/natives/scripts/build-native.ts`\n- `packages/natives/scripts/embed-native.ts`\n- `packages/natives/scripts/gen-enums.ts`\n- `packages/natives/package.json`\n- `crates/pi-natives/src/lib.rs`\n\n## Package entrypoint and public surface\n\n`packages/natives/package.json` points at generated native artifacts:\n\n- `main`: `./native/index.js`\n- `types`: `./native/index.d.ts`\n- `exports[\".\"].types`: `./native/index.d.ts`\n- `exports[\".\"].import`: `./native/index.js`\n\nThere is no current `packages/natives/src` TypeScript wrapper layer. 
Consumers import functions/classes/enums directly from `@oh-my-pi/pi-natives`; the type contract is the generated `native/index.d.ts` plus the explicit named exports generated into `native/index.js` by `scripts/gen-enums.ts`.\n\nCurrent capability groups in the generated API include:\n\n- **Search/text/code primitives**: `grep`, `search`, `hasMatch`, `fuzzyFind`, `glob`, `astGrep`, `astEdit`, `blockRangeAt`, `summarizeCode`, text width/slicing/wrapping/sanitization, syntax highlighting, token counting.\n- **Execution/process/terminal primitives**: `executeShell`, `Shell`, `PtySession`, `Process`, key parsing, bash fixups.\n- **System/media/isolation/conversion primitives**: clipboard, SIXEL encoding, HTML-to-Markdown, macOS appearance/power helpers, work profiling, workspace scanning, isolation backend helpers (`iso*`).\n\n## Loader layer\n\n`packages/natives/native/index.js` is the package entrypoint; it calls `loadNative()` from `loader-state.js`, which owns runtime addon selection and optional embedded extraction.\n\n### Candidate resolution model\n\n- Platform tag is `${process.platform}-${process.arch}`.\n- Supported tags are currently:\n - `linux-x64`\n - `linux-arm64`\n - `darwin-x64`\n - `darwin-arm64`\n - `win32-x64`\n- x64 can use CPU variants:\n - `modern` (AVX2-capable)\n - `baseline` (fallback)\n- Non-x64 uses the default filename without a variant suffix.\n\nFilename strategy:\n\n- Default: `pi_natives.<platform>-<arch>.node`\n- x64 variant: `pi_natives.<platform>-<arch>-modern.node` or `...-baseline.node`\n- x64 runtime fallback includes the unsuffixed default filename after variant candidates.\n\n### Platform-specific variant detection\n\nFor x64, variant selection uses:\n\n- Linux: `/proc/cpuinfo`\n- macOS: `sysctl -n machdep.cpu.leaf7_features`, then `machdep.cpu.features`\n- Windows: PowerShell check for `System.Runtime.Intrinsics.X86.Avx2`\n\n`PI_NATIVE_VARIANT` can force `modern` or `baseline`; invalid values are ignored.\n\n### Binary 
distribution and extraction model\n\nThe published `@oh-my-pi/pi-natives` package ships **only** the loader layer in `native/`: the ESM loader (`index.js`), generated declarations (`index.d.ts`), the `loader-state.js`/`.d.ts` helpers, and the embedded-addon manifest stub (`embedded-addon.js`). It carries no `.node` binaries.\n\nEach platform's prebuilt `.node` is published as a separate optional-dependency leaf package — `@oh-my-pi/pi-natives-<platform>-<arch>`, one per supported tag — which the core lists in `optionalDependencies` at the lockstep version during publish. npm/bun install only the leaf whose `os`/`cpu` match the host. The working-tree package keeps built `.node` files under `native/` for local dev; the release-publish rewrite (`prepareNativeCorePackage` in `scripts/ci-release-publish.ts`) strips them from the core tarball, and the leaves are generated by `packages/natives/scripts/gen-npm-packages.ts` (`LEAF_TARGETS`). Adding a build target therefore requires a matching `LEAF_TARGETS` entry, or the binary never reaches npm users.\n\nFor compiled binaries, loader behavior is:\n\n1. Check versioned user cache path: `<getNativesDir()>/<packageVersion>/...`.\n2. Check legacy compiled-binary location:\n - Windows: `%LOCALAPPDATA%/omp` (fallback `%USERPROFILE%/AppData/Local/omp`)\n - non-Windows: `~/.local/bin`\n3. Fall back to packaged `native/` and executable directory candidates.\n\n`getNativesDir()` uses `$XDG_DATA_HOME/omp/natives` when `$XDG_DATA_HOME/omp` exists; otherwise it uses `~/.omp/natives`.\n\nIf a populated embedded addon manifest is present, it is also treated as a compiled-binary signal. Current embedded manifests point at a gzip-compressed tar archive (`embedded-addons.<tag>.tar.gz`) that contains one or more matching `.node` files. 
The loader extracts the archive into the versioned cache directory, validates the selected file by size, and prepends that cache path before normal candidate probing.\n\nFor npm/bun installs (non-compiled), `loader-state.js` resolves the platform leaf directory via `require.resolve(\"@oh-my-pi/pi-natives-<tag>/package.json\")` and probes its `.node` **before** the core package's `native/` directory and the executable directory. The optional-dependency binary is therefore preferred over any `.node` left in the core (e.g. a stale local-dev build). On Windows `node_modules` installs, the loader first stages the selected leaf/core addon into `<getNativesDir()>/<packageVersion>/...` and prepends that staged path so running processes do not lock the `node_modules` copy during global updates.\n\n### Failure modes\n\nLoader failures are explicit:\n\n- **Unsupported platform tag**: after failed probing, throws with supported platform list.\n- **No loadable candidate**: throws with all attempted paths and remediation hints.\n- **Embedded/staging errors**: directory/write/archive/staging failures are recorded and included in final load diagnostics if no candidate loads.\n- **Release mismatch**: outside workspace-dev loads, a candidate that loads but lacks the version sentinel export for `package.json#version` is rejected with a reinstall hint.\n\n## Rust N-API module layer\n\n`crates/pi-natives/src/lib.rs` declares exported module ownership:\n\n- `appearance`\n- `ast`\n- `block`\n- `clipboard`\n- `crash_handler`\n- `fd`\n- `fs_cache`\n- `glob`\n- `glob_util`\n- `grep`\n- `highlight`\n- `html`\n- `iso`\n- `keys`\n- `language` (re-exported from `pi_ast`)\n- `power`\n- `prof`\n- `ps`\n- `pty`\n- `shell`\n- `sixel`\n- `snapcompact`\n- `summary`\n- `task`\n- `text`\n- `tokens`\n- `utils` (crate-private helpers)\n- `workspace`\n\nN-API exports are generated from Rust `#[napi]` functions/classes/objects/enums. 
Snake_case Rust names are exposed as camelCase JavaScript names unless explicitly configured by napi-rs.\n\n## Ownership boundaries\n\n- **Loader/package ownership (`packages/natives/native`, `packages/natives/scripts`)**\n - runtime binary selection\n - CPU variant selection and override handling\n - compiled-binary embedded archive extraction\n - Windows `node_modules` addon staging\n - generated TypeScript declarations and explicit ESM export/enum patching\n- **Rust ownership (`crates/pi-natives/src`)**\n - algorithmic and system-level implementation\n - platform-native behavior and performance-sensitive logic\n - N-API symbol implementation consumed directly by package callers\n- **Consumer ownership (`packages/coding-agent`, `packages/tui`)**\n - user-facing policy and fallbacks that are not built into the native API\n - higher-level rendering, artifact, shell-session, and command behavior\n\n## Runtime flow (high level)\n\n1. Consumer imports from `@oh-my-pi/pi-natives`.\n2. `native/index.js` computes platform/arch/variant and candidate paths.\n3. Optional embedded archive extraction or Windows `node_modules` staging can prepend a versioned-cache candidate.\n4. Each candidate is `require(...)`d; install/compiled loads must expose the package-version sentinel.\n5. The loaded addon object is bound to explicit named ESM exports, including generated enum objects.\n6. Caller invokes generated N-API functions/classes directly.\n\n## Glossary\n\n- **Native addon**: A `.node` binary loaded via Node-API (N-API).\n- **Platform tag**: Runtime tuple `platform-arch` (for example `darwin-arm64`).\n- **Platform leaf package**: Per-platform npm package `@oh-my-pi/pi-natives-<tag>` that carries one platform's prebuilt `.node`. 
The core depends on every leaf via `optionalDependencies`; the package manager installs only the host-matching one (`os`/`cpu`).\n- **Variant**: x64 CPU-specific build flavor (`modern` AVX2, `baseline` fallback).\n- **Generated binding declaration**: `native/index.d.ts` emitted by napi-rs during `build-native.ts`.\n- **Version sentinel**: Rust export named from the package version (for example `__piNativesV16_0_3`) that lets the loader reject a `.node` from a different release.\n- **Compiled binary mode**: Runtime mode where the CLI is bundled and native addons are resolved from embedded/cache paths before package-local paths.\n- **Embedded addon**: Build artifact metadata and archive reference generated into `native/embedded-addon.js` so compiled binaries can extract matching `.node` payloads.\n",
41
42
  "natives-binding-contract.md": "# Natives Binding Contract (JavaScript/TypeScript Side)\n\nThis document defines the JS/TS contract between `@oh-my-pi/pi-natives` callers and the loaded N-API addon.\n\nCurrent package shape is direct-to-native: there is no `packages/natives/src/<module>` TypeScript wrapper layer. The public API is the generated `packages/natives/native/index.d.ts` declaration file, the ESM loader/export wrapper in `packages/natives/native/index.js`, and the Rust `#[napi]` exports in `crates/pi-natives/src`.\n\n## Implementation files\n\n- `packages/natives/native/index.js`\n- `packages/natives/native/index.d.ts`\n- `packages/natives/native/loader-state.js`\n- `packages/natives/scripts/build-native.ts`\n- `packages/natives/scripts/gen-enums.ts`\n- `packages/natives/package.json`\n- `crates/pi-natives/src/lib.rs`\n- Rust modules under `crates/pi-natives/src/*.rs`\n\n## Contract model\n\nThe contract has three parts:\n\n1. **ESM runtime loader/export wrapper** (`native/index.js`)\n - calls `loadNative()` from `loader-state.js`, which `require(...)`s the `.node` addon;\n - binds generated classes/functions as explicit named ESM exports;\n - emits enum runtime objects generated by `scripts/gen-enums.ts`.\n2. **Generated TypeScript declarations** (`native/index.d.ts`)\n - generated by napi-rs during `scripts/build-native.ts`;\n - declares exported functions, classes, object interfaces, and native enums;\n - is the package `types` entry.\n3. **Rust N-API exports** (`crates/pi-natives/src`)\n - `#[napi]` functions/classes/objects/enums are the source of generated declarations and runtime symbols;\n - snake_case Rust names become camelCase JavaScript names by napi-rs convention.\n\nThere is no current `NativeBindings` declaration-merging lifecycle and no full required-export list in the loader. 
Install/compiled loads do validate the package-version sentinel export; workspace-dev loads skip that check.\n\n## Public export surface organization\n\n`packages/natives/package.json` exposes the package root only:\n\n```json\n{\n \"main\": \"./native/index.js\",\n \"types\": \"./native/index.d.ts\",\n \"exports\": {\n \".\": {\n \"types\": \"./native/index.d.ts\",\n \"import\": \"./native/index.js\"\n }\n }\n}\n```\n\nConsumers in `packages/coding-agent` and `packages/tui` import directly from `@oh-my-pi/pi-natives`.\n\n## JS API ↔ native export mapping (representative)\n\n| Category | Public JS API | Rust source | Return style |\n| ----------------- | --------------------------------------------------------------------------------------------------------- | ------------------------------------------------ | -------------------------- |\n| Grep | `grep(options, onMatch?)` | `grep.rs` | `Promise<GrepResult>` |\n| Grep | `search(content, options)` | `grep.rs` | `SearchResult` |\n| Grep | `hasMatch(content, pattern, ignoreCase?, multiline?)` | `grep.rs` | `boolean` |\n| Fuzzy path search | `fuzzyFind(options)` | `fd.rs` | `Promise<FuzzyFindResult>` |\n| Glob/workspace | `glob(options, onMatch?)`, `listWorkspace(options)` | `glob.rs`, `workspace.rs` | `Promise<...>` |\n| Glob cache | `invalidateFsScanCache(path?)` | `fs_cache.rs` | `void` |\n| AST/block/summary | `astGrep(options)`, `astMatch(options)`, `astEdit(options)`, `blockRangeAt(options)`, `enclosingBlockBoundaries(options)`, `summarizeCode(options)` | `ast.rs`, `block.rs`, `summary.rs` | mixed |\n| Shell | `executeShell(options, onChunk?)` | `shell.rs` | `Promise<ShellRunResult>` |\n| Shell | `new Shell(options?)`, `shell.run(...)`, `shell.abort()` | `shell.rs` | class / promises |\n| Shell | `applyBashFixups(command)` | `shell.rs` | `BashFixupResult` |\n| PTY | `new PtySession()`, `start/write/resize/kill` | `pty.rs` | class / promises |\n| Process | `Process.fromPid/fromPath`, 
`status/children/killTree/terminate/waitForExit` | `ps.rs` | class / mixed |\n| Keys | `parseKey`, `matchesKey`, Kitty/legacy helpers | `keys.rs` | sync |\n| Text | `wrapTextWithAnsi`, `truncateToWidth`, `sliceWithWidth`, `extractSegments`, `visibleWidth` | `text.rs` | sync |\n| Highlight | `highlightCode`, `supportsLanguage`, `getSupportedLanguages` | `highlight.rs` | sync |\n| HTML | `htmlToMarkdown(html, options?)` | `html.rs` | `Promise<string>` |\n| SIXEL | `encodeSixel` | `sixel.rs` | sync |\n| Snapcompact | `renderSnapcompactPng(text, options)` | `snapcompact.rs` | sync |\n| Clipboard | `copyToClipboard`, `readImageFromClipboard` | `clipboard.rs` | sync / promise |\n| Tokens | `countTokens(input, encoding?)` | `tokens.rs` | sync |\n| System/isolation | `detectMacOSAppearance`, `MacAppearanceObserver`, `MacOSPowerAssertion`, `getWorkProfile`, `iso*` helpers | `appearance.rs`, `power.rs`, `prof.rs`, `iso.rs` | mixed |\n\n## Sync vs async contract differences\n\nThe contract preserves Rust/N-API call style:\n\n- **Promise-returning exports** for worker-thread or async runtime work (`grep`, `glob`, `fuzzyFind`, `astGrep`, `astMatch`, `astEdit`, `htmlToMarkdown`, shell/PTY runs, `isoStart`/`isoStop`/`isoDiff`, clipboard image read, workspace scan).\n- **Synchronous exports** for deterministic in-memory transforms/parsers or direct system calls (`search`, `hasMatch`, highlighting, text utilities, token counting, process construction/status, `copyToClipboard`, `encodeSixel`, isolation probe/resolve helpers).\n- **Constructor exports** for stateful runtime objects (`Shell`, `PtySession`, `Process`, macOS observer/power handles).\n\nChanging sync ↔ async for an existing export is a breaking public API change because consumers call these exports directly.\n\n## Object and enum typing patterns\n\n### Object patterns\n\n`#[napi(object)]` Rust structs become TS interfaces, for example:\n\n- `GrepResult`, `SearchResult`, `GlobResult`, `FuzzyFindResult`\n- 
`ShellRunResult`, `PtyRunResult`, `MinimizerResult`\n- `AstFindResult`, `AstReplaceResult`, `BlockRange`, `SummaryResult`\n- `System`/media/isolation payloads such as `ClipboardImage`, `WorkProfile`, `ParsedKittyResult`, `IsoResolveResult`\n\nRuntime shape correctness is owned by napi-rs and the Rust implementation.\n\n### Enum patterns\n\nNative enums are represented in generated declarations and also emitted as runtime objects by `scripts/gen-enums.ts`, because napi-rs string enums are TS-only without explicit JS exports. Current enum objects include:\n\n- `AstMatchStrictness`\n- `Ellipsis`\n- `Encoding`\n- `FileType`\n- `GrepOutputMode`\n- `IsoBackendKind`\n- `IsoChangeKind`\n- `KeyEventType`\n- `MacOSAppearance`\n- `ProcessStatus`\n\n## Error behavior and caveats\n\n- Addon load failure or unsupported platform throws during package import from `native/index.js`.\n- The loader rejects install/compiled candidates that lack the package-version sentinel export. It does not verify the full export set after `require(...)`; stale same-version or incomplete binaries surface as native load errors or missing members at use sites.\n- N-API conversion validates basic argument conversion, but TS optional fields do not guarantee semantic validity for untyped callers.\n- Numeric enum declarations do not prevent out-of-range numeric values from untyped callers unless the Rust function rejects them during conversion.\n- Callback exports use napi-rs `ThreadsafeFunction` shape: `(error: Error | null, value) => void`. Native code generally emits successful values; hard failures reject/throw through the owning call.\n\n## Maintainer checklist for binding changes\n\nWhen adding/changing an export, update all of:\n\n1. Rust `#[napi]` implementation in the owning `crates/pi-natives/src/<module>.rs`.\n2. `crates/pi-natives/src/lib.rs` if a new module is added.\n3. Any consumer imports/callsites in `packages/coding-agent` or `packages/tui`.\n4. 
Build output by running the natives build so `native/index.d.ts` and `native/index.js` stay in sync.\n5. `scripts/gen-enums.ts` if enum runtime export patching needs to change.\n\nDo not add a parallel TS wrapper convention unless the package design intentionally moves back to wrappers; current consumers depend on the direct generated API.\n",
@@ -49,8 +50,9 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
49
50
  "plugin-manager-installer-plumbing.md": "# Plugin manager and installer plumbing\n\nThis document describes how `omp plugin` npm/git/link operations mutate plugin state on disk and how installed npm/git/link plugins become runtime capabilities (tools and extensions today, hooks/commands path resolution available). Marketplace installs use separate marketplace registries and cache plumbing; see `docs/marketplace.md`.\n\n## Scope and architecture\n\nThere are two plugin-management implementations in the codebase:\n\n1. **Active path used by CLI commands**: `PluginManager` (`src/extensibility/plugins/manager.ts`)\n2. **Legacy helper module**: installer functions (`src/extensibility/plugins/installer.ts`)\n\n`omp plugin` npm/git/link actions go through `PluginManager`; marketplace actions go through `MarketplaceManager`. `install` classifies each target (`classifyInstallTarget` in `cli/classify-install-target.ts`): `name@marketplace` routes to the marketplace manager, local paths route to `PluginManager.link()`, git and npm specs to `PluginManager.install()`.\n\n`installer.ts` still documents important safety checks and filesystem behavior, but it is not the path used by `src/commands/plugin.ts` + `src/cli/plugin-cli.ts`.\n\n## Lifecycle: from CLI invocation to runtime availability\n\n```text\nomp plugin <npm/link action> ...\n -> src/commands/plugin.ts\n -> runPluginCommand(...) in src/cli/plugin-cli.ts\n -> PluginManager method (install/list/uninstall/link/...)\n -> mutate ~/.omp/plugins/{package.json,node_modules,omp-plugins.lock.json}\n -> runtime discovery: discoverAndLoadCustomTools(...) 
and discoverAndLoadExtensions(...)\n -> getAllPluginToolPaths(cwd) / getAllPluginExtensionPaths(cwd)\n -> custom tool loader imports tool modules; extension loader imports extension modules\n\nomp plugin install name@marketplace / omp install name@marketplace\n -> MarketplaceManager\n -> mutate ~/.omp/marketplaces.json, ~/.omp/plugins/installed_plugins.json, cache dirs\n -> installed marketplace plugin cache is surfaced as plugin roots/capabilities\n```\n\n### Command entrypoints\n\n- `src/commands/plugin.ts` defines command/flags and forwards to `runPluginCommand`.\n- `src/cli/plugin-cli.ts` maps npm/link subcommands to `PluginManager` methods:\n - `install`, `uninstall`, `list`, `link`, `doctor`, `features`, `config`, `enable`, `disable`\n- `discover`, `upgrade`, and `marketplace ...` subcommands use `MarketplaceManager`.\n- No explicit npm-plugin `update` action exists; update is done by re-running `install` with a new package/version spec.\n\n## On-disk model\n\nGlobal plugin state lives under `~/.omp/plugins`:\n\n- `package.json` — dependency manifest used by `bun install`/`bun uninstall` for npm-installed plugins\n- `node_modules/` — installed npm plugin packages or symlinks\n- `omp-plugins.lock.json` — runtime state for npm/link plugins:\n - enabled/disabled per plugin\n - selected feature set per plugin\n - persisted plugin settings\n\nProject-local overrides live at:\n\n- `<cwd>/.omp/plugin-overrides.json`\n\nOverrides are read-only from manager/loader perspective (no write path here) and can disable plugins or override features/settings for this project.\n\nMarketplace registries live separately:\n\n- `~/.omp/marketplaces.json` — configured marketplace catalogs\n- `~/.omp/plugins/installed_plugins.json` — user-scoped marketplace installs\n- `<cwd>/.omp/plugins/installed_plugins.json` — project-scoped marketplace installs when available\n- `~/.omp/plugins/cache/{marketplaces,plugins}/` — cached catalogs and plugin directories\n\n## Plugin spec parsing and 
metadata interpretation\n\n## Install spec grammar\n\n`parsePluginSpec` (`parser.ts`) supports:\n\n- `pkg` -> `features: null` (defaults behavior)\n- `pkg[*]` -> enable all manifest features\n- `pkg[]` -> enable no optional features\n- `pkg[a,b]` -> enable named features\n- `@scope/pkg@1.2.3[feat]` -> scoped + versioned package with explicit feature selection\n\n`PluginManager.install` also accepts git sources (validated by `validateGitSpec` instead of the npm regex): namespaced shorthands `github:user/repo[#ref]`, `gitlab:`, `bitbucket:`, `codeberg:`, `sourcehut:`/`srht:`, and full git URLs (`https://github.com/user/repo`, `git@github.com:user/repo`, `ssh://…`, `git+https://…`). Git specs do not encode the package name, so install diffs `plugins/package.json#dependencies` before/after `bun install` to resolve it.\n\n`extractPackageName` strips version suffix for on-disk path lookup after install.\n\n## Manifest source and required fields\n\nManifest is resolved as:\n\n1. `package.json.omp`\n2. fallback `package.json.pi`\n3. fallback `{ version: package.version }`\n\nImplications:\n\n- There is no strict schema validation in manager/loader.\n- A package missing `omp`/`pi` is still installable and listable.\n- Runtime plugin loading (`getEnabledPlugins`) skips packages without `omp`/`pi` manifest.\n- `manifest.version` is always overwritten from package `version`.\n\nMalformed `package.json` JSON is a hard failure at read time; malformed manifest shape may fail later only when specific fields are consumed.\n\n## Install/update flow (`PluginManager.install`)\n\n1. Parse feature bracket syntax from install spec.\n2. Validate the spec: git specs via `validateGitSpec`; npm specs against the package-name regex + shell-metacharacter denylist.\n3. Ensure plugin `package.json` exists (`omp-plugins`, private dependencies map).\n4. Run `bun install <packageSpec>` in `~/.omp/plugins`.\n5. 
Resolve the installed package name (npm: strip version via `extractPackageName`; git: diff `dependencies` before/after) and read `node_modules/<name>/package.json`.\n6. Resolve manifest and compute `enabledFeatures`:\n - `[*]`: all declared features (or `null` if no feature map)\n - `[a,b]`: validates each feature exists in manifest features map\n - `[]`: empty feature list\n - bare spec: `null` (use defaults policy later in loader)\n7. Validate declared extension entries (`#validateInstalledExtensions`): each manifest `extensions` entry must resolve on disk and import to a factory function. On failure, roll back the install — restore the previous `plugins/package.json`, remove the freshly installed package, and restore any prior version from a backup taken before `bun install` — then abort.\n8. Upsert lockfile runtime state: `{ version, enabledFeatures, enabled: true }`.\n\n### Update semantics\n\nBecause update is install-driven:\n\n- `omp plugin install pkg@newVersion` updates dependency and lockfile version.\n- Existing settings are preserved; state entry is overwritten for version/features/enabled.\n- No separate “check updates” or transactional migration logic exists.\n\n## Remove flow (`PluginManager.uninstall`)\n\n1. Validate package name.\n2. Run `bun uninstall <name>` in plugin dir.\n3. Remove plugin runtime state from lockfile:\n - `config.plugins[name]`\n - `config.settings[name]`\n\nIf uninstall command fails, runtime state is not changed.\n\n## List flow (`PluginManager.list`)\n\n1. Read plugin dependency map from `~/.omp/plugins/package.json`.\n2. Load lockfile runtime config (missing file -> empty defaults).\n3. Load project overrides (`<cwd>/.omp/plugin-overrides.json`, parse/read errors -> empty object with warning).\n4. 
For each dependency with a resolvable package.json:\n - build `InstalledPlugin` record\n - merge feature/enable state:\n - base from lockfile (or defaults)\n - project overrides can replace feature selection\n - project `disabled` list masks plugin as disabled\n\nThis is the effective state used by CLI status output and settings/features operations.\n\n## Link flow (`PluginManager.link`)\n\n`link` supports local plugin development by symlinking a local package into `~/.omp/plugins/node_modules/<pkg.name>`.\n\nBehavior:\n\n1. Resolve `localPath` against manager cwd.\n2. Require local `package.json` and `name` field.\n3. Ensure plugin dirs exist.\n4. For scoped names, create scope directory.\n5. Remove existing path at target link location.\n6. Create symlink.\n7. Add runtime lockfile entry enabled with default features (`null`).\n\nCaveat: current `PluginManager.link` does not enforce the `cwd` path-boundary check present in legacy `installer.ts` (`normalizedPath.startsWith(normalizedCwd)`), so trust is the caller’s responsibility.\n\n## Runtime loading: from installed plugin to callable capabilities\n\n## Discovery gate\n\n`getEnabledPlugins(cwd)` (`plugins/loader.ts`) reads:\n\n- plugin dependency manifest (`package.json`), unioned with lockfile plugin entries so `plugin link`-only plugins without a dependency entry are still discovered\n- lockfile runtime state\n- project overrides via `getConfigDirPaths(\"plugin-overrides.json\", { user: false, cwd })`\n\nFiltering:\n\n- skip if no plugin package.json\n- skip if manifest (`omp`/`pi`) absent\n- skip if globally disabled in lockfile\n- skip if project-disabled\n\n## Capability path resolution\n\nFor each enabled plugin:\n\n- `resolvePluginExtensionPaths(plugin)`\n- `resolvePluginToolPaths(plugin)`\n- `resolvePluginHookPaths(plugin)`\n- `resolvePluginCommandPaths(plugin)`\n\nEach resolver includes base entries plus feature entries:\n\n- base entries are always included\n- explicit feature list -> only selected 
features\n- `enabledFeatures === null` -> enable features marked `default: true`\n\nManifest entries may point to a file or to a directory containing `index.ts`, `index.js`, `index.mjs`, or `index.cjs`. Missing files are silently skipped (`statSync`/`existsSync` guard).\n\n## Current runtime wiring differences\n\n- **Tools are wired into runtime today** via `discoverAndLoadCustomTools` (`custom-tools/loader.ts`), which calls `getAllPluginToolPaths(cwd)`.\n- **Extensions are wired into runtime today** via `discoverAndLoadExtensions` (`extensions/loader.ts`), which calls `getAllPluginExtensionPaths(cwd)`.\n- Paths are de-duplicated by resolved absolute path in custom tool and extension discovery (`seen` set, first path wins).\n- **Hooks/commands resolvers exist** and are exported, but this code path does not currently wire them into a runtime registry in the same way tools and extensions are wired.\n\n## Lock/state management details\n\n`PluginManager` caches runtime config in memory per instance (`#runtimeConfig`) and lazily loads once.\n\nLoad behavior:\n\n- lockfile missing -> `{ plugins: {}, settings: {} }`\n- lockfile read/parse failure -> warning + same empty defaults\n\nSave behavior:\n\n- writes full lockfile JSON pretty-printed each mutation\n\nNo cross-process locking or merge strategy exists; concurrent writers can overwrite each other.\n\n## Safety checks and trust boundaries\n\n## Input/package validation\n\nActive manager path enforces package-name validation:\n\n- npm specs: a package-name regex (`VALID_PACKAGE_NAME`) for scoped/unscoped specs, optionally with version.\n- npm shell-metacharacter denylist: `;`, `&`, `|`, backtick, `$`, `(`, `)`, `{`, `}`, `[`, `]`, `<`, `>`, `\\` — applied after `parsePluginSpec` strips the feature brackets, so a normal `pkg[feat]` spec never reaches it.\n- git specs: `validateGitSpec` rejects only the shared `SHELL_METACHARS` set (`;`, `&`, `|`, backtick, `$`, `(`, `)`, `{`, `}`, `<`, `>`, `\\`, newline, CR, tab) 
instead of the npm regex, so `:`, `/`, `#`, `+`, `.`, `-`, `_`, `~`, `@` are permitted.\n\nThis limits command-injection risk when invoking `bun install/uninstall`.\n\n## Filesystem trust boundary\n\n- Plugin code executes in-process when custom tool modules are imported; no sandboxing.\n- Manifest relative paths are joined against plugin package directory and only existence-checked.\n- The plugin package itself is trusted code once installed.\n\n## Legacy installer-only checks\n\n`installer.ts` includes additional link-time checks not mirrored in `PluginManager.link`:\n\n- local path must resolve inside project cwd\n- extra package name/path traversal guards for symlink target naming\n\nBecause CLI uses `PluginManager`, these stricter link guards are not currently on the main path.\n\n## Failure, partial success, and rollback behavior\n\nThe plugin manager is not fully transactional: only a failed extension validation during install is rolled back; other mid-operation failures can leave partial state behind, as the table shows.\n\n| Operation stage | Failure behavior | Rollback |\n| -------------------------------------------------------- | -------------------------- | ----------------------------------------------------------------------------- |\n| `bun install` fails | install aborts with stderr | N/A (no state writes yet) |\n| Install succeeds, then feature validation fails | command fails | No uninstall rollback; dependency may remain in `node_modules`/`package.json` |\n| Install succeeds, then extension validation fails | command fails | Rolls back: restores `package.json`, removes installed package, restores prior version from backup |\n| Install succeeds, then lockfile write fails | command fails | No rollback of installed package |\n| `bun uninstall` succeeds, lockfile write fails | command fails | Package removed, stale runtime state may remain |\n| `link` removes old target then symlink creation fails | command fails | No restoration of previous link/dir |\n\nOperationally, `doctor --fix` can repair some drift (`bun install`, orphaned config cleanup, invalid-feature cleanup), but it is 
best-effort.\n\n## Malformed/missing manifest behavior summary\n\n- Missing `omp`/`pi` field:\n - install/list: tolerated (minimal manifest)\n - runtime enabled-plugin discovery: skipped as non-plugin\n- Missing feature referenced by install spec or `features --set/--enable`: hard error with available feature list\n- Invalid `plugin-overrides.json`: ignored with fallback to `{}` in both manager and loader paths\n- Missing tool/hook/command file paths referenced by manifest: silently ignored during resolver expansion; flagged as errors only by `doctor`\n\n## Mode differences and precedence\n\n- `--dry-run` (install): returns a synthetic install result with no `bun install`, no network, and no lockfile/runtime-state writes (it still ensures the plugins `package.json` skeleton exists).\n- `--json`: output formatting only, no behavior change.\n- Project overrides always take precedence over global lockfile for feature/settings view.\n- Effective enablement is `runtimeEnabled && !projectDisabled`.\n\n## Implementation files\n\n- [`src/commands/plugin.ts`](../packages/coding-agent/src/commands/plugin.ts) — CLI command declaration and flag mapping\n- [`src/cli/plugin-cli.ts`](../packages/coding-agent/src/cli/plugin-cli.ts) — action dispatch, user-facing command handlers\n- [`src/extensibility/plugins/manager.ts`](../packages/coding-agent/src/extensibility/plugins/manager.ts) — active install/remove/list/link/state/doctor implementation\n- [`src/extensibility/plugins/installer.ts`](../packages/coding-agent/src/extensibility/plugins/installer.ts) — legacy installer helpers and additional link safety checks\n- [`src/extensibility/plugins/loader.ts`](../packages/coding-agent/src/extensibility/plugins/loader.ts) — enabled-plugin discovery and extension/tool/hook/command path resolution\n- [`src/extensibility/plugins/parser.ts`](../packages/coding-agent/src/extensibility/plugins/parser.ts) — install spec and package-name parsing helpers\n- 
[`src/extensibility/plugins/types.ts`](../packages/coding-agent/src/extensibility/plugins/types.ts) — manifest/runtime/override type contracts\n- [`src/extensibility/custom-tools/loader.ts`](../packages/coding-agent/src/extensibility/custom-tools/loader.ts) — runtime wiring for plugin-provided tool modules\n- [`src/extensibility/extensions/loader.ts`](../packages/coding-agent/src/extensibility/extensions/loader.ts) — runtime wiring for plugin-provided extension modules\n",
50
51
  "porting-from-pi-mono.md": "# Porting From pi-mono: A Practical Merge Guide\n\nThis guide is a repeatable checklist for porting changes from pi-mono into this repo.\nUse it for any merge: single file, feature branch, or full release sync.\n\n## Last Sync Point (historical upstream marker)\n\n**Commit:** `b21b42d032919de2f2e6920a76fa9a37c3920c0a`\n**Date:** 2026-03-22\n\nUpdate this section after each sync; do not reuse the previous range. This commit is an upstream pi-mono marker and may not exist in this repo's local object database.\n\nWhen starting a new sync, generate patches from this commit forward in a pi-mono checkout or remote that contains the commit:\n\n```bash\ngit format-patch b21b42d032919de2f2e6920a76fa9a37c3920c0a..HEAD --stdout > changes.patch\n```\n\n## 0) Define the scope\n\n- Identify the upstream reference (commit, tag, or PR).\n- List the packages or folders you plan to touch.\n- Decide which features are in-scope and which are intentionally skipped.\n\n## 1) Bring code over safely\n\n- Prefer a clean, focused diff rather than a wholesale copy.\n- Avoid copying built artifacts or generated files.\n- If upstream added new files, add them explicitly and review contents.\n\n## 2) Match import extension conventions\n\nMost runtime TypeScript sources omit `.js` in internal imports, but several current entrypoints and tool modules keep `.js` for ESM/runtime compatibility. 
Follow the surrounding file and package export style; do not blanket-strip or blanket-add extensions.\n\n- In `packages/coding-agent` runtime sources, prefer extensionless internal imports when the surrounding module does, but preserve existing `.js` imports in files that already require them.\n- In `packages/tui/test` and `packages/natives/bench`, keep `.js` where surrounding files already use it.\n- Keep real file extensions when required by tooling or import assertions (e.g., `.json`, `.css`, `.md` text embeds).\n- Example: `import { x } from \"./foo.js\";` → `import { x } from \"./foo\";` only when that package/file convention is extensionless.\n\n## 3) Replace import scopes\n\nUpstream uses different package scopes. Replace them consistently.\n\n- Replace old scopes with the local scope used here.\n- Examples (adjust to match the actual packages you are porting):\n - `@mariozechner/pi-coding-agent` → `@oh-my-pi/pi-coding-agent`\n - `@mariozechner/pi-agent-core` → `@oh-my-pi/pi-agent-core`\n - `@mariozechner/pi-tui` → `@oh-my-pi/pi-tui`\n - `@mariozechner/pi-ai` → `@oh-my-pi/pi-ai`\n - `@mariozechner/pi-utils` → `@oh-my-pi/pi-utils`\n- Some upstream packages publish under the `@earendil-works/*` scope instead of `@mariozechner/*`. Map it the same way (`@earendil-works/pi-coding-agent` → `@oh-my-pi/pi-coding-agent`, and so on).\n- The bare `typebox` package is not an `@oh-my-pi/*` scope; do not rewrite it as one. See the Extensions divergence in section 15 for how tool-parameter schemas map.\n\n## 4) Use Bun APIs where they improve on Node\n\nWe run on Bun, but the current source intentionally mixes Bun APIs with small Node standard-library APIs. Replace Node APIs only when Bun provides a clearer, safer, or simpler implementation; do not mechanically rewrite every Node import.\n\n**Prefer replacing when porting new code:**\n\n- Process spawning: prefer Bun Shell `$` for simple commands; use `Bun.spawn`/`Bun.spawnSync` for streaming or process control. 
Keep existing `child_process` only where its exact semantics are needed.\n- HTTP clients: `node-fetch`, `axios` → native `fetch`\n- SQLite: `better-sqlite3` → `bun:sqlite`\n- Env loading: `dotenv` → Bun loads `.env` automatically\n- Runtime text/assets: prefer Bun imports such as `with { type: \"text\" }` or `Bun.file()` over copy steps or bundled fallback file reads.\n\n**DO NOT replace (these work fine in Bun):**\n\n- `os.homedir()` — do NOT replace with `Bun.env.HOME` or literal `\"~\"`\n- `os.tmpdir()` — do NOT replace with `Bun.env.TMPDIR || \"/tmp\"` or hardcoded paths\n- `fs.mkdtempSync()` — do NOT replace with manual path construction\n- `path.join()`, `path.resolve()`, etc. — these are fine\n\n**Import style:** Use the `node:` prefix for Node standard-library imports. Namespace imports are common, but named imports are acceptable where the surrounding code already uses them.\n\n**Additional Bun conventions:**\n\n- Prefer Bun Shell `$` for short, non-streaming commands; use `Bun.spawn` only when you need streaming I/O or process control.\n- Use `Bun.file()`/`Bun.write()` for simple files and `node:fs/promises` for directory-oriented operations. Existing synchronous `node:fs` calls are acceptable when the calling flow is intentionally synchronous.\n- Avoid `Bun.file().exists()` checks; use `isEnoent` handling in try/catch.\n- Prefer `Bun.sleep(ms)` over `setTimeout` wrappers.\n\n**Wrong:**\n\n```typescript\n// BROKEN: env vars may be undefined, \"~\" is not expanded\nconst home = Bun.env.HOME || \"~\";\nconst tmp = Bun.env.TMPDIR || \"/tmp\";\n```\n\n**Correct:**\n\n```typescript\nimport * as os from \"node:os\";\nimport * as fs from \"node:fs\";\nimport * as path from \"node:path\";\n\nconst configDir = path.join(os.homedir(), \".config\", \"myapp\");\nconst tempDir = fs.mkdtempSync(path.join(os.tmpdir(), \"myapp-\"));\n```\n\n## 5) Prefer Bun embeds (no copying)\n\nDo not add new runtime asset copy steps. 
Keep assets in repo and prefer Bun embeds/imports; preserve existing explicit generation workflows such as `packages/coding-agent/src/export/html/tool-views.generated.js` (built from collab-web sources via `bun run build-tool-views`).\n\n- If upstream copies assets into a dist folder, replace with Bun-friendly embeds.\n- Prompts are static `.md` files; use Bun text imports (`with { type: \"text\" }`) and Handlebars instead of inline prompt strings.\n- Use `import.meta.dir` + `Bun.file` to load adjacent non-text resources.\n- Keep assets in-repo and let the bundler include them.\n- Eliminate copy scripts unless the user explicitly requests them or the package already has an intentional generation step.\n- If upstream reads a bundled fallback file at runtime, replace filesystem reads with a Bun text embed import unless the current package already uses a generated asset pipeline.\n - Example (Codex instructions fallback):\n - `const FALLBACK_PROMPT_PATH = join(import.meta.dir, \"codex-instructions.md\");` -> removed\n - `import FALLBACK_INSTRUCTIONS from \"./codex-instructions.md\" with { type: \"text\" };`\n - Use `return FALLBACK_INSTRUCTIONS;` instead of `readFileSync(FALLBACK_PROMPT_PATH, \"utf8\")`\n\n## 6) Port `package.json` carefully\n\nTreat `package.json` as a contract. 
Merge intentionally.\n\n- Keep existing `name`, `version`, `type`, `exports`, and `bin` unless the port requires changes.\n- Replace npm/node scripts with Bun equivalents (e.g., `bun check`, `bun test`).\n- Ensure dependencies use the correct scope.\n- Do not downgrade dependencies to fix type errors; upgrade instead.\n- Validate workspace package links and `peerDependencies`.\n\n## 7) Align code style and tooling\n\n- Keep existing formatting conventions.\n- Do not introduce `any` unless required.\n- Avoid dynamic imports unless they are required for optional dependencies, startup cost, or runtime-only modules; prefer top-level imports otherwise.\n- Never build prompts in code; prompts are static `.md` files rendered with Handlebars.\n- In `packages/coding-agent`, use `logger` from `@oh-my-pi/pi-utils` for internal/runtime logging; CLI command files may use `console.*` for intentional user-facing output.\n- Use `Promise.withResolvers()` instead of `new Promise((resolve, reject) => ...)`.\n- Prefer ES `#` private fields for new encapsulated state. 
Constructor parameter properties already exist in current code and are acceptable; do not churn unrelated access modifiers while porting.\n- Prefer existing helpers and utilities over new ad-hoc code.\n Preserve Bun-first infrastructure changes already made in this repo:\n - Runtime is Bun (no Node entry points for the main CLI).\n - Package manager is Bun (no npm lockfiles).\n - Heavy Node APIs should not be introduced casually; current source still uses selected Node APIs (`node:crypto`, `node:readline`, synchronous `node:fs`, and `child_process`) where they fit provider, CLI, or process-control semantics.\n - Lightweight Node APIs (`os.homedir`, `os.tmpdir`, `fs.mkdtempSync`, `path.*`) are kept.\n - CLI shebangs use `bun` (not `node`, not `tsx`).\n - TypeScript packages generally use source files directly; `@oh-my-pi/pi-natives` exports generated native bindings from `packages/natives/native`.\n - CI workflows run Bun for install/check/test.\n\n## 8) Remove old compatibility layers\n\nUnless requested, remove upstream compatibility shims.\n\n- Delete old APIs that were replaced.\n- Update all call sites to the new API directly.\n- Do not keep `*_v2` or parallel versions.\n\n## 9) Update docs and references\n\n- Replace pi-mono repo links where appropriate.\n- Update examples to use Bun and correct package scopes.\n- Ensure README instructions still match the current repo behavior.\n\n## 10) Validate the port\n\nRun the standard checks after changes:\n\n- `bun check`\n\nIf the repo already has failing checks unrelated to your changes, call that out.\nTests use Bun's runner (not Vitest), but only run `bun test` when explicitly requested.\n\n## 11) Protect improved features (regression trap list)\n\nIf you already improved behavior locally, treat those as **non‑negotiable**. 
Before porting, write down\nthe improvements and add explicit checks so they don’t get lost in the merge.\n\n- **Freeze the expected behavior**: add a short “before/after” note for each improvement (inputs, outputs,\n defaults, edge cases). This prevents silent rollback.\n- **Map old → new APIs**: if upstream renamed concepts (hooks → extensions, custom tools → tools, etc.),\n ensure every old entry point still wires through. One missed flag or export equals lost functionality.\n- **Verify exports**: check `package.json` `exports`, public types, and barrel files. Upstream ports often\n forget to re-export local additions.\n- **Cover non‑happy paths**: if you fixed error handling, timeouts, or fallback logic, add a test or at\n least a manual checklist that exercises those paths.\n- **Check defaults and config merge order**: improvements often live in defaults. Confirm new defaults\n didn’t revert (e.g., new config precedence, disabled features, tool lists).\n- **Audit env/shell behavior**: if you fixed execution or sandboxing, verify the new path still uses your\n sanitized env and does not reintroduce alias/function overrides.\n- **Re-run targeted samples**: keep a minimal set of \"known good\" examples and run them after the port\n (CLI flags, extension registration, tool execution).\n\n## 12) Detect and handle reworked code\n\nBefore porting a file, check if upstream significantly refactored it:\n\n```bash\n# Compare the file you're about to port against what you have locally\ngit diff HEAD upstream/main -- path/to/file.ts\n```\n\nIf the diff shows the file was **reworked** (not just patched):\n\n- New abstractions, renamed concepts, merged modules, changed data flow\n\nThen you must **read the new implementation thoroughly** before porting. Blind merging of reworked code loses functionality because:\n\nNote: interactive mode was recently split into controllers/utils/types. 
When backporting related changes, port updates into the individual files we created and ensure `interactive-mode.ts` wiring stays in sync.\n\n1. **Defaults change silently** - A new variable `defaultFoo = [a, b]` may replace an old `getAllFoo()` that returned `[a, b, c, d, e]`.\n\n2. **API options get dropped** - When systems merge (e.g., `hooks` + `customTools` → `extensions`), old options may not wire through to the new implementation.\n\n3. **Code paths go stale** - A renamed concept (e.g., `hookMessage` → `custom`) needs updates in every switch statement, type guard, and handler—not just the definition.\n\n4. **Context/capabilities shrink** - Old APIs may have exposed `{ logger, typebox, pi }` that new APIs forgot to include.\n\n### Semantic porting process\n\nWhen upstream reworked a module:\n\n1. **Read the old implementation** - Understand what it did, what options it accepted, what it exposed.\n\n2. **Read the new implementation** - Understand the new abstractions and how they map to old behavior.\n\n3. **Verify feature parity** - For each capability in the old code, confirm the new code preserves it or explicitly removes it.\n\n4. **Grep for stragglers** - Search for old names/concepts that may have been missed in switch statements, handlers, UI components.\n\n5. 
**Test the boundaries** - CLI flags, SDK options, event handlers, default values—these are where regressions hide.\n\n### Quick checks\n\n```bash\n# Find all uses of an old concept that may need updating\nrg \"oldConceptName\" --type ts\n\n# Compare default values between versions\ngit show upstream/main:path/to/file.ts | rg \"default|DEFAULT\"\n\n# Check if all enum/union values have handlers\nrg \"case \\\"\" path/to/file.ts\n```\n\n## 13) Quick audit checklist\n\nUse this as a final pass before you finish:\n\n- [ ] Import extensions follow the local package convention (no blanket `.js` stripping)\n- [ ] No newly introduced Node-only APIs unless they match an existing justified pattern\n- [ ] All package scopes updated\n- [ ] `package.json` scripts use Bun\n- [ ] Prompts are `.md` text imports (no inline prompt strings)\n- [ ] No internal/runtime `console.*` in coding-agent; CLI user-facing output is intentional\n- [ ] Assets load via Bun embed/import patterns, or through an existing intentional generation pipeline\n- [ ] Tests or checks run (or explicitly noted as blocked)\n- [ ] No functionality regressions (see sections 11-12)\n\n## 14) Commit message format\n\nWhen committing a backport, follow the repo format `<type>(scope): <past-tense description>` and keep the commit\nrange in the title.\n\n```\nfix(coding-agent): backported pi-mono changes (<from>..<to>)\n\npackages/<package>:\n- <type>: <description>\n- <type>: <description> (#<issue> by @<contributor>)\n\npackages/<other-package>:\n- <type>: <description>\n```\n\n**Example:**\n\n```\nfix(coding-agent): backported pi-mono changes (9f3eef65f..52532c7c0)\n\npackages/ai:\n- fix: handle \"sensitive\" stop reason from Anthropic API\n- fix: normalize tool call IDs with special characters for Responses API\n- fix: add overflow detection for Bedrock, MiniMax, Kimi providers\n- fix: 429 status is rate limiting, not context overflow\n\npackages/tui:\n- fix: refactored autocomplete state tracking\n- fix: file 
autocomplete should not trigger on empty text\n- fix: configurable autocomplete max visible items\n- fix: improved table column width calculation with word-aware wrapping\n\npackages/coding-agent:\n- fix: preserve external config.yml edits on save (#1046 by @nicobailonMD)\n- fix: resolve macOS NFD and curly quote variants in file paths\n```\n\n**Rules:**\n\n- Group changes by package\n- Use conventional commit types (`fix`, `feat`, `refactor`, `perf`, `docs`)\n- Include upstream issue/PR numbers and contributor attribution for external contributions\n- The commit range in the title helps track sync points\n\n## 15) Intentional Divergences\n\nOur fork has architectural decisions that differ from upstream. **Do not port these upstream patterns:**\n\n### UI Architecture\n\n| Upstream | Our Fork | Reason |\n| ------------------------------------------- | --------------------------------------------------------- | --------------------------------------------------------------------- |\n| `FooterDataProvider` class | `StatusLineComponent` | Simpler, integrated status line |\n| `ctx.ui.setHeader()` / `ctx.ui.setFooter()` | No-op stubs in current extension contexts | Not currently wired to replace the TUI status/header UI |\n| `ctx.ui.setEditorComponent()` | Wired in interactive mode; no-op stubs in ACP/RPC/headless contexts | Custom editor replacement works in the interactive TUI; non-TUI runtimes keep stubs |\n| `InteractiveModeOptions` options object | Positional constructor args (options type still exported) | Keep constructor signature; update the type when upstream adds fields |\n\n### Component Naming\n\n| Upstream | Our Fork |\n| ---------------------------- | ----------------------- |\n| `extension-input.ts` | `hook-input.ts` |\n| `extension-selector.ts` | `hook-selector.ts` |\n| `ExtensionInputComponent` | `HookInputComponent` |\n| `ExtensionSelectorComponent` | `HookSelectorComponent` |\n\n### API Naming\n\n| Upstream | Our Fork | Notes |\n| 
---------------------------------------- | ---------------------------------------- | ----------------------------------------- |\n| `sessionManager.appendSessionInfo(name)` | `sessionManager.setSessionName(name)` | We use `sessionName` throughout |\n| `sessionManager.getSessionName()` | `sessionManager.getSessionName()` | Same (we unified to match upstream's RPC) |\n| `agent.sessionName` / `setSessionName()` | `agent.sessionName` / `setSessionName()` | Same |\n\n### File Consolidation\n\n| Upstream | Our Fork | Reason |\n| -------------------------------------------------- | --------------------------------------------------------- | --------------------------------------------- |\n| `clipboard.ts` + `clipboard-image.ts` (tool files) | `src/utils/clipboard.ts` backed by `@oh-my-pi/pi-natives` | Native implementation with a small TS wrapper |\n\n### Test Framework\n\n| Upstream | Our Fork |\n| ------------------------- | ----------------------------- |\n| `vitest` with `vi.mock()` | `bun:test` with `vi` from bun |\n| `node:test` assertions | `expect()` matchers |\n\n### Tool Architecture\n\n| Upstream | Our Fork | Notes |\n| ----------------------------------- | ------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------- |\n| `createTool(cwd: string, options?)` | `createTools(session: ToolSession)` via `BUILTIN_TOOLS` registry | Tool factories accept `ToolSession` and can return `null` |\n| Per-tool `*Operations` interfaces | Only current per-tool override interfaces remain (for example `FindOperations`) | Used for SSH/remote overrides where present |\n| Node.js `fs/promises` everywhere | Bun file APIs for simple file writes/reads, `node:fs/promises` for dirs, selected sync `node:fs` where needed | Prefer Bun APIs when they simplify |\n\n### Auth Storage\n\n| Upstream | Our Fork | Notes |\n| ------------------------------- | 
------------------------------------------- | -------------------------------------------- |\n| `proper-lockfile` + `auth.json` | `agent.db` (bun:sqlite) | Credentials stored exclusively in `agent.db` |\n| Single credential per provider | Multi-credential with round-robin selection | Session affinity and backoff logic preserved |\n\n### Extensions\n\n| Upstream | Our Fork |\n| ---------------------------------------------------------------- | ------------------------------------------------------------------------------------------------- |\n| `jiti` for TypeScript loading | Native Bun `import()` |\n| `pkg.pi` manifest field | `pkg.omp` preferred; fallback to `pkg.pi` remains |\n| `StringEnum` from `pi-ai` | `Type.Enum` from the `pi.typebox` shim (or author the schema with `pi.zod`); `pi-ai` no longer exports `StringEnum` |\n| `formatSize` from `pi-coding-agent` | `formatBytes` from `@oh-my-pi/pi-utils` |\n| `DefaultResourceLoader` / `DefaultPackageManager` / `SettingsManager` / `createEventBus` | Capability-based discovery (`loadCapability(...)`) plus the `Settings` singleton and `EventBus` |\n\n### Skip These Upstream Features\n\nWhen porting, **skip** these files/features entirely:\n\n- `footer-data-provider.ts` — we use StatusLineComponent\n- `clipboard-image.ts` — image clipboard support is exposed through `src/utils/clipboard.ts` backed by `@oh-my-pi/pi-natives`\n- GitHub workflow files — we have our own CI\n- `models.generated.ts` — auto-generated, regenerate locally (as models.json instead)\n\n### Features We Added (Preserve These)\n\nThese exist in our fork but not upstream. 
**Never overwrite:**\n\n- `StatusLineComponent` in interactive mode\n- Multi-credential auth with session affinity\n- Capability-based discovery system (`defineCapability`, `registerProvider`, `loadCapability`, `skillCapability`, etc.)\n- MCP/Exa/SSH integrations\n- LSP writethrough for format-on-save\n- Bash interception (`checkBashInterception`)\n- Fuzzy path suggestions in read tool\n",
51
52
  "porting-to-natives.md": "# Porting to pi-natives (N-API) — Field Notes\n\nThis is a practical guide for moving hot paths into `crates/pi-natives` and wiring them through the generated native package entrypoint. It exists to avoid the same failures happening twice.\n\n## When to port\n\nPort when any of these are true:\n\n- The hot path runs in render loops, tight UI updates, or large batches.\n- JS allocations dominate (string churn, regex backtracking, large arrays).\n- You already have a JS baseline and can benchmark both versions side by side.\n- The work is CPU-bound or blocking I/O that can run on the libuv thread pool.\n- The work is async I/O that can run on Tokio's runtime (for example shell execution).\n\nAvoid ports that depend on JS-only state or dynamic imports. N-API exports should be data-in/data-out. Long-running work should go through `task::blocking` (CPU-bound/blocking I/O) or `task::future` (async I/O) with cancellation where the caller needs `timeoutMs` or `AbortSignal`.\n\n## Current package shape\n\n`@oh-my-pi/pi-natives` no longer has a `packages/natives/src/<module>` TypeScript wrapper layer. The package root points at generated native artifacts:\n\n- runtime entry/export wrapper: `packages/natives/native/index.js`\n- types entry: `packages/natives/native/index.d.ts`\n- loader helpers: `packages/natives/native/loader-state.js`\n- embedded manifest: `packages/natives/native/embedded-addon.js`\n\nConsumers import directly from `@oh-my-pi/pi-natives`. The generated declarations and explicit ESM exports are produced during `bun --cwd=packages/natives run build`.\n\n## Anatomy of a native export\n\n**Rust side:**\n\n- Implementation lives in `crates/pi-natives/src/<module>.rs`.\n- If you add a new module, register it in `crates/pi-natives/src/lib.rs`.\n- Export with `#[napi]`; snake_case exports are converted to camelCase automatically. Use explicit JS names only for true aliases/non-default names. 
Use `#[napi(object)]` for object-shaped structs.\n- For CPU-bound or blocking work, use `task::blocking(tag, cancel_token, work)`.\n- For async work that needs Tokio, use `task::future(env, tag, work)`.\n- Pass a `CancelToken` when the API exposes `timeoutMs` or `AbortSignal`, and call `heartbeat()` inside long loops.\n\n**Package/build side:**\n\n- `packages/natives/scripts/build-native.ts` runs napi-rs, installs the `.node` artifact, copies generated `index.d.ts`, and regenerates explicit ESM class/function exports plus enum runtime exports in the checked-in `native/index.js`.\n- `packages/natives/native/index.js` is the ESM entrypoint that calls the loader, exposes named exports, and rejects install/compiled `.node` files that do not expose the package-version sentinel.\n- `packages/natives/package.json` exposes only the package root (`@oh-my-pi/pi-natives`) as the import surface. At publish time the binaries are split out: the core ships the loader only (no `.node`), and each platform's `.node` is published as an optional-dependency leaf package `@oh-my-pi/pi-natives-<tag>` (`scripts/ci-release-publish.ts` + `packages/natives/scripts/gen-npm-packages.ts`). This is transparent to importers — you still `import` from `@oh-my-pi/pi-natives`.\n\n**Consumer side:**\n\n- Update direct imports/callsites in `packages/coding-agent` or `packages/tui` when the new export replaces a JS implementation.\n- Keep higher-level policy in consumers unless it belongs in the native primitive itself.\n\n## Porting checklist\n\n1. 
**Add the Rust implementation**\n\n- Put the core logic in a plain Rust function.\n- If it is a new module, add it to `crates/pi-natives/src/lib.rs`.\n- Expose it with `#[napi]` so the default snake_case -> camelCase mapping stays consistent.\n- Keep signatures owned and simple: `String`, `Vec<String>`, `Uint8Array`, `Either<JsString, Uint8Array>`, or `#[napi(object)]` structs.\n- For CPU-bound or blocking work, use `task::blocking`; for async work, use `task::future`.\n- If exposing cancellation, include `timeout_ms: Option<u32>` and `signal: Option<Unknown<'env>>` in options, create `CancelToken::new(...)`, and heartbeat in long loops.\n\n2. **Build generated bindings**\n\n- Run `bun --cwd=packages/natives run build`.\n- Confirm the generated `packages/natives/native/index.d.ts` includes the new export with the intended JS name/signature.\n- Confirm `packages/natives/native/index.js` has generated explicit ESM exports for the new class/function and enum objects when enum changes are involved.\n\n3. **Update consumers**\n\n- Import the new export directly from `@oh-my-pi/pi-natives`.\n- Replace only callsites where the native implementation is faster/equivalent and preserves behavior.\n- Remove obsolete JS implementation code in the same change when the native path becomes canonical.\n\n4. **Add benchmarks**\n\n- Put benchmarks next to the owning package (`packages/tui/bench`, `packages/natives/bench`, or `packages/coding-agent/bench`).\n- Include a JS baseline and native version in the same run.\n- Use `Bun.nanoseconds()` and a fixed iteration count.\n- Keep benchmark inputs realistic for the hot path.\n\n5. **Run focused verification**\n\n- Build the native package.\n- Run the benchmark.\n- Run the narrow tests or scenario covering the changed export/callsites.\n\n## Pain points and how to avoid them\n\n### 1) Stale platform/variant artifacts\n\nThe loader probes platform-tagged artifacts in deterministic order. 
For x64, selected variant candidates are tried before the unsuffixed default fallback:\n\n- `modern`: `pi_natives.<tag>-modern.node`, then `...-baseline.node`, then `pi_natives.<tag>.node`.\n- `baseline`: `pi_natives.<tag>-baseline.node`, then `pi_natives.<tag>.node`.\n\nNon-x64 uses `pi_natives.<tag>.node`.\n\nCompiled binaries also probe `<getNativesDir()>/<version>/...` and a legacy user-data directory before package/executable locations. Windows `node_modules` installs stage leaf/core addons into the same versioned directory before probing. If any earlier candidate is stale, a new export may appear missing unless the version sentinel rejects it first.\n\n**Fix:** remove stale candidate/cache files and rebuild.\n\n```bash\nrm packages/natives/native/pi_natives.<platform>-<arch>.node\nrm packages/natives/native/pi_natives.<platform>-<arch>-modern.node\nrm packages/natives/native/pi_natives.<platform>-<arch>-baseline.node\nbun --cwd=packages/natives run build\n```\n\nFor compiled binaries or Windows staging, delete the versioned addon cache shown in the loader error (normally under `~/.omp/natives/<version>` unless `$XDG_DATA_HOME/omp` is used).\n\n### 2) Generated types do not match loaded binary\n\nThis can happen when `native/index.d.ts` was regenerated but the `.node` file being loaded is stale, same-version incomplete, or from a different platform/variant. Different-version install/compiled binaries should be rejected by the version sentinel during loading.\n\nVerify the loaded export set from the actual candidate path reported by the loader:\n\n```bash\nbun -e 'import { createRequire } from \"node:module\"; const require = createRequire(import.meta.url); const mod = require(process.argv[2]); console.log(Object.keys(mod).sort())' -- /path/from/loader/error/pi_natives.<tag>[-variant].node\n```\n\nFix the build/candidate mismatch. 
Do not paper over it with optional consumer checks if the export is required.\n\n### 3) Rust signature mismatch\n\nKeep N-API signatures simple and owned. Avoid borrowed references like `&str` in public exports. If you need structured data, use `#[napi(object)]` structs. If you need callbacks, use napi-rs `ThreadsafeFunction` and keep callback error/value behavior explicit.\n\n### 4) Enum runtime exports and ESM named exports\n\nnapi-rs declarations alone are not enough for JS callers that import named symbols or use enum objects at runtime. `scripts/gen-enums.ts` reads `native/index.d.ts`, writes explicit `export const ... = nativeBindings...` entries for public classes/functions, and emits enum objects in `native/index.js`. If you add or change a native export, verify both `native/index.d.ts` and the generated export block in `native/index.js`.\n\n### 5) Benchmarking mistakes\n\n- Do not compare different inputs or allocations.\n- Keep JS and native using identical input arrays.\n- Run both in the same benchmark file to avoid skew.\n- Include enough iterations to smooth startup noise, but keep inputs realistic.\n\n## Benchmark template\n\n```ts\nconst ITERATIONS = 2000;\n\nfunction bench(name: string, fn: () => void): number {\n const start = Bun.nanoseconds();\n for (let i = 0; i < ITERATIONS; i++) fn();\n const elapsed = (Bun.nanoseconds() - start) / 1e6;\n console.log(\n `${name}: ${elapsed.toFixed(2)}ms total (${(elapsed / ITERATIONS).toFixed(6)}ms/op)`,\n );\n return elapsed;\n}\n\nbench(\"feature/js\", () => {\n jsImpl(sample);\n});\n\nbench(\"feature/native\", () => {\n nativeImpl(sample);\n});\n```\n\n## Verification checklist\n\n- Generated `native/index.d.ts` includes the new export and intended TS signature.\n- `native/index.js` includes the generated named export; enum objects are present when the change adds/changes enums.\n- The loaded `.node` file's `Object.keys(require(candidate))` includes the new export and the package-version sentinel.\n- Bench 
numbers are recorded in the PR/notes.\n- Call sites are updated only if native is faster/equal and behavior-compatible.\n- Obsolete JS code is removed when the native implementation becomes canonical.\n\n## Rule of thumb\n\n- If native is slower, do not switch callsites. Keep or remove the export based on whether it has a near-term owner.\n- If native is faster and behavior-compatible, switch callsites and keep a benchmark to catch regressions.\n",
52
- "provider-streaming-internals.md": "# Provider streaming internals\n\nThis document explains how token/tool streaming is normalized in `@oh-my-pi/pi-ai`, then propagated through `@oh-my-pi/pi-agent-core` and `coding-agent` session events.\n\n## End-to-end flow\n\n1. `streamSimple()` (`packages/ai/src/stream.ts`) maps generic options and dispatches to a provider stream function.\n2. Provider stream functions translate provider-native stream events into the unified `AssistantMessageEvent` sequence. Current built-ins include Anthropic, OpenAI Responses/Completions/Codex/Azure Responses, Google Gemini/Gemini CLI/Vertex, Bedrock Converse, Ollama, Cursor, pi-native gateway transport, plus GitLab Duo/Kimi/Synthetic/xAI-Grok-Responses wrappers and extension-registered custom APIs.\n3. Each provider pushes events into `AssistantMessageEventStream` (`packages/ai/src/utils/event-stream.ts`), which exposes:\n - async iteration for incremental updates\n - `result()` for final `AssistantMessage`\n4. `agentLoop` (`packages/agent/src/agent-loop.ts`) consumes those events, mutates in-flight assistant state, and emits `message_update` events carrying the raw `assistantMessageEvent`.\n5. 
`AgentSession` (`packages/coding-agent/src/session/agent-session.ts`) subscribes to agent events, persists messages, drives extension hooks, and applies session behaviors (retry, compaction, TTSR, streaming-edit abort checks).\n\n## Unified stream contract in `@oh-my-pi/pi-ai`\n\nAll providers emit the same shape (`AssistantMessageEvent` in `packages/ai/src/types.ts`):\n\n- `start`\n- content block lifecycle triplets:\n - text: `text_start` → `text_delta`\\* → `text_end`\n - thinking: `thinking_start` → `thinking_delta`\\* → `thinking_end`\n - tool call: `toolcall_start` → `toolcall_delta`\\* → `toolcall_end`\n- terminal event:\n - `done` with `reason: \"stop\" | \"length\" | \"toolUse\"`\n - or `error` with `reason: \"aborted\" | \"error\"`\n\n`AssistantMessageEventStream` guarantees:\n\n- final result is resolved by terminal event (`done` or `error`)\n- events are delivered to consumers immediately, in push order (no batching or merging)\n\n## Delta throttling behavior\n\n`AssistantMessageEventStream` itself no longer throttles or merges delta events — every provider event is delivered as pushed. The per-delta cost control moved into tool-call argument parsing: providers accumulate partial JSON and re-parse it via `parseStreamingJsonThrottled()` (`packages/ai/src/utils/json-parse.ts`), which skips the re-parse until at least `STREAMING_JSON_PARSE_MIN_GROWTH` (256) new bytes have arrived, bounding mid-stream parse cost from quadratic to linear. 
The final `toolcall_end` parse is always unconditional and authoritative.\n\nThere is no provider backpressure: providers still produce at full speed, while the local stream queues.\n\n## Provider normalization details\n\n## Anthropic (`anthropic-messages`)\n\nSource: `packages/ai/src/providers/anthropic.ts`\n\nNormalization points:\n\n- `message_start` initializes usage (input/output/cache tokens)\n- `content_block_start` maps to text/thinking/toolcall starts\n- `content_block_delta` maps:\n - `text_delta` → `text_delta`\n - `thinking_delta` → `thinking_delta`\n - `input_json_delta` → `toolcall_delta`\n - `signature_delta` updates `thinkingSignature` only (no event)\n- `content_block_stop` emits corresponding `*_end`\n- `message_delta.stop_reason` maps via `mapStopReason()`\n\nTool-call argument streaming:\n\n- each tool block carries internal `partialJson`\n- every JSON delta appends to `partialJson`\n- `arguments` are reparsed on appended deltas via `parseStreamingJsonThrottled()` (re-parse only after ≥256 new bytes)\n- `toolcall_end` reparses once more, then strips `partialJson`\n\n## OpenAI Responses family (`openai-responses`, `openai-codex-responses`, `azure-openai-responses`)\n\nSources: `packages/ai/src/providers/openai-responses.ts`, `openai-codex-responses.ts`, and `azure-openai-responses.ts`\n\nNormalization points:\n\n- `response.output_item.added` starts reasoning/text/function-call/custom-tool blocks\n- reasoning summary events (`response.reasoning_summary_text.delta`) and raw reasoning events (`response.reasoning_text.delta`) become `thinking_delta`\n- output/refusal deltas become `text_delta`\n- `response.function_call_arguments.delta` and `response.custom_tool_call_input.delta` become `toolcall_delta`\n- `response.output_item.done` emits `thinking_end` / `text_end` / `toolcall_end`\n- `response.completed` maps status to stop reason and usage; `response.failed` / SDK `error` events throw into the wrapper's terminal `error` path\n\nTool-call 
argument streaming:\n\n- same `partialJson` accumulation pattern as Anthropic for function-call JSON arguments\n- custom tools stream raw string input and expose final arguments as `{ input: <raw> }`\n- providers that send only `response.function_call_arguments.done` still populate final args\n- tool call IDs are normalized as `\"<call_id>|<item_id>\"`\n\n## Google Generative AI (`google-generative-ai`)\n\nSource: `packages/ai/src/providers/google.ts` (thin request wrapper) and `google-shared.ts` (`streamGoogleGenAI`, shared chunk-to-block translation)\n\nNormalization points:\n\n- iterates `candidate.content.parts`\n- text parts are split into thinking vs text by `isThinkingPart(part)`\n- block transitions close previous block before starting a new one\n- `part.functionCall` is treated as a complete tool call (start/delta/end emitted immediately)\n- finish reason mapped by `mapStopReason()` from `google-shared.ts`\n\nTool-call argument streaming:\n\n- function call args arrive as structured object, not incremental JSON text\n- implementation emits one synthetic `toolcall_delta` containing `JSON.stringify(arguments)`\n- no partial JSON parser needed for Google in this path\n\n## Partial tool-call JSON accumulation and recovery\n\nShared behavior for Anthropic/OpenAI Responses uses `parseStreamingJson()` / `parseStreamingJsonThrottled()` (`packages/ai/src/utils/json-parse.ts`):\n\n1. try `JSON.parse`\n2. fallback to `repairJson()` + the `partial-json` parser for incomplete fragments\n3. 
if both fail, return `{}`\n\nImplications:\n\n- malformed or truncated argument deltas do not crash stream processing immediately\n- in-progress `arguments` may temporarily be `{}`\n- later valid deltas can recover structured arguments because parsing is retried as the buffer grows (throttled to ≥256-byte growth steps mid-stream)\n- final `toolcall_end` performs one more parse attempt before emission\n\n## Stop reasons vs transport/runtime errors\n\nProvider stop reasons are mapped to normalized `stopReason`:\n\n- Anthropic: `end_turn`→`stop`, `max_tokens`→`length`, `tool_use`→`toolUse`, safety/refusal cases→`error`\n- OpenAI Responses: `completed`→`stop`, `incomplete`→`length`, `failed/cancelled`→`error`\n- Google: `STOP`→`stop`, `MAX_TOKENS`→`length`, safety/prohibited/malformed-function-call classes→`error`\n\nError semantics are split in two stages:\n\n1. **Model completion semantics** (provider reported finish reason/status)\n2. **Transport/runtime failure** (network/client/parser/abort exceptions)\n\nIf provider stream throws or signals failure, each provider wrapper catches and emits terminal `error` event with:\n\n- `stopReason = \"aborted\"` when abort signal is set\n- otherwise `stopReason = \"error\"`\n- `errorMessage = finalizeErrorMessage(error, rawRequestDump)` (`packages/ai/src/utils/http-inspector.ts`), which wraps `formatErrorMessageWithRetryAfter()` and appends any captured HTTP-error body / raw-request dump (the `cursor` wrapper calls `formatErrorMessageWithRetryAfter()` directly)\n\n## Malformed chunk / SSE parse failure behavior\n\nThe OpenAI Completions/Responses paths use the in-repo HTTP+SSE transport `postOpenAIStream()` (`packages/ai/src/utils/openai-http.ts`), which decodes frames with `readSseJson()` and replaced the `openai` SDK client. 
Anthropic uses the in-repo `AnthropicMessagesClient` (`packages/ai/src/providers/anthropic-client.ts`); the Google paths and the Codex SSE fallback read SSE via `readSseJson()` directly, and websocket Codex frames are normalized through the same event handler.\n\nObserved behavior in current implementation:\n\n- malformed SSE framing or chunk JSON surfaces as an exception or stream `error` event\n- malformed Codex SSE JSON/framing throws from the local SSE reader\n- provider wrapper converts failures into unified terminal `error` events\n- no provider-specific resume/retry inside the stream function itself, except Codex websocket-to-SSE transport fallback before replay-unsafe output is emitted\n- higher-level retries are handled in `AgentSession` auto-retry logic (message-level retry, not stream-chunk replay)\n\n## Cancellation boundaries\n\nCancellation is layered:\n\n- AI provider request: `options.signal` is passed into provider client stream call.\n- Provider wrapper: after stream loop, aborted signal forces error path (`\"Request was aborted\"`).\n- Agent loop: checks `signal.aborted` before handling each provider event and can synthesize an aborted assistant message from the latest partial.\n- Session/agent controls: `AgentSession.abort()` -> `agent.abort()` -> shared abort controller cancellation.\n\nTool execution cancellation is separate from model stream cancellation:\n\n- tool runners use `AbortSignal.any([agentSignal, steeringAbortSignal])`\n- steering interrupts can abort remaining tool execution while preserving already-produced tool results\n\n## Backpressure boundaries\n\nThere is no hard backpressure mechanism between provider SDK stream and downstream consumers:\n\n- `EventStream` uses in-memory queues with no max size\n- the throttled partial-JSON re-parse reduces per-delta CPU cost but does not slow provider intake\n- if consumers lag significantly, queued events can grow until completion\n\nCurrent design favors responsiveness and simple 
ordering over bounded-buffer flow control.\n\n## How stream events surface as agent/session events\n\n`agentLoop.streamAssistantResponse()` bridges `AssistantMessageEvent` to `AgentEvent`:\n\n- on `start`: pushes placeholder assistant message and emits `message_start`\n- on block events (`text_*`, `thinking_*`, `toolcall_*`): updates last assistant message, emits `message_update` with raw `assistantMessageEvent`\n- on terminal (`done`/`error`): resolves final message from `response.result()`, emits `message_end`\n\n`AgentSession` then consumes those events for session-level behaviors:\n\n- TTSR watches `message_update.assistantMessageEvent` for `text_delta`, `thinking_delta`, and `toolcall_delta`\n- streaming edit guard inspects `toolcall_delta`/`toolcall_end` on `edit` calls and can abort early\n- persistence writes finalized messages at `message_end`\n- auto-retry examines assistant `stopReason === \"error\"` plus `errorMessage` heuristics\n\n## Unified vs provider-specific responsibilities\n\nUnified (common contract):\n\n- event shape (`AssistantMessageEvent`)\n- final result extraction (`done`/`error`)\n- immediate in-order event delivery\n- agent/session event propagation model\n\nProvider-specific (not fully abstracted):\n\n- upstream event taxonomies and mapping logic\n- stop-reason translation tables\n- tool-call ID conventions\n- reasoning/thinking block semantics and signatures\n- usage token semantics and availability timing\n- message conversion constraints per API\n\n## Implementation files\n\n- [`../../ai/src/stream.ts`](../packages/ai/src/stream.ts) — provider dispatch, option mapping, API key/session plumbing, custom API dispatch, and provider-specific credential handling.\n- [`../../ai/src/utils/event-stream.ts`](../packages/ai/src/utils/event-stream.ts) — generic stream queue + final-result resolution.\n- [`../../ai/src/utils/json-parse.ts`](../packages/ai/src/utils/json-parse.ts) — partial JSON parsing for streamed tool arguments.\n- 
[`../../ai/src/providers/anthropic.ts`](../packages/ai/src/providers/anthropic.ts) — Anthropic event translation and tool JSON delta accumulation.\n- [`../../ai/src/providers/openai-responses.ts`](../packages/ai/src/providers/openai-responses.ts), [`openai-responses-shared.ts`](../packages/ai/src/providers/openai-responses-shared.ts), [`openai-codex-responses.ts`](../packages/ai/src/providers/openai-codex-responses.ts), [`azure-openai-responses.ts`](../packages/ai/src/providers/azure-openai-responses.ts) — Responses-family event translation and status mapping.\n- [`../../ai/src/providers/google.ts`](../packages/ai/src/providers/google.ts), [`google-gemini-cli.ts`](../packages/ai/src/providers/google-gemini-cli.ts), [`google-vertex.ts`](../packages/ai/src/providers/google-vertex.ts) — Gemini stream chunk-to-block translation variants.\n- [`../../ai/src/providers/google-shared.ts`](../packages/ai/src/providers/google-shared.ts) — Gemini finish-reason mapping and shared conversion rules.\n- [`../../ai/src/providers/amazon-bedrock.ts`](../packages/ai/src/providers/amazon-bedrock.ts), [`openai-completions.ts`](../packages/ai/src/providers/openai-completions.ts), [`ollama.ts`](../packages/ai/src/providers/ollama.ts), [`cursor.ts`](../packages/ai/src/providers/cursor.ts), [`pi-native-client.ts`](../packages/ai/src/providers/pi-native-client.ts) — additional built-in stream adapters using the same event contract.\n- [`../../agent/src/agent-loop.ts`](../packages/agent/src/agent-loop.ts) — provider stream consumption and `message_update` bridging.\n- [`../src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts) — session-level handling of streaming updates, abort, retry, and persistence.\n",
53
- "providers.md": "# Providers\n\nProviders are the model backends `omp` can route requests to: Anthropic, OpenAI, Google Gemini, Groq, OpenRouter, Mistral, xAI, local engines like Ollama, hosted gateways, custom `models.yml` providers, and providers registered by extensions.\n\nA **provider** is the account or backend namespace, such as `anthropic`, `openai`, `google`, or `ollama`. A **model** is a concrete model under that provider, selected as `provider/model-id`, such as `anthropic/claude-opus-4-6`. Disabling a provider removes every model under it from selection; if you only want to narrow individual models, use model settings instead.\n\nThis page covers how providers become available, how credentials are resolved, the provider/environment-variable map, local engines, disabling providers, and custom providers. For model selection and the full `models.yml` schema, see [Model and Provider Configuration](./models.md). For config-file locations and merge precedence, see [Settings](./settings.md). For credential storage and login flows in depth, see [Secrets and credentials](./secrets.md). For the complete environment-variable reference, see [Environment variables](./environment-variables.md). For local engine setup, see [Local models](./local-models.md). For context-file discovery providers, see [Context files](./context-files.md).\n\n## How `omp` decides a provider is available\n\nAt startup the model registry assembles its catalog from four sources, in order:\n\n1. The bundled model catalog (every built-in provider and its known models).\n2. Custom provider and model entries from `~/.omp/agent/models.yml`.\n3. Runtime-discovered models for providers that support discovery (local engines and discovery-enabled gateways).\n4. Providers and models registered by extensions.\n\nThe registry can hold a model even when it is not currently selectable. A model becomes **available** only when both conditions hold:\n\n1. 
its provider ID is **not** in the effective `disabledProviders` list; **and**\n2. the provider is either **keyless** (an implicit local provider, or a custom provider with `auth: none`) **or** has resolvable credentials.\n\n`disabledProviders` is checked *before* credentials. If a provider ID is disabled, no stored key, OAuth session, environment variable, `.env` entry, or `models.yml` `apiKey` will make it selectable — the provider's models are dropped from availability regardless of credentials. Removing the ID from the effective list restores them.\n\nKeyless local engines are a special case: `ollama`, `llama.cpp`, and `lm-studio` are treated as keyless when no key is configured, so their discovered models are selectable as soon as the engine answers — no login required. See [Built-in local engines](#built-in-local-engines).\n\n## Credentials and precedence\n\nWhen a provider needs an API key, `omp` resolves it in this order (first match wins):\n\n1. **Runtime override** — a key supplied for the current process, e.g. CLI `--api-key`. Never persisted.\n2. **`models.yml` config key** — an `apiKey` pinned on a custom provider, registered as a config-sourced bearer. This deliberately beats stored OAuth, so a key supplied for a custom `baseUrl`/gateway is honored instead of forwarding an upstream OAuth token the proxy would reject.\n3. **Stored API key** — an API-key credential saved in the auth store.\n4. **Stored OAuth credential** — refreshed when needed; multiple accounts are ranked/rotated automatically.\n5. **Provider environment variable** — including values loaded from `.env` files (see [the env-var table](#environment-variables-and-env-files)).\n6. **`models.yml` fallback resolver** — keys for custom providers not otherwise registered.\n\nStored credentials live in the auth store at `~/.omp/agent/agent.db` for local auth, or in the configured auth-broker snapshot when running in broker mode. 
(`PI_CODING_AGENT_DIR` relocates the `~/.omp/agent` base, and the auth store moves with it.)\n\n### OAuth vs API key, and provider-scoped logins\n\nLogins are **provider-scoped**: authenticating `anthropic` does not authenticate `openai`, and each provider tracks its own credentials. A disabled provider stays disabled even with valid stored auth.\n\nUse the interactive slash commands inside a session:\n\n- `/login` — opens the OAuth/key selector. `/login <provider>` jumps straight to one provider (e.g. `/login anthropic`); for an OAuth flow that needs a pasted callback, run `/login <redirect-url>` to complete it.\n- `/logout` — opens the provider selector to remove stored credentials.\n\nFor headless or remote setups backed by a shared auth broker, the CLI exposes `omp auth-broker login <provider>` / `omp auth-broker logout` (and `status`, `list`, `import`, `migrate`). See [Secrets and credentials](./secrets.md) for the broker model.\n\nWhen a model has no credentials, `omp` tells you to run `/login` or set the provider's environment variable.\n\n### Pinning a key in `models.yml`\n\nA custom provider's `apiKey` is resolved as **environment-variable-name-or-literal**: if the value names an existing environment variable, that variable's value is used; otherwise the string itself is the key. 
Prefixing the value with `!` runs it as a shell command and uses the trimmed stdout (see [Model and Provider Configuration](./models.md) for the full value syntax).\n\n```yaml\n# ~/.omp/agent/models.yml\nproviders:\n my-gateway:\n baseUrl: https://gateway.example.com/v1\n api: openai-completions\n apiKey: MY_GATEWAY_API_KEY # reads this env var if set, else literal text\n models:\n - id: claude-sonnet\n name: Claude Sonnet via Gateway\n contextWindow: 200000\n maxTokens: 8192\n```\n\nIf `authHeader: true` is set on a custom provider, the resolved key is injected as an `Authorization: Bearer <key>` header on every request to that provider.\n\n## Environment variables and `.env` files\n\nEach provider has one or more environment variables that supply a key when no stored credential exists. The table below is the verified provider → variable map; the full catalog is large, so it is split into core and additional providers. OAuth-backed providers can also accept a token variable in addition to (or instead of) an API key.\n\n### Core providers\n\n| Provider ID | Environment variable(s) |\n|---|---|\n| `anthropic` | `ANTHROPIC_OAUTH_TOKEN`, then `ANTHROPIC_API_KEY` (Foundry mode prefers `ANTHROPIC_FOUNDRY_API_KEY` when `CLAUDE_CODE_USE_FOUNDRY=true`) |\n| `openai` | `OPENAI_API_KEY` |\n| `openai-codex` | `OPENAI_CODEX_OAUTH_TOKEN` |\n| `google` | `GEMINI_API_KEY` |\n| `google-vertex` | `GOOGLE_CLOUD_API_KEY`, or Application Default Credentials (`GOOGLE_APPLICATION_CREDENTIALS` + `GOOGLE_CLOUD_PROJECT` + `GOOGLE_CLOUD_LOCATION`) |\n| `groq` | `GROQ_API_KEY` |\n| `openrouter` | `OPENROUTER_API_KEY` |\n| `mistral` | `MISTRAL_API_KEY` |\n| `xai` | `XAI_API_KEY` |\n| `xai-oauth` | `XAI_OAUTH_TOKEN`, then `XAI_API_KEY` |\n| `github-copilot` | `COPILOT_GITHUB_TOKEN` |\n| `cursor` | `CURSOR_ACCESS_TOKEN` |\n| `azure` | `AZURE_OPENAI_API_KEY` |\n| `amazon-bedrock` | `AWS_PROFILE`, or `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY`, or an ECS/IRSA credential chain |\n\n### 
Additional hosted providers\n\n| Provider ID | Environment variable(s) |\n|---|---|\n| `cerebras` | `CEREBRAS_API_KEY` |\n| `deepseek` | `DEEPSEEK_API_KEY` |\n| `fireworks` | `FIREWORKS_API_KEY` |\n| `together` | `TOGETHER_API_KEY` |\n| `nvidia` | `NVIDIA_API_KEY` |\n| `huggingface` | `HUGGINGFACE_HUB_TOKEN`, then `HF_TOKEN` |\n| `moonshot` | `MOONSHOT_API_KEY` |\n| `nanogpt` | `NANO_GPT_API_KEY` |\n| `venice` | `VENICE_API_KEY` |\n| `vercel-ai-gateway` | `AI_GATEWAY_API_KEY` (also `VERCEL_AI_GATEWAY_API_KEY` for catalog discovery) |\n| `cloudflare-ai-gateway` | `CLOUDFLARE_AI_GATEWAY_API_KEY` |\n| `litellm` | `LITELLM_API_KEY` |\n| `kilo` | `KILO_API_KEY` |\n| `zai` | `ZAI_API_KEY` |\n| `zenmux` | `ZENMUX_API_KEY` |\n| `zhipu-coding-plan` | `ZHIPU_API_KEY` |\n| `umans` | `UMANS_AI_CODING_PLAN_API_KEY` |\n| `qianfan` | `QIANFAN_API_KEY` |\n| `qwen-portal` | `QWEN_OAUTH_TOKEN`, then `QWEN_PORTAL_API_KEY` |\n| `synthetic` | `SYNTHETIC_API_KEY` |\n| `minimax` | `MINIMAX_API_KEY` |\n| `alibaba-coding-plan` | `ALIBABA_CODING_PLAN_API_KEY` |\n| `aimlapi` | `AIMLAPI_API_KEY` |\n| `gitlab-duo` | `GITLAB_TOKEN` |\n| `opencode-zen`, `opencode-go` | `OPENCODE_API_KEY` |\n| `firepass` | `FIREPASS_API_KEY` |\n| `wafer-pass` | `WAFER_PASS_API_KEY` |\n| `wafer-serverless` | `WAFER_SERVERLESS_API_KEY` |\n| `xiaomi` | `XIAOMI_API_KEY` |\n| `ollama-cloud` | `OLLAMA_CLOUD_API_KEY` |\n| `ollama` | `OLLAMA_API_KEY` (optional; local discovery is keyless by default) |\n| `lm-studio` | `LM_STUDIO_API_KEY` (optional; keyless by default) |\n| `llama.cpp` | `LLAMA_CPP_API_KEY` (only when the server requires auth) |\n\nOAuth-backed providers such as `anthropic`, `github-copilot`, `cursor`, `ollama-cloud`, `qwen-portal`, `kimi-code`, `xai-oauth`, `wafer-pass`, `wafer-serverless`, `google-gemini-cli`, and `google-antigravity` are normally reached through `/login` rather than an environment variable. 
See [Environment variables](./environment-variables.md) for search-tool and configuration variables not listed here.\n\n### `.env` discovery and precedence\n\n`omp` eagerly loads `.env` files into the process environment before any provider lookup. It reads four files and, for each variable, the **first** source that defines it wins. Effective precedence, high to low:\n\n1. The process environment inherited by `omp` (already-set variables always win).\n2. `<cwd>/.env`\n3. `~/.omp/agent/.env`\n4. `~/.omp/.env`\n5. `~/.env`\n\nA variable already present in the process environment is never overwritten by a `.env` file. Among the files, a value set in `<cwd>/.env` wins over `~/.omp/agent/.env`, which wins over `~/.omp/.env`, which wins over `~/.env`. So a shell-exported `OPENAI_API_KEY` beats every `.env` file, and a project's `<cwd>/.env` beats your home `~/.env`.\n\nProject-local `.env` is the simplest way to make one repository use a project-specific gateway, key, or local endpoint:\n\n```dotenv\n# <project>/.env\nOPENROUTER_API_KEY=sk-or-...\nOLLAMA_BASE_URL=http://127.0.0.1:11434\n```\n\n`.env` parsing is intentionally minimal:\n\n- blank lines and lines starting with `#` are ignored;\n- keys must match `[A-Za-z_][A-Za-z0-9_]*` (shell-identifier shape) — other names are dropped;\n- values may be wrapped in single or double quotes, which are stripped;\n- values containing a NUL byte are dropped;\n- an `OMP_`-prefixed key is also mirrored to the matching `PI_`-prefixed name.\n\n## Built-in local engines\n\nThree local engines are discovered automatically without needing a `models.yml` entry. Each uses a base URL that can be overridden by an environment variable:\n\n| Provider ID | Base URL (env override → default) | Notes |\n|---|---|---|\n| `ollama` | `OLLAMA_BASE_URL`, then `OLLAMA_HOST` (normalized), else `http://127.0.0.1:11434` | Keyless by default. 
|\n| `llama.cpp` | `LLAMA_CPP_BASE_URL`, else `http://127.0.0.1:8080` | Keyless unless a key is stored for `llama.cpp`. |\n| `lm-studio` | `LM_STUDIO_BASE_URL`, else `http://127.0.0.1:1234/v1` | Keyless by default. |\n\nThese implicit engines are **skipped** when:\n\n- a provider with the same ID is already configured in `models.yml` (your explicit config wins); or\n- the provider ID appears in the effective `disabledProviders` list.\n\nFor installing and running these engines, see [Local models](./local-models.md).\n\n## Disabling model providers\n\nUse the `disabledProviders` setting to remove a provider's models from selection:\n\n```yaml\n# ~/.omp/agent/config.yml or <project>/.omp/config.yml\ndisabledProviders:\n - anthropic\n - openai\n - google\n - groq\n```\n\nProvider IDs are matched exactly. Disable `google` to hide the Google Gemini API provider; the OAuth-backed Google providers `google-gemini-cli` and `google-antigravity` are separate IDs and must be disabled individually. Disable `ollama`, `llama.cpp`, or `lm-studio` to stop local discovery for that engine.\n\n`disabledProviders` applies uniformly to:\n\n- bundled catalog providers;\n- custom `models.yml` providers;\n- runtime-discovered provider models;\n- extension-registered providers;\n- implicit local engines.\n\nDisabling a provider does not delete its stored credentials — re-enable it by removing its ID from the effective list.\n\n## Project-specific provider control\n\nProject settings live in `<project>/.omp/config.yml`. Use them when one repository must allow or hide a different provider set than your global default:\n\n```yaml\n# <project>/.omp/config.yml\ndisabledProviders:\n - openai\n - openrouter\n```\n\nSettings arrays are **replaced** wholesale by the higher-precedence layer, not merged or appended. 
If the global file disables three providers and the project file disables one, the project sees only the project list:\n\n```yaml\n# ~/.omp/agent/config.yml\ndisabledProviders:\n - anthropic\n - openai\n - google\n\n# <project>/.omp/config.yml\ndisabledProviders:\n - groq\n```\n\nEffective result inside the project:\n\n```json\n[\"groq\"]\n```\n\nThe project array re-enables `anthropic`, `openai`, and `google` for sessions launched from that project. If you want a project to *add* to the global set, repeat the global IDs in the project file. See [Settings](./settings.md) for the full precedence chain, including `--config` overlays and runtime overrides.\n\n## Path-scoped `disabledProviders`\n\n`disabledProviders` can mix plain string entries (apply everywhere) with path-scoped entries (apply only when the current working directory matches a configured path):\n\n```yaml\ndisabledProviders:\n - ollama\n - path: ~/projects/sensitive\n providers:\n - anthropic\n - openai\n - paths:\n - ~/work/client-a\n - ~/work/client-b\n values:\n - openrouter\n```\n\n- Bare string entries always apply.\n- A scoped entry applies when the current working directory **is** the configured path or sits **under** it. `~` expands to the home directory.\n- Accepted path keys: `path`, `paths`, `pathPrefix`, `pathPrefixes`.\n- Accepted value keys: `providers`, `values`, `items`.\n\nFor the example above:\n\n- `ollama` is disabled everywhere.\n- `anthropic` and `openai` are additionally disabled under `~/projects/sensitive`.\n- `openrouter` is additionally disabled under `~/work/client-a` and `~/work/client-b`.\n\nPath scopes are resolved **after** the settings merge. Because a higher-precedence layer replaces the whole array, a project-level `disabledProviders` array drops any scoped entries that only existed in the global array. `enabledModels` is the only other setting that supports the same path-scoped form. 
See [Settings](./settings.md) for details.\n\n## Provider IDs vs discovery provider IDs\n\n`disabledProviders` uses a **single shared ID namespace** that gates two different subsystems:\n\n- **Model providers** — the backends on this page (`anthropic`, `openai`, `ollama`, a custom `models.yml` ID, …). Disabling one removes its models from selection.\n- **Discovery providers** — sources of context files, MCP servers, commands, skills, hooks, tools, prompts, and settings. Disabling one stops that source from contributing capability items.\n\n| Entry type | Examples | Effect |\n|---|---|---|\n| Model provider ID | `anthropic`, `openai`, `google`, `groq`, `openrouter`, `ollama`, `my-gateway` | Removes that provider's models from availability. |\n| Discovery provider ID | `native`, `claude`, `codex`, `gemini`, `agents`, `github` | Stops that discovery source from contributing capability items. |\n\nWatch the related names. The Google Gemini **API** models use the model provider ID `google`; `gemini` is a **discovery** provider ID (the source that reads `GEMINI.md`), not the Google model provider. Use discovery IDs only when you intend to disable an entire config source. See [Context files](./context-files.md) for the discovery-provider side.\n\n## Custom providers in `models.yml`\n\nCustom providers live in `~/.omp/agent/models.yml` under `providers:`. 
A provider ID defined there participates in the same selection, credential resolution, and `disabledProviders` rules as built-in providers.\n\nMinimal OpenAI-compatible provider:\n\n```yaml\nproviders:\n my-openai-compatible:\n baseUrl: https://api.example.com/v1\n api: openai-completions\n apiKey: MY_OPENAI_COMPATIBLE_KEY # env-var-name or literal\n models:\n - id: fast-chat\n name: Fast Chat\n contextWindow: 128000\n maxTokens: 8192\n```\n\nKeyless local provider (no credentials required):\n\n```yaml\nproviders:\n local-proxy:\n baseUrl: http://127.0.0.1:4000/v1\n api: openai-completions\n auth: none\n models:\n - id: local-model\n name: Local Model\n contextWindow: 32768\n maxTokens: 4096\n```\n\nDiscovery-enabled provider (models fetched from the endpoint at runtime):\n\n```yaml\nproviders:\n team-proxy:\n baseUrl: https://models.example.com/v1\n apiKey: TEAM_PROXY_API_KEY\n authHeader: true # send Authorization: Bearer <resolved key>\n disableStrictTools: true\n discovery:\n type: proxy\n```\n\nFor the full schema, all allowed `api` values, discovery `type`s, model overrides, and equivalence settings, see [Model and Provider Configuration](./models.md).\n\nTo disable a custom provider, list its ID exactly:\n\n```yaml\ndisabledProviders:\n - my-openai-compatible\n - team-proxy\n```\n\n## Troubleshooting\n\n**A provider's models are not selectable.** Confirm the provider has credentials (`/login <provider>`, an exported environment variable, or a `models.yml` `apiKey`) and that its ID is not in the effective `disabledProviders` list. Remember the rule: not disabled **and** (keyless **or** has credentials). Keyless local engines only appear once the engine is actually running and responding.\n\n**The wrong key is being used (a stale key from `.env`).** Resolution favors runtime `--api-key`, then a `models.yml` config key, then stored credentials, then environment/`.env`. 
An already-set process environment variable also beats every `.env` file, and `<cwd>/.env` beats `~/.env`. If an unexpected key wins, check for an exported shell variable and the four `.env` files in precedence order, and clear the one that should not apply.\n\n**A provider still appears even though I disabled it.** `disabledProviders` arrays are replaced, not merged: a project `<project>/.omp/config.yml` array fully overrides the global one. Verify the *effective* list for the directory you are in (path-scoped entries only apply at or under their configured path), and confirm the ID is spelled exactly. Use `omp config get disabledProviders` to inspect the merged value (see [Settings](./settings.md)).\n\n**A discovery provider name had no effect on models (or vice-versa).** The ID namespace is shared. `gemini`, `codex`, `claude`, `native`, and `agents` are discovery-source IDs; the Google model backend is `google`. Make sure you are disabling the right kind of provider.\n\n**A custom `models.yml` provider does not load.** A YAML or schema error makes the registry skip the custom file. Validate the file with `omp models` (use `omp models find <substr>` to scope it to one provider), confirm each provider has a `baseUrl`, a valid `api`, and at least one model entry, and that an implicit local engine is not silently shadowing it (an explicit `ollama`/`lm-studio`/`llama.cpp` entry replaces the built-in discovery for that ID). See [Model and Provider Configuration](./models.md).\n",
53
+ "provider-endpoint-constraints.md": "# Provider endpoint constraints\n\nProvider integrations are not interchangeable just because they speak an\nOpenAI-shaped HTTP protocol. A request is shaped by four layers at once:\n\n1. endpoint family: `openai-completions`, `openai-responses`,\n `openai-codex-responses`, `anthropic-messages`, etc.\n2. gateway/auth surface: OpenRouter, Vercel AI Gateway, Azure OpenAI, Copilot,\n Alibaba Coding Plan, Kimi Code, Fireworks/Firepass, and similar hosts\n3. model metadata and `compat` overrides\n4. request context: tools, images, reasoning mode, stateful session, service tier\n\nUse this page when adding a provider, adding a compat flag, or moving logic out\nof a provider-specific branch. The goal is to encode endpoint constraints once,\nat the narrowest layer that actually owns the behavior.\n\nRelated references:\n\n- [Providers](./providers.md) — provider availability, credentials, custom providers\n- [Model and Provider Configuration](./models.md) — `models.yml`, routing, and compat fields\n- [Provider streaming internals](./provider-streaming-internals.md) — stream event normalization\n- [Adding a provider](./adding-a-provider.md) — catalog/auth wiring for a new provider\n\n## Baseline rules\n\n- Prefer compat metadata over provider-name branches when behavior is model or\n endpoint configurable.\n- Keep transport mechanics transport-local. Codex websocket replay, Responses\n item routing, and Chat Completions SSE decoding are protocol behavior, not\n generic compat flags.\n- Scope fallbacks to the failing capability. A strict-tool failure should not\n disable unrelated features. A stale Responses chain should reset chain state,\n not disable Responses entirely.\n- Do not emit defaults that alter gateway routing. OpenRouter is the known case\n for default `max_tokens`, but any gateway can treat optional fields as routing\n hints.\n- Stop retrying after visible side effects. 
Once text or a tool call is visible\n to the user/session, retry policy must avoid duplicate output and duplicate\n tool execution.\n\n## 1. Choose the endpoint family first\n\n### OpenAI Chat Completions compatible\n\nPreserve these differences instead of treating every host as stock OpenAI:\n\n- `stream_options.include_usage` is only safe when compat says streaming usage\n is supported.\n- `store: false` is accepted only by some hosts.\n- max-output caps use either `max_tokens` or `max_completion_tokens`.\n- stop sequences and frequency penalty are supported only on this path within\n the current OpenAI-like endpoint set.\n- OpenRouter-style reasoning and routing fields are not portable to other\n OpenAI-compatible hosts unless compat says so.\n\n### OpenAI Responses compatible\n\nResponses request shape is its own dialect:\n\n- uses `input`, `instructions`, `store`, `prompt_cache_key`, optional\n `previous_response_id`, and `max_output_tokens`\n- can default official OpenAI requests to stateful chaining with\n `previous_response_id` plus `store: true`\n- third-party Responses proxies may reject native reasoning history, encrypted\n reasoning replay, or `previous_response_id`\n- stream completion is authoritative only after `response.completed` or\n `response.incomplete`; a stream close before either terminal event should fail\n for OpenAI Responses rather than surface partial output as success\n\n### OpenAI Codex Responses\n\nCodex is not plain Responses with a different URL. 
Keep these as Codex transport\npolicy:\n\n- Codex account headers and beta headers\n- `x-codex-turn-state` and `x-models-etag`\n- optional websocket transport plus SSE fallback\n- `responsesLite`\n- prompt-cache/session ids used as transport state\n- websocket-only `previous_response_id` chaining; SSE never chains\n- Codex retry/replay rules, including reconnect and SSE replay boundaries\n- provider retry only before user-visible content has been emitted\n- whitespace-only tool-call argument loop breaker\n\nCodex intentionally does not forward caller max-token caps because the backend\nrejects them.\n\n### Anthropic/OpenAI dual-surface providers\n\nKimi Code and Synthetic can be called as OpenAI-compatible or\nAnthropic-compatible. The shim may need to:\n\n- switch `format`\n- rebuild an Anthropic model when needed\n- map internal reasoning to Anthropic thinking budgets\n- delegate back to OpenAI Completions\n\nDo not encode these as one-way provider migrations; they are runtime surface\nselection decisions.\n\n## 2. Apply gateway and auth overlays\n\nThese constraints sit above the endpoint family. 
They affect auth, headers,\nrouting, model ids, or usage accounting.\n\n### Azure OpenAI\n\n- Chat Completions base URL reshapes to\n `/deployments/{deployment}/chat/completions?api-version=...`.\n- Deployment names may differ from model ids through\n `AZURE_OPENAI_DEPLOYMENT_NAME_MAP`.\n\n### GitHub Copilot\n\n- The API key is parsed into an access token.\n- Dynamic Copilot headers depend on messages/images.\n- `premiumRequests` must survive usage population and replacement.\n- Base URL may be resolved from the raw key.\n\n### OpenRouter\n\n- Adds attribution/cache headers.\n- Supports routing suffixes such as `:nitro` and `:floor`.\n- Appends a routing suffix only when the model id has no explicit suffix after\n the last provider path segment.\n- Uses nested `reasoning` request fields.\n- Routes providers through the OpenRouter `provider` object.\n- Has special cache-write usage accounting.\n- Has strict-tool fallback for Anthropic grammar-size failures.\n- Should omit catalog-default `max_tokens` unless the caller explicitly set a\n cap, so upstream routing is not biased.\n\n### Vercel AI Gateway\n\n- Routing preferences go under `providerOptions.gateway.only` and\n `providerOptions.gateway.order`.\n- Do not reuse OpenRouter's `provider` object.\n\n### Alibaba Coding Plan\n\n- API key bytes may be JSON carrying `{ token, enterpriseUrl }`.\n- Auth and base URL resolution are provider-specific.\n\n### Kimi Code\n\n- The OpenAI-compatible path needs common Kimi headers.\n- It also participates in the OpenAI/Anthropic dual-surface shim.\n\n### Fireworks and Firepass\n\n- Wire model ids need provider-specific mapping.\n- Fireworks can conflict when DeepSeek-style `thinking` and OpenAI-style\n `reasoning_effort` are both present after extra body fields are merged.\n\n## 3. 
Serialize request parameters by dialect\n\nCheck these before adding or forwarding a field:\n\n- **Model id.** Some models resolve a wire id from reasoning effort.\n Firepass/Fireworks transform ids. OpenRouter suffix handling is path-segment\n aware.\n- **Max output tokens.** Kimi-family models may require a max-token field even\n when the caller did not set one. OpenRouter should omit catalog defaults unless\n explicit. Codex drops caller caps. Responses uses `max_output_tokens`; Chat\n Completions uses `max_tokens` or `max_completion_tokens`.\n- **Service tier.** Completions, Responses, and Codex all handle service tiers,\n but allowed values and pricing multipliers differ. Codex has a special\n priority multiplier for `gpt-5.5`.\n- **Prompt cache/session.** OpenAI Responses uses `prompt_cache_key`.\n OpenRouter Responses uses `session_id`. Codex uses prompt cache/session ids for\n transport state. Anthropic-style cache control requires `cache_control` on a\n text part.\n- **Stateful chaining.** Official OpenAI Responses may chain by default.\n Third-party endpoints generally should not. Codex chains only on websocket\n `response.create`.\n\n## 4. 
Map reasoning and thinking explicitly\n\nReasoning fields are not interchangeable.\n\n### OpenAI-style `reasoning_effort`\n\n- Effort values come from compat/model metadata.\n- If reasoning is disabled but the host has no real off switch, map to the\n lowest supported effort rather than inventing an unsupported value.\n\n### Responses `reasoning`\n\n- Uses `reasoning: { effort, summary }`.\n- Can include `reasoning.encrypted_content` for replay.\n- xAI Grok models may require omitting `reasoning.effort`.\n- Some compat paths inject the GPT-5 `# Juice: 0 !important` developer scaffold.\n\n### OpenRouter `reasoning`\n\n- Uses nested `reasoning: { effort }`.\n- Disabling reasoning must send `reasoning: { enabled: false }`; OpenRouter can\n otherwise default reasoning models into thinking.\n\n### Z.AI / GLM\n\n- Uses `thinking: { type: \"enabled\" }` or\n `thinking: { type: \"disabled\" }`.\n- GLM 5.2 reasoning-effort models may also receive `reasoning_effort`.\n- Tool requests need `tool_stream: true`.\n\n### Qwen\n\n- One dialect uses top-level `enable_thinking`.\n- Another uses `chat_template_kwargs.enable_thinking`.\n\n### Anthropic-compatible format\n\n- Reasoning maps to Anthropic thinking enablement and thinking-budget tokens,\n not OpenAI-style fields.\n\n### DeepSeek reasoning history\n\n- DeepSeek-compatible reasoning models may require exact `reasoning_content`\n replay.\n- Some variants require replay on every assistant turn, not only tool-call turns.\n- Synthetic `\".\"` placeholders are acceptable for Kimi/OpenRouter-style compat,\n but not DeepSeek V4 exact replay.\n\n### Reasoning plus tool choice\n\n- DeepSeek reasoning models can reject `tool_choice` while thinking is enabled.\n- Kimi can reject forced tool choice while thinking is enabled.\n- Compat needs both policies: disable reasoning for any tool choice, and disable\n reasoning only for forced tool choice.\n\n### xAI Grok through Responses/SuperGrok\n\nKeep these independent:\n\n- omit 
`reasoning.effort`\n- include or drop encrypted reasoning replay\n- filter reasoning-history wrappers\n\nSome models reject only one of those fields; do not collapse them into one\n\"Grok mode\" branch.\n\n## 5. Normalize tools and schemas per endpoint\n\n### Strict tools\n\nStrict schemas are not a universal capability:\n\n- some providers support strict tools\n- some reject mixed strict/non-strict tools\n- some reject strictified schemas\n- OpenRouter Anthropic models can fail with “compiled grammar too large”\n\nRetry-without-strict should be a compat recovery policy scoped to the current\nsession/provider path.\n\n### Responses and Codex custom tools\n\nResponses and Codex both support freeform custom grammar tools for `apply_patch`.\nBoth disable request-level parallel tool calls when any custom grammar tool is\npresent. Responses additionally:\n\n- sanitizes schemas differently\n- quarantines invalid enum/const schema contradictions\n- repairs orphan tool outputs into assistant notes\n- synthesizes placeholder outputs for orphan tool calls\n\nCodex applies its own request transformation before sending.\n\n### Tool choice\n\nBefore emitting `tool_choice`:\n\n- confirm the endpoint supports it\n- downgrade forced choice to `auto` if forced choice is unsupported\n- drop `tool_choice: \"none\"` when no tools are emitted\n- drop forced named tool choice if that named tool was filtered out\n\n### Anthropic through LiteLLM/Bedrock\n\n- If history contains tool calls/results and `context.tools` is undefined, send\n `tools: []` as a sentinel.\n- If `context.tools = []`, treat it as explicit opt-out and do not emit the\n sentinel.\n\n### Mistral / Devstral\n\n- Tool-call ids must be exactly 9 alphanumeric characters.\n- Some flows need a synthetic assistant bridge after tool results before the next\n user message.\n\n### Custom tool outputs\n\nResponses/Codex must remember whether a call was `custom_tool_call`; the paired\noutput must then be `custom_tool_call_output`, 
not `function_call_output`.\n\n### MiniMax-compatible streaming arguments\n\nTool arguments can stream as objects instead of JSON strings. Deep-merge object\ndeltas, then emit one final concat-safe JSON delta.\n\n## 6. Convert messages and replay history safely\n\n- **System/developer roles.** Reasoning models may require `developer`. Some\n providers do not support `developer` and must downgrade to `user`. Some reject\n multiple system messages and need coalescing.\n- **Responses system prompts.** Responses usually uses top-level `instructions`.\n Reasoning models that support `developer` put system prompts inline as\n developer messages.\n- **Assistant content.** Some OpenAI-compatible backends mirror array content\n literally, so assistant content is normalized to a string. Tool-call replay may\n require `content: \"\"` or `content: \".\"` instead of `null`.\n- **Thinking replay.** Some models want thinking as visible text. Others need a\n provider-specific reasoning field. Some permit synthetic placeholders; others\n need exact replay.\n- **Vision.** If the model/provider cannot accept images, convert image input and\n tool-result images to placeholders. Some Qwen/Dashscope-compatible modes are\n text-only even when the high-level model is multimodal.\n- **Native Responses history.** Native provider payload replay is model-bound.\n Strip or normalize foreign reasoning signatures. Shared code normalizes\n Responses pipe-separated tool ids, hashes foreign item ids, and can filter\n reasoning history.\n\n## 7. Decode streams by provider behavior, not just schema\n\n- **Generic OpenAI-compatible streams.** Keepalive chunks, role-only deltas, and\n empty `choices: []` are not progress. 
Idle watchdogs must not sleep forever\n because of them.\n- **Mistral Medium 3.5-style content.** `delta.content` can be an array/object of\n text parts, not a string; normalize it to text.\n- **DeepSeek via NVIDIA/native/proxies.** Some endpoints leak chat-template\n markers like `<|...|>` into visible content. Buffering is required because\n markers can be split across chunks.\n- **DeepSeek/template-leak tool calls.** Some providers leak tool-call markup in\n text while also producing structured tool calls. Markup healing belongs in the\n stream decoder policy, not endpoint business logic.\n- **MiniMax-M3 cumulative reasoning.** Reasoning deltas may be cumulative\n snapshots. Deduplicate by reasoning field signature.\n- **Responses streams.** Route parallel items by `output_index`, `item_id`,\n call-id aliases, and prefixed `fc_` aliases. Tolerate missing\n `content_part.added` or `output_item.added`. Finalize pending tool calls at the\n terminal event.\n- **Terminal behavior.** Chat Completions can break after `finish_reason` plus\n usage. Responses breaks on `response.completed` or `response.incomplete`. Tool\n calls with `stop` promote to `toolUse`. Codex/Responses `end_turn:false` maps\n to `pause_turn`.\n- **Ollama length failures.** `finish_reason: length` with no visible content is\n treated as context-window failure and mapped to an error.\n\n## 8. Preserve usage and cost semantics\n\n- OpenRouter `prompt_tokens_details.cache_write_tokens` is billed differently:\n subtract it from input tokens and emit it as cache-write usage.\n- DeepSeek native `prompt_cache_miss_tokens` is the billed input portion, not a\n separate cache-write charge. Do not double-count it.\n- GitHub Copilot `premiumRequests` must survive when usage is populated or\n replaced.\n- Responses and Codex both adjust cost by resolved service tier, but Codex uses\n different multipliers.\n\n## 9. 
Implement recovery at the right boundary\n\n- **Strict tool fallback.** `400`/`422` schema or strict-tool failures should\n disable strict tools for the appropriate session scope and retry non-strict.\n- **OpenAI Responses stateful fallback.** Stale, invalid, or unsupported\n `previous_response_id` resets chain state and retries with full context. Zero\n Data Retention disables chaining immediately.\n- **Codex websocket fallback.** Websocket connection errors, stale sockets,\n connection limits, retry-budget exhaustion, or unsafe partial output can\n trigger reconnect or SSE replay.\n- **Codex whitespace tool-loop breaker.** Codex can stream whitespace-only\n tool-call argument deltas indefinitely. Cap events/chars, drop the degenerate\n partial tool call, and retry only when safe.\n- **Codex `previous_response_id` fallback.** Stale or unsupported ids are\n treated as chain breaks and the request is retried with full context, but only\n over websocket because SSE never chains.\n- **Provider retry before content.** Codex retries retryable provider stream\n errors only before user-visible content has been emitted.\n\n## 10. Checklist for a new constraint\n\nBefore adding a branch or compat field, answer these in order:\n\n1. Is this endpoint-family behavior, gateway behavior, model behavior, or request\n context behavior?\n2. Can it be represented by existing `compat` metadata?\n3. If not, is a new compat field better than a provider-name branch?\n4. Does the field need provider-level defaults, model-level overrides, or both?\n5. Does it interact with tools, images, reasoning, stateful Responses chains, or\n service tier?\n6. Can retry happen before visible text/tool calls only?\n7. Does usage accounting still preserve cache reads/writes, billed input, service\n tier multipliers, and provider-specific counters such as Copilot\n `premiumRequests`?\n",
54
+ "provider-streaming-internals.md": "# Provider streaming internals\n\nThis document explains how token/tool streaming is normalized in `@oh-my-pi/pi-ai`, then propagated through `@oh-my-pi/pi-agent-core` and `coding-agent` session events.\n\n## End-to-end flow\n\n1. `streamSimple()` (`packages/ai/src/stream.ts`) maps generic options and dispatches to a provider stream function.\n2. Provider stream functions translate provider-native stream events into the unified `AssistantMessageEvent` sequence. Current built-ins include Anthropic, OpenAI Responses/Completions/Codex/Azure Responses, Google Gemini/Gemini CLI/Vertex, Bedrock Converse, Ollama, Cursor, pi-native gateway transport, plus GitLab Duo/Kimi/Synthetic/xAI-Grok-Responses wrappers and extension-registered custom APIs.\n3. Each provider pushes events into `AssistantMessageEventStream` (`packages/ai/src/utils/event-stream.ts`), which exposes:\n - async iteration for incremental updates\n - `result()` for final `AssistantMessage`\n4. `agentLoop` (`packages/agent/src/agent-loop.ts`) consumes those events, mutates in-flight assistant state, and emits `message_update` events carrying the raw `assistantMessageEvent`.\n5. 
`AgentSession` (`packages/coding-agent/src/session/agent-session.ts`) subscribes to agent events, persists messages, drives extension hooks, and applies session behaviors (retry, compaction, TTSR, streaming-edit abort checks).\n\n## Unified stream contract in `@oh-my-pi/pi-ai`\n\nAll providers emit the same shape (`AssistantMessageEvent` in `packages/ai/src/types.ts`):\n\n- `start`\n- content block lifecycle triplets:\n - text: `text_start` → `text_delta`\\* → `text_end`\n - thinking: `thinking_start` → `thinking_delta`\\* → `thinking_end`\n - tool call: `toolcall_start` → `toolcall_delta`\\* → `toolcall_end`\n- terminal event:\n - `done` with `reason: \"stop\" | \"length\" | \"toolUse\"`\n - or `error` with `reason: \"aborted\" | \"error\"`\n\n`AssistantMessageEventStream` guarantees:\n\n- final result is resolved by terminal event (`done` or `error`)\n- events are delivered to consumers immediately, in push order (no batching or merging)\n\n## Delta throttling behavior\n\n`AssistantMessageEventStream` itself no longer throttles or merges delta events — every provider event is delivered as pushed. The per-delta cost control moved into tool-call argument parsing: providers accumulate partial JSON and re-parse it via `parseStreamingJsonThrottled()` (`packages/ai/src/utils/json-parse.ts`), which skips the re-parse until at least `STREAMING_JSON_PARSE_MIN_GROWTH` (256) new bytes have arrived, bounding mid-stream parse cost from quadratic to linear. 
The final `toolcall_end` parse is always unconditional and authoritative.\n\nThere is no provider backpressure: providers still produce at full speed, while the local stream queues.\n\n## Provider normalization details\n\n## Anthropic (`anthropic-messages`)\n\nSource: `packages/ai/src/providers/anthropic.ts`\n\nNormalization points:\n\n- `message_start` initializes usage (input/output/cache tokens)\n- `content_block_start` maps to text/thinking/toolcall starts\n- `content_block_delta` maps:\n - `text_delta` → `text_delta`\n - `thinking_delta` → `thinking_delta`\n - `input_json_delta` → `toolcall_delta`\n - `signature_delta` updates `thinkingSignature` only (no event)\n- `content_block_stop` emits corresponding `*_end`\n- `message_delta.stop_reason` maps via `mapStopReason()`\n\nTool-call argument streaming:\n\n- each tool block carries internal `partialJson`\n- every JSON delta appends to `partialJson`\n- `arguments` are reparsed on appended deltas via `parseStreamingJsonThrottled()` (re-parse only after ≥256 new bytes)\n- `toolcall_end` reparses once more, then strips `partialJson`\n\n## OpenAI Responses family (`openai-responses`, `openai-codex-responses`, `azure-openai-responses`)\n\nSources: `packages/ai/src/providers/openai-responses.ts`, `openai-codex-responses.ts`, and `azure-openai-responses.ts`\n\nNormalization points:\n\n- `response.output_item.added` starts reasoning/text/function-call/custom-tool blocks\n- reasoning summary events (`response.reasoning_summary_text.delta`) and raw reasoning events (`response.reasoning_text.delta`) become `thinking_delta`\n- output/refusal deltas become `text_delta`\n- `response.function_call_arguments.delta` and `response.custom_tool_call_input.delta` become `toolcall_delta`\n- `response.output_item.done` emits `thinking_end` / `text_end` / `toolcall_end`\n- `response.completed` maps status to stop reason and usage; `response.failed` / SDK `error` events throw into the wrapper's terminal `error` path\n\nTool-call 
argument streaming:\n\n- same `partialJson` accumulation pattern as Anthropic for function-call JSON arguments\n- custom tools stream raw string input and expose final arguments as `{ input: <raw> }`\n- providers that send only `response.function_call_arguments.done` still populate final args\n- tool call IDs are normalized as `\"<call_id>|<item_id>\"`\n\n## Google Generative AI (`google-generative-ai`)\n\nSource: `packages/ai/src/providers/google.ts` (thin request wrapper) and `google-shared.ts` (`streamGoogleGenAI`, shared chunk-to-block translation)\n\nNormalization points:\n\n- iterates `candidate.content.parts`\n- text parts are split into thinking vs text by `isThinkingPart(part)`\n- block transitions close previous block before starting a new one\n- `part.functionCall` is treated as a complete tool call (start/delta/end emitted immediately)\n- finish reason mapped by `mapStopReason()` from `google-shared.ts`\n\nTool-call argument streaming:\n\n- function call args arrive as structured object, not incremental JSON text\n- implementation emits one synthetic `toolcall_delta` containing `JSON.stringify(arguments)`\n- no partial JSON parser needed for Google in this path\n\n## Partial tool-call JSON accumulation and recovery\n\nShared behavior for Anthropic/OpenAI Responses uses `parseStreamingJson()` / `parseStreamingJsonThrottled()` (`packages/ai/src/utils/json-parse.ts`):\n\n1. try `JSON.parse`\n2. fallback to `repairJson()` + the `partial-json` parser for incomplete fragments\n3. 
if both fail, return `{}`\n\nImplications:\n\n- malformed or truncated argument deltas do not crash stream processing immediately\n- in-progress `arguments` may temporarily be `{}`\n- later valid deltas can recover structured arguments because parsing is retried as the buffer grows (throttled to ≥256-byte growth steps mid-stream)\n- final `toolcall_end` performs one more parse attempt before emission\n\n## Stop reasons vs transport/runtime errors\n\nProvider stop reasons are mapped to normalized `stopReason`:\n\n- Anthropic: `end_turn`→`stop`, `max_tokens`→`length`, `tool_use`→`toolUse`, safety/refusal cases→`error`\n- OpenAI Responses: `completed`→`stop`, `incomplete`→`length`, `failed/cancelled`→`error`\n- Google: `STOP`→`stop`, `MAX_TOKENS`→`length`, safety/prohibited/malformed-function-call classes→`error`\n\nError semantics are split in two stages:\n\n1. **Model completion semantics** (provider reported finish reason/status)\n2. **Transport/runtime failure** (network/client/parser/abort exceptions)\n\nIf provider stream throws or signals failure, each provider wrapper catches and emits terminal `error` event with:\n\n- `stopReason = \"aborted\"` when abort signal is set\n- otherwise `stopReason = \"error\"`\n- `errorMessage = finalizeErrorMessage(error, rawRequestDump)` (`packages/ai/src/utils/http-inspector.ts`), which wraps `formatErrorMessageWithRetryAfter()` and appends any captured HTTP-error body / raw-request dump (the `cursor` wrapper calls `formatErrorMessageWithRetryAfter()` directly)\n\n## Malformed chunk / SSE parse failure behavior\n\nThe OpenAI Completions/Responses paths use the in-repo HTTP+SSE transport `postOpenAIStream()` (`packages/ai/src/utils/openai-http.ts`), which decodes frames with `readSseJson()` and replaced the `openai` SDK client. 
Anthropic uses the in-repo `AnthropicMessagesClient` (`packages/ai/src/providers/anthropic-client.ts`); the Google paths and the Codex SSE fallback read SSE via `readSseJson()` directly, and websocket Codex frames are normalized through the same event handler.\n\nObserved behavior in current implementation:\n\n- malformed SSE framing or chunk JSON surfaces as an exception or stream `error` event\n- malformed Codex SSE JSON/framing throws from the local SSE reader\n- provider wrapper converts failures into unified terminal `error` events\n- no provider-specific resume/retry inside the stream function itself, except Codex websocket-to-SSE transport fallback before replay-unsafe output is emitted\n- higher-level retries are handled in `AgentSession` auto-retry logic (message-level retry, not stream-chunk replay)\n\n## Cancellation boundaries\n\nCancellation is layered:\n\n- AI provider request: `options.signal` is passed into provider client stream call.\n- Provider wrapper: after stream loop, aborted signal forces error path (`\"Request was aborted\"`).\n- Agent loop: checks `signal.aborted` before handling each provider event and can synthesize an aborted assistant message from the latest partial.\n- Session/agent controls: `AgentSession.abort()` -> `agent.abort()` -> shared abort controller cancellation.\n\nTool execution cancellation is separate from model stream cancellation:\n\n- tool runners use `AbortSignal.any([agentSignal, steeringAbortSignal])`\n- steering interrupts can abort remaining tool execution while preserving already-produced tool results\n\n## Backpressure boundaries\n\nThere is no hard backpressure mechanism between provider SDK stream and downstream consumers:\n\n- `EventStream` uses in-memory queues with no max size\n- the throttled partial-JSON re-parse reduces per-delta CPU cost but does not slow provider intake\n- if consumers lag significantly, queued events can grow until completion\n\nCurrent design favors responsiveness and simple 
ordering over bounded-buffer flow control.\n\n## How stream events surface as agent/session events\n\n`agentLoop.streamAssistantResponse()` bridges `AssistantMessageEvent` to `AgentEvent`:\n\n- on `start`: pushes placeholder assistant message and emits `message_start`\n- on block events (`text_*`, `thinking_*`, `toolcall_*`): updates last assistant message, emits `message_update` with raw `assistantMessageEvent`\n- on terminal (`done`/`error`): resolves final message from `response.result()`, emits `message_end`\n\n`AgentSession` then consumes those events for session-level behaviors:\n\n- TTSR watches `message_update.assistantMessageEvent` for `text_delta`, `thinking_delta`, and `toolcall_delta`\n- streaming edit guard inspects `toolcall_delta`/`toolcall_end` on `edit` calls and can abort early\n- persistence writes finalized messages at `message_end`\n- auto-retry examines assistant `stopReason === \"error\"` plus `errorMessage` heuristics\n\n## Unified vs provider-specific responsibilities\n\nUnified (common contract):\n\n- event shape (`AssistantMessageEvent`)\n- final result extraction (`done`/`error`)\n- immediate in-order event delivery\n- agent/session event propagation model\n\nProvider-specific (not fully abstracted):\n\n- upstream event taxonomies and mapping logic\n- stop-reason translation tables\n- tool-call ID conventions\n- reasoning/thinking block semantics and signatures\n- usage token semantics and availability timing\n- message conversion constraints per API\n\n## Implementation files\n\n- [`../../ai/src/stream.ts`](../packages/ai/src/stream.ts) — provider dispatch, option mapping, API key/session plumbing, custom API dispatch, and provider-specific credential handling.\n- [`../../ai/src/utils/event-stream.ts`](../packages/ai/src/utils/event-stream.ts) — generic stream queue + final-result resolution.\n- [`../../ai/src/utils/json-parse.ts`](../packages/ai/src/utils/json-parse.ts) — partial JSON parsing for streamed tool arguments.\n- 
[`../../ai/src/providers/anthropic.ts`](../packages/ai/src/providers/anthropic.ts) — Anthropic event translation and tool JSON delta accumulation.\n- [`../../ai/src/providers/openai-responses.ts`](../packages/ai/src/providers/openai-responses.ts), [`openai-shared.ts`](../packages/ai/src/providers/openai-shared.ts), [`openai-codex-responses.ts`](../packages/ai/src/providers/openai-codex-responses.ts), [`azure-openai-responses.ts`](../packages/ai/src/providers/azure-openai-responses.ts) — Responses-family event translation and status mapping.\n- [`../../ai/src/providers/google.ts`](../packages/ai/src/providers/google.ts), [`google-gemini-cli.ts`](../packages/ai/src/providers/google-gemini-cli.ts), [`google-vertex.ts`](../packages/ai/src/providers/google-vertex.ts) — Gemini stream chunk-to-block translation variants.\n- [`../../ai/src/providers/google-shared.ts`](../packages/ai/src/providers/google-shared.ts) — Gemini finish-reason mapping and shared conversion rules.\n- [`../../ai/src/providers/amazon-bedrock.ts`](../packages/ai/src/providers/amazon-bedrock.ts), [`openai-completions.ts`](../packages/ai/src/providers/openai-completions.ts), [`ollama.ts`](../packages/ai/src/providers/ollama.ts), [`cursor.ts`](../packages/ai/src/providers/cursor.ts), [`pi-native-client.ts`](../packages/ai/src/providers/pi-native-client.ts) — additional built-in stream adapters using the same event contract.\n- [`../../agent/src/agent-loop.ts`](../packages/agent/src/agent-loop.ts) — provider stream consumption and `message_update` bridging.\n- [`../src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts) — session-level handling of streaming updates, abort, retry, and persistence.\n",
55
+ "providers.md": "# Providers\n\nProviders are the model backends `omp` can route requests to: Anthropic, OpenAI, Google Gemini, Groq, OpenRouter, Mistral, xAI, local engines like Ollama, hosted gateways, custom `models.yml` providers, and providers registered by extensions.\n\nA **provider** is the account or backend namespace, such as `anthropic`, `openai`, `google`, or `ollama`. A **model** is a concrete model under that provider, selected as `provider/model-id`, such as `anthropic/claude-opus-4-6`. Disabling a provider removes every model under it from selection; if you only want to narrow individual models, use model settings instead.\n\nThis page covers how providers become available, how credentials are resolved, the provider/environment-variable map, local engines, disabling providers, and custom providers. For endpoint-specific request, reasoning, tool, stream, usage, and retry constraints, see [Provider endpoint constraints](./provider-endpoint-constraints.md). For model selection and the full `models.yml` schema, see [Model and Provider Configuration](./models.md). For config-file locations and merge precedence, see [Settings](./settings.md). For credential storage and login flows in depth, see [Secrets and credentials](./secrets.md). For the complete environment-variable reference, see [Environment variables](./environment-variables.md). For local engine setup, see [Local models](./local-models.md). For context-file discovery providers, see [Context files](./context-files.md).\n\n## How `omp` decides a provider is available\n\nAt startup the model registry assembles its catalog from four sources, in order:\n\n1. The bundled model catalog (every built-in provider and its known models).\n2. Custom provider and model entries from `~/.omp/agent/models.yml`.\n3. Runtime-discovered models for providers that support discovery (local engines and discovery-enabled gateways).\n4. 
Providers and models registered by extensions.\n\nThe registry can hold a model even when it is not currently selectable. A model becomes **available** only when both conditions hold:\n\n1. its provider ID is **not** in the effective `disabledProviders` list; **and**\n2. the provider is either **keyless** (an implicit local provider, or a custom provider with `auth: none`) **or** has resolvable credentials.\n\n`disabledProviders` is checked *before* credentials. If a provider ID is disabled, no stored key, OAuth session, environment variable, `.env` entry, or `models.yml` `apiKey` will make it selectable — the provider's models are dropped from availability regardless of credentials. Removing the ID from the effective list restores them.\n\nKeyless local engines are a special case: `ollama`, `llama.cpp`, and `lm-studio` are treated as keyless when no key is configured, so their discovered models are selectable as soon as the engine answers — no login required. See [Built-in local engines](#built-in-local-engines).\n\n## Credentials and precedence\n\nWhen a provider needs an API key, `omp` resolves it in this order (first match wins):\n\n1. **Runtime override** — a key supplied for the current process, e.g. CLI `--api-key`. Never persisted.\n2. **`models.yml` config key** — an `apiKey` pinned on a custom provider, registered as a config-sourced bearer. This deliberately beats stored OAuth, so a key supplied for a custom `baseUrl`/gateway is honored instead of forwarding an upstream OAuth token the proxy would reject.\n3. **Stored API key** — an API-key credential saved in the auth store.\n4. **Stored OAuth credential** — refreshed when needed; multiple accounts are ranked/rotated automatically.\n5. **Provider environment variable** — including values loaded from `.env` files (see [the env-var table](#environment-variables-and-env-files)).\n6. 
**`models.yml` fallback resolver** — keys for custom providers not otherwise registered.\n\nStored credentials live in the auth store at `~/.omp/agent/agent.db` for local auth, or in the configured auth-broker snapshot when running in broker mode. (`PI_CODING_AGENT_DIR` relocates the `~/.omp/agent` base, and the auth store moves with it.)\n\n### OAuth vs API key, and provider-scoped logins\n\nLogins are **provider-scoped**: authenticating `anthropic` does not authenticate `openai`, and each provider tracks its own credentials. A disabled provider stays disabled even with valid stored auth.\n\nUse the interactive slash commands inside a session:\n\n- `/login` — opens the OAuth/key selector. `/login <provider>` jumps straight to one provider (e.g. `/login anthropic`); for an OAuth flow that needs a pasted callback, run `/login <redirect-url>` to complete it.\n- `/logout` — opens the provider selector to remove stored credentials.\n\nFor headless or remote setups backed by a shared auth broker, the CLI exposes `omp auth-broker login <provider>` / `omp auth-broker logout` (and `status`, `list`, `import`, `migrate`). See [Secrets and credentials](./secrets.md) for the broker model.\n\nWhen a model has no credentials, `omp` tells you to run `/login` or set the provider's environment variable.\n\n### Pinning a key in `models.yml`\n\nA custom provider's `apiKey` is resolved as **environment-variable-name-or-literal**: if the value names an existing environment variable, that variable's value is used; otherwise the string itself is the key. 
Prefixing the value with `!` runs it as a shell command and uses the trimmed stdout (see [Model and Provider Configuration](./models.md) for the full value syntax).\n\n```yaml\n# ~/.omp/agent/models.yml\nproviders:\n my-gateway:\n baseUrl: https://gateway.example.com/v1\n api: openai-completions\n apiKey: MY_GATEWAY_API_KEY # reads this env var if set, else literal text\n models:\n - id: claude-sonnet\n name: Claude Sonnet via Gateway\n contextWindow: 200000\n maxTokens: 8192\n```\n\nIf `authHeader: true` is set on a custom provider, the resolved key is injected as an `Authorization: Bearer <key>` header on every request to that provider.\n\n## Environment variables and `.env` files\n\nEach provider has one or more environment variables that supply a key when no stored credential exists. The table below is the verified provider → variable map; the full catalog is large, so it is split into core and additional providers. OAuth-backed providers can also accept a token variable in addition to (or instead of) an API key.\n\n### Core providers\n\n| Provider ID | Environment variable(s) |\n|---|---|\n| `anthropic` | `ANTHROPIC_OAUTH_TOKEN`, then `ANTHROPIC_API_KEY` (Foundry mode prefers `ANTHROPIC_FOUNDRY_API_KEY` when `CLAUDE_CODE_USE_FOUNDRY=true`) |\n| `openai` | `OPENAI_API_KEY` |\n| `openai-codex` | `OPENAI_CODEX_OAUTH_TOKEN` |\n| `google` | `GEMINI_API_KEY` |\n| `google-vertex` | `GOOGLE_CLOUD_API_KEY`, or Application Default Credentials (`GOOGLE_APPLICATION_CREDENTIALS` + `GOOGLE_CLOUD_PROJECT` + `GOOGLE_CLOUD_LOCATION`) |\n| `groq` | `GROQ_API_KEY` |\n| `openrouter` | `OPENROUTER_API_KEY` |\n| `mistral` | `MISTRAL_API_KEY` |\n| `xai` | `XAI_API_KEY` |\n| `xai-oauth` | `XAI_OAUTH_TOKEN`, then `XAI_API_KEY` |\n| `github-copilot` | `COPILOT_GITHUB_TOKEN` |\n| `cursor` | `CURSOR_ACCESS_TOKEN` |\n| `azure` | `AZURE_OPENAI_API_KEY` |\n| `amazon-bedrock` | `AWS_PROFILE`, or `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY`, or an ECS/IRSA credential chain |\n\n### 
Additional hosted providers\n\n| Provider ID | Environment variable(s) |\n|---|---|\n| `cerebras` | `CEREBRAS_API_KEY` |\n| `deepseek` | `DEEPSEEK_API_KEY` |\n| `fireworks` | `FIREWORKS_API_KEY` |\n| `together` | `TOGETHER_API_KEY` |\n| `nvidia` | `NVIDIA_API_KEY` |\n| `huggingface` | `HUGGINGFACE_HUB_TOKEN`, then `HF_TOKEN` |\n| `moonshot` | `MOONSHOT_API_KEY` |\n| `nanogpt` | `NANO_GPT_API_KEY` |\n| `venice` | `VENICE_API_KEY` |\n| `vercel-ai-gateway` | `AI_GATEWAY_API_KEY` (also `VERCEL_AI_GATEWAY_API_KEY` for catalog discovery) |\n| `cloudflare-ai-gateway` | `CLOUDFLARE_AI_GATEWAY_API_KEY` |\n| `litellm` | `LITELLM_API_KEY`; optional `LITELLM_BASE_URL` for the proxy endpoint |\n| `kilo` | `KILO_API_KEY` |\n| `zai` | `ZAI_API_KEY` |\n| `zenmux` | `ZENMUX_API_KEY` |\n| `zhipu-coding-plan` | `ZHIPU_API_KEY` |\n| `umans` | `UMANS_AI_CODING_PLAN_API_KEY` |\n| `qianfan` | `QIANFAN_API_KEY` |\n| `qwen-portal` | `QWEN_OAUTH_TOKEN`, then `QWEN_PORTAL_API_KEY` |\n| `synthetic` | `SYNTHETIC_API_KEY` |\n| `minimax` | `MINIMAX_API_KEY` |\n| `alibaba-coding-plan` | `ALIBABA_CODING_PLAN_API_KEY` |\n| `aimlapi` | `AIMLAPI_API_KEY` |\n| `gitlab-duo` | `GITLAB_TOKEN` |\n| `opencode-zen`, `opencode-go` | `OPENCODE_API_KEY` |\n| `firepass` | `FIREPASS_API_KEY` |\n| `wafer-pass` | `WAFER_PASS_API_KEY` |\n| `wafer-serverless` | `WAFER_SERVERLESS_API_KEY` |\n| `xiaomi` | `XIAOMI_API_KEY` |\n| `ollama-cloud` | `OLLAMA_CLOUD_API_KEY` |\n| `ollama` | `OLLAMA_API_KEY` (optional; local discovery is keyless by default) |\n| `lm-studio` | `LM_STUDIO_API_KEY` (optional; keyless by default) |\n| `llama.cpp` | `LLAMA_CPP_API_KEY` (only when the server requires auth) |\n\nOAuth-backed providers such as `anthropic`, `github-copilot`, `cursor`, `ollama-cloud`, `qwen-portal`, `kimi-code`, `xai-oauth`, `wafer-pass`, `wafer-serverless`, `google-gemini-cli`, and `google-antigravity` are normally reached through `/login` rather than an environment variable. 
See [Environment variables](./environment-variables.md) for search-tool and configuration variables not listed here.\n\n### `.env` discovery and precedence\n\n`omp` eagerly loads `.env` files into the process environment before any provider lookup. It reads four files and, for each variable, the **first** source that defines it wins. Effective precedence, high to low:\n\n1. The process environment inherited by `omp` (already-set variables always win).\n2. `<cwd>/.env`\n3. `~/.omp/agent/.env`\n4. `~/.omp/.env`\n5. `~/.env`\n\nA variable already present in the process environment is never overwritten by a `.env` file. Among the files, a value set in `<cwd>/.env` wins over `~/.omp/agent/.env`, which wins over `~/.omp/.env`, which wins over `~/.env`. So a shell-exported `OPENAI_API_KEY` beats every `.env` file, and a project's `<cwd>/.env` beats your home `~/.env`.\n\nProject-local `.env` is the simplest way to make one repository use a project-specific gateway, key, or local endpoint:\n\n```dotenv\n# <project>/.env\nOPENROUTER_API_KEY=sk-or-...\nOLLAMA_BASE_URL=http://127.0.0.1:11434\n```\n\n`.env` parsing is intentionally minimal:\n\n- blank lines and lines starting with `#` are ignored;\n- keys must match `[A-Za-z_][A-Za-z0-9_]*` (shell-identifier shape) — other names are dropped;\n- values may be wrapped in single or double quotes, which are stripped;\n- values containing a NUL byte are dropped;\n- an `OMP_`-prefixed key is also mirrored to the matching `PI_`-prefixed name.\n\n## Built-in local engines\n\nThree local engines are discovered automatically without needing a `models.yml` entry. Each uses a base URL that can be overridden by an environment variable:\n\n| Provider ID | Base URL (env override → default) | Notes |\n|---|---|---|\n| `ollama` | `OLLAMA_BASE_URL`, then `OLLAMA_HOST` (normalized), else `http://127.0.0.1:11434` | Keyless by default. 
|\n| `llama.cpp` | `LLAMA_CPP_BASE_URL`, else `http://127.0.0.1:8080` | Keyless unless a key is stored for `llama.cpp`. |\n| `lm-studio` | `LM_STUDIO_BASE_URL`, else `http://127.0.0.1:1234/v1` | Keyless by default. |\n\nThese implicit engines are **skipped** when:\n\n- a provider with the same ID is already configured in `models.yml` (your explicit config wins); or\n- the provider ID appears in the effective `disabledProviders` list.\n\nFor installing and running these engines, see [Local models](./local-models.md).\n\n## Disabling model providers\n\nUse the `disabledProviders` setting to remove a provider's models from selection:\n\n```yaml\n# ~/.omp/agent/config.yml or <project>/.omp/config.yml\ndisabledProviders:\n - anthropic\n - openai\n - google\n - groq\n```\n\nProvider IDs are matched exactly. Disable `google` to hide the Google Gemini API provider; the OAuth-backed Google providers `google-gemini-cli` and `google-antigravity` are separate IDs and must be disabled individually. Disable `ollama`, `llama.cpp`, or `lm-studio` to stop local discovery for that engine.\n\n`disabledProviders` applies uniformly to:\n\n- bundled catalog providers;\n- custom `models.yml` providers;\n- runtime-discovered provider models;\n- extension-registered providers;\n- implicit local engines.\n\nDisabling a provider does not delete its stored credentials — re-enable it by removing its ID from the effective list.\n\n## Project-specific provider control\n\nProject settings live in `<project>/.omp/config.yml`. Use them when one repository must allow or hide a different provider set than your global default:\n\n```yaml\n# <project>/.omp/config.yml\ndisabledProviders:\n - openai\n - openrouter\n```\n\nSettings arrays are **replaced** wholesale by the higher-precedence layer, not merged or appended. 
If the global file disables three providers and the project file disables one, the project sees only the project list:\n\n```yaml\n# ~/.omp/agent/config.yml\ndisabledProviders:\n - anthropic\n - openai\n - google\n\n# <project>/.omp/config.yml\ndisabledProviders:\n - groq\n```\n\nEffective result inside the project:\n\n```json\n[\"groq\"]\n```\n\nThe project array re-enables `anthropic`, `openai`, and `google` for sessions launched from that project. If you want a project to *add* to the global set, repeat the global IDs in the project file. See [Settings](./settings.md) for the full precedence chain, including `--config` overlays and runtime overrides.\n\n## Path-scoped `disabledProviders`\n\n`disabledProviders` can mix plain string entries (apply everywhere) with path-scoped entries (apply only when the current working directory matches a configured path):\n\n```yaml\ndisabledProviders:\n - ollama\n - path: ~/projects/sensitive\n providers:\n - anthropic\n - openai\n - paths:\n - ~/work/client-a\n - ~/work/client-b\n values:\n - openrouter\n```\n\n- Bare string entries always apply.\n- A scoped entry applies when the current working directory **is** the configured path or sits **under** it. `~` expands to the home directory.\n- Accepted path keys: `path`, `paths`, `pathPrefix`, `pathPrefixes`.\n- Accepted value keys: `providers`, `values`, `items`.\n\nFor the example above:\n\n- `ollama` is disabled everywhere.\n- `anthropic` and `openai` are additionally disabled under `~/projects/sensitive`.\n- `openrouter` is additionally disabled under `~/work/client-a` and `~/work/client-b`.\n\nPath scopes are resolved **after** the settings merge. Because a higher-precedence layer replaces the whole array, a project-level `disabledProviders` array drops any scoped entries that only existed in the global array. `enabledModels` is the only other setting that supports the same path-scoped form. 
See [Settings](./settings.md) for details.\n\n## Provider IDs vs discovery provider IDs\n\n`disabledProviders` uses a **single shared ID namespace** that gates two different subsystems:\n\n- **Model providers** — the backends on this page (`anthropic`, `openai`, `ollama`, a custom `models.yml` ID, …). Disabling one removes its models from selection.\n- **Discovery providers** — sources of context files, MCP servers, commands, skills, hooks, tools, prompts, and settings. Disabling one stops that source from contributing capability items.\n\n| Entry type | Examples | Effect |\n|---|---|---|\n| Model provider ID | `anthropic`, `openai`, `google`, `groq`, `openrouter`, `ollama`, `my-gateway` | Removes that provider's models from availability. |\n| Discovery provider ID | `native`, `claude`, `codex`, `gemini`, `agents`, `github` | Stops that discovery source from contributing capability items. |\n\nWatch the related names. The Google Gemini **API** models use the model provider ID `google`; `gemini` is a **discovery** provider ID (the source that reads `GEMINI.md`), not the Google model provider. Use discovery IDs only when you intend to disable an entire config source. See [Context files](./context-files.md) for the discovery-provider side.\n\n## Custom providers in `models.yml`\n\nCustom providers live in `~/.omp/agent/models.yml` under `providers:`. 
A provider ID defined there participates in the same selection, credential resolution, and `disabledProviders` rules as built-in providers.\n\nMinimal OpenAI-compatible provider:\n\n```yaml\nproviders:\n my-openai-compatible:\n baseUrl: https://api.example.com/v1\n api: openai-completions\n apiKey: MY_OPENAI_COMPATIBLE_KEY # env-var-name or literal\n models:\n - id: fast-chat\n name: Fast Chat\n contextWindow: 128000\n maxTokens: 8192\n```\n\nKeyless local provider (no credentials required):\n\n```yaml\nproviders:\n local-proxy:\n baseUrl: http://127.0.0.1:4000/v1\n api: openai-completions\n auth: none\n models:\n - id: local-model\n name: Local Model\n contextWindow: 32768\n maxTokens: 4096\n```\n\nDiscovery-enabled provider (models fetched from the endpoint at runtime):\n\n```yaml\nproviders:\n team-proxy:\n baseUrl: https://models.example.com/v1\n apiKey: TEAM_PROXY_API_KEY\n authHeader: true # send Authorization: Bearer <resolved key>\n disableStrictTools: true\n discovery:\n type: proxy\n```\n\nFor the full schema, all allowed `api` values, discovery `type`s, model overrides, and equivalence settings, see [Model and Provider Configuration](./models.md).\n\nTo disable a custom provider, list its ID exactly:\n\n```yaml\ndisabledProviders:\n - my-openai-compatible\n - team-proxy\n```\n\n## Troubleshooting\n\n**A provider's models are not selectable.** Confirm the provider has credentials (`/login <provider>`, an exported environment variable, or a `models.yml` `apiKey`) and that its ID is not in the effective `disabledProviders` list. Remember the rule: not disabled **and** (keyless **or** has credentials). Keyless local engines only appear once the engine is actually running and responding.\n\n**The wrong key is being used (a stale key from `.env`).** Resolution favors runtime `--api-key`, then a `models.yml` config key, then stored credentials, then environment/`.env`. 
An already-set process environment variable also beats every `.env` file, and `<cwd>/.env` beats `~/.env`. If an unexpected key wins, check for an exported shell variable and the four `.env` files in precedence order, and clear the one that should not apply.\n\n**A provider still appears even though I disabled it.** `disabledProviders` arrays are replaced, not merged: a project `<project>/.omp/config.yml` array fully overrides the global one. Verify the *effective* list for the directory you are in (path-scoped entries only apply at or under their configured path), and confirm the ID is spelled exactly. Use `omp config get disabledProviders` to inspect the merged value (see [Settings](./settings.md)).\n\n**A discovery provider name had no effect on models (or vice-versa).** The ID namespace is shared. `gemini`, `codex`, `claude`, `native`, `agents`, and `github` are discovery-source IDs; the Google model backend is `google`. Make sure you are disabling the right kind of provider.\n\n**A custom `models.yml` provider does not load.** A YAML or schema error makes the registry skip the custom file. Validate the file with `omp models` (use `omp models find <substr>` to scope it to one provider), confirm each provider has a `baseUrl`, a valid `api`, and at least one model entry, and check that its ID does not unintentionally collide with a built-in local engine (an explicit `ollama`/`lm-studio`/`llama.cpp` entry replaces the built-in discovery for that ID rather than merging with it). See [Model and Provider Configuration](./models.md).\n",
54
56
  "python-repl.md": "# Eval Tool Python Backend\n\nThis document describes the Python execution stack in `packages/coding-agent`.\nIt covers tool behavior, runner lifecycle, environment handling, execution semantics, output rendering, supported magics, and operational failure modes.\n\n## Scope and Key Files\n\n- Tool surface: `src/tools/eval.ts`\n- Session/per-call kernel orchestration: `src/eval/py/executor.ts`\n- Subprocess kernel client: `src/eval/py/kernel.ts`\n- Python wrapper / NDJSON server: `src/eval/py/runner.py`\n- Prelude helpers loaded into every kernel: `src/eval/py/prelude.py`\n- Host-side subagent helper bridge: `src/eval/agent-bridge.ts`\n- MIME bundle renderer (text + structured outputs): `src/eval/py/display.ts`\n- Interactive-mode renderer for user-triggered Python runs: `src/modes/components/eval-execution.ts`\n- Runtime/env filtering and Python resolution: `src/eval/py/runtime.ts`\n\n## What eval's Python backend is\n\nThe `eval` tool executes one or more Python cells inside a retained `python` subprocess that speaks NDJSON over stdin/stdout. No Jupyter gateway and no extra pip dependencies are required — a vanilla Python 3.8+ interpreter is enough. Rich `display()` output (PIL, pandas, plotly, matplotlib figures) keeps working because the wrapper implements MIME-bundle dispatch.\n\nTool params:\n\n```ts\n{\n cells: Array<{\n language: \"py\" | \"js\";\n code: string;\n title?: string;\n timeout?: number; // seconds, clamped to 1..3600, default 30. Wall-clock budget, paused during agent()/parallel()/completion() bridge calls — see \"Cell timeout\".\n reset?: boolean; // reset this cell's selected runtime before execution\n }>;\n}\n```\n\nThe tool is `concurrency = \"exclusive\"` for a session, so calls do not overlap.\n\n## Kernel lifecycle\n\nEach Python kernel is a single subprocess: `<resolved-python> -u <runner.py>`. 
The runner is bundled with the host binary (Bun text import), written to an `omp-python-runner` cache under the OS temp directory once per script hash, and reused by subsequent spawns.\n\nKernel startup sequence:\n\n1. Availability check (`checkPythonKernelAvailability`) — verifies that a Python interpreter resolves and runs.\n2. Spawn `python -u runner.py` with filtered env and `cwd`.\n3. Send an init request that runs `os.chdir(cwd)`, injects env entries, and adds `cwd` to `sys.path`.\n4. Execute `PYTHON_PRELUDE` (idempotent — only initializes once per process).\n\nKernel shutdown:\n\n- Send `{\"type\": \"exit\"}` over stdin.\n- Wait for process exit with `SHUTDOWN_GRACE_MS` budget.\n- Escalate to `SIGTERM` and finally `SIGKILL` if the process does not exit in time.\n\n## Wire protocol (NDJSON, host ↔ runner)\n\nOne JSON object per line, UTF-8, `\\n` terminated.\n\nHost → runner:\n\n```jsonc\n{\"id\": \"<reqId>\", \"code\": \"<source>\", \"silent\": false, \"storeHistory\": true, \"cwd\": \"<optional>\", \"env\": {\"KEY\": \"VAL\"}}\n{\"type\": \"exit\"}\n```\n\nRunner → host:\n\n```jsonc\n{\"type\": \"started\", \"id\": \"<reqId>\"}\n{\"type\": \"stdout\", \"id\": \"<reqId>\", \"data\": \"...\"}\n{\"type\": \"stderr\", \"id\": \"<reqId>\", \"data\": \"...\"}\n{\"type\": \"display\", \"id\": \"<reqId>\", \"bundle\": {<mime>: <value>}}\n{\"type\": \"result\", \"id\": \"<reqId>\", \"bundle\": {<mime>: <value>}}\n{\"type\": \"error\", \"id\": \"<reqId>\", \"ename\": \"...\", \"evalue\": \"...\", \"traceback\": [\"...\"]}\n{\"type\": \"done\", \"id\": \"<reqId>\", \"status\": \"ok\"|\"error\", \"executionCount\": N, \"cancelled\": false}\n```\n\nStatus events the prelude emits (e.g. `_emit_status(\"find\", count=…)`) ship inside display bundles under `application/x-omp-status` so the existing TUI status renderer keeps working.\n\n## Magics\n\nThe runner's source transformer rewrites IPython-style magics to plain Python calls before parsing. 
Supported set:\n\n| Magic | Effect |\n| --------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| `%pip <args>` | `python -m pip <args>` with live streaming output. Newly installed packages are evicted from `sys.modules` so the next `import` picks up the fresh install. |\n| `%cd <path>` | `os.chdir(path)` (with `~` expansion); emits status event. |\n| `%pwd` | Returns `os.getcwd()`. |\n| `%ls [path]` | Returns `sorted(os.listdir(path))`. |\n| `%env [KEY[=VAL]]` | List, read, or set env vars (matches prelude `env()` semantics). |\n| `%set_env KEY VALUE` | Set `os.environ[KEY]`. |\n| `%time <expr>` / `%timeit <expr>` | Time the expression; emits status event with elapsed ms. |\n| `%who` / `%whos` | List user-namespace names. |\n| `%reset` | Clear user globals and re-inject prelude. |\n| `%load <path>` | Read a file into a fresh cell and execute. |\n| `%run <path>` | `runpy.run_path` and merge globals back. |\n| `%%bash` / `%%sh` | Run the cell body via `bash`/`sh`. |\n| `%%capture [name]` | Run body with stdout/stderr captured into `name`. |\n| `%%timeit` | Time the cell body. |\n| `%%writefile <path>` | Write body to file. |\n| `!cmd` / `var = !cmd` | Run command via subprocess shell; returns an SList-style result with `.n` / `.s` helpers. |\n| `var = %name args` | Assignment forms work for line magics and `!cmd`. 
|\n\nUnknown magic names raise `NameError: UsageError: ...` inside the cell.\n\n## Session persistence semantics\n\n`python.kernelMode` controls retained kernel reuse:\n\n- `session` (default)\n - Reuses kernel sessions keyed by namespaced eval session id plus normalized cwd and interpreter.\n - Multiple owners can share the same retained kernel for that key.\n - Calls through the tool are exclusive, so tool invocations do not overlap.\n - A dead retained subprocess is replaced before execution.\n - If the subprocess dies during execution, it is replaced and the cell is retried once.\n- `per-call`\n - Spawns a fresh subprocess for each request.\n - Shuts the subprocess down after the request.\n - No cross-call state persistence.\n\n### Multi-cell behavior in a single tool call\n\nPython cells run sequentially in the same selected Python kernel instance for that tool call.\n\nIf an intermediate cell fails:\n\n- Earlier cell state remains in memory.\n- Tool returns a targeted error indicating which cell failed.\n- Later cells are not executed.\n\n`reset=true` is per cell and resets that language runtime before the cell executes.\n\n## Environment filtering and runtime resolution\n\nEnvironment is filtered before launching the runner:\n\n- Allowlist includes core vars like `PATH`, `HOME`, locale vars, `VIRTUAL_ENV`, `PYTHONPATH`, etc.\n- Allow-prefixes: `LC_`, `XDG_`, `PI_`\n- Denylist strips common API keys (OpenAI/Anthropic/Gemini/etc.)\n\nRuntime selection order (skipped entirely when the `python.interpreter` setting names an explicit executable):\n\n1. Active/located venv (`VIRTUAL_ENV`, then `CONDA_PREFIX`, then `<cwd>/.venv`, `<cwd>/venv`)\n2. Managed venv at `~/.omp/python-env`\n3. 
`python` or `python3` on PATH\n\nWhen a venv is selected, its bin/Scripts path is prepended to `PATH`.\n\nThe runner additionally receives `PYTHONUNBUFFERED=1` and `PYTHONIOENCODING=utf-8` so streamed output reaches the host promptly.\n\n## Tool availability and mode selection\n\n`eval.py` / `eval.js` (both default `true`) plus optional boolean env flags `PI_PY` / `PI_JS` control eval backend exposure:\n\n- Python backend only (`eval.py=true`, `eval.js=false`, or `PI_PY=1 PI_JS=0`)\n- JavaScript backend only (`eval.py=false`, `eval.js=true`, or `PI_PY=0 PI_JS=1`)\n- both backends (`eval.py=true`, `eval.js=true`, or `PI_PY=1 PI_JS=1`)\n\n`PI_PY` and `PI_JS` use normal boolean flag parsing. Each flag, when set, overrides only its own setting; an unset flag falls back to its setting (`eval.py` / `eval.js`, both default `true`).\n\nIf Python preflight fails and `eval.js` is enabled, `eval` remains available for `js` cells; `py` cells fail with a Python-backend availability error.\n\nPython prelude helpers include `agent(prompt, *, agent_type=\"task\", model=None, label=None, schema=None, return_handle=False)`. It synchronously calls the host bridge, runs one subagent through the task executor, and returns the final text. When `schema` is supplied, the helper parses the subagent's JSON output and returns the object. When `return_handle=True`, it instead returns a DAG node dict (`{\"text\", \"output\", \"handle\", \"id\", \"agent\"}`) whose `handle` is the spawned agent's recoverable `agent://<id>` URI (the parsed object lands under `\"data\"` when `schema` is also set), so a downstream `pipeline`/`parallel` stage can reference the transcript by handle instead of re-inlining it.\n\n## Execution flow and cancellation/timeout\n\n### Cell timeout\n\nEach eval cell `timeout` is in seconds, defaults to 30, and is clamped to `1..3600`. 
It is a **wall-clock budget on the cell's own work** that the watchdog (`IdleTimeout`, `src/eval/idle-timeout.ts`) enforces, **but it is suspended while a host-side `agent()`/`parallel()`/`completion()` bridge call is in flight**: those calls emit synthetic pause/resume timeout-control status events (`withBridgeTimeoutPause`, `src/eval/bridge-timeout.ts`) that pause the watchdog entirely and start a fresh timeout window when control returns to the runtime, so a long fanout or a slow completion runs to completion instead of being killed mid-stream. Pause is reference-counted because `parallel()` can have multiple bridge calls in flight at once.\n\nThe pause/resume events are the **sole** mechanism that suspends the budget. Everything else the cell does — compute, `stdout`/`stderr`, `log()`/`phase()`, and ordinary (non-agent) tool calls — counts against `timeout`, so a cell that is not delegating to an agent/completion is bounded by a plain wall-clock timeout. The tool combines the caller abort signal, the session abort signal, and the watchdog's signal with `AbortSignal.any(...)`; no wall-clock deadline is passed to the backend, so neither runtime arms a competing fixed timer.\n\n### Kernel execution cancellation\n\nOn abort/timeout:\n\n- The host sends `kill(\"SIGINT\")` to the runner subprocess.\n- The runner's exec-time signal handler raises `KeyboardInterrupt` inside the user code.\n- Result includes `cancelled=true`; a kernel timeout is annotated as `eval cell timed out after <n>s; kernel interrupted but remains running. Reset the kernel via { reset: true } if state appears corrupted.`\n- Between requests the runner installs `SIG_IGN` for SIGINT so a stray cancel does not tear down the kernel.\n\nIf the runner does not emit `done` within 5s of the interrupt (`INTERRUPT_ESCALATION_MS` — e.g. 
stuck in C code holding the GIL), the host shuts the subprocess down (escalating `exit` → `SIGTERM` → `SIGKILL`), the cell is annotated as kernel-killed, and the kernel is recreated on the next call.\n\n### stdin behavior\n\nInteractive stdin is not supported. The runner does not forward `input()` prompts; user code that calls `input()` blocks until cancellation.\n\n## Output capture and rendering\n\n### Captured output classes\n\nFrom runner frames:\n\n- `stdout` / `stderr` → plain text chunks\n- `display` / `result` → rich display handling (MIME bundle)\n- `error` → traceback text\n- `application/x-omp-status` MIME inside `display` → structured status events\n\nDisplay MIME precedence:\n\n1. `text/markdown`\n2. `text/plain`\n3. `text/html` (converted to basic markdown)\n\nAdditionally captured as structured outputs:\n\n- `application/json` → JSON tree data\n- `image/png` / `image/jpeg` → image payloads\n- `application/x-omp-status` → status events\n\n### Matplotlib\n\nThe runner sets `MPLBACKEND=Agg` as an environ default so figures render off-screen. After every cell, `pyplot.get_fignums()` is iterated; each figure is saved to PNG, emitted as an `image/png` display, and closed.\n\n### Storage and truncation\n\nOutput is streamed through `OutputSink` and may be persisted to artifact storage. 
Tool results can include truncation metadata and `artifact://<id>` for full output recovery.\n\n### Renderer behavior\n\n- Tool renderer (`eval-render.ts`, re-exported from `eval.ts`):\n - shows code-cell blocks with per-cell status\n - collapsed preview defaults to 10 lines\n - supports expanded mode for all output retained in the tool result\n- Interactive renderer (`eval-execution.ts`):\n - used for user-triggered Python execution in TUI\n - collapsed preview defaults to 20 lines\n - clamps very long individual lines to 4000 chars for display safety\n - shows cancellation/error/truncation notices\n\n## Operational troubleshooting\n\n- **Python backend not available** — Check `eval.py`, `PI_PY`, and that `python`/`python3` is on PATH. If preflight fails and `eval.js` is enabled, use a `js` cell.\n- **No Python on PATH** — Install a system Python 3.8+ or place a venv at `~/.omp/python-env`. `omp setup python --check` reports the resolved interpreter.\n- **Execution hangs then times out** — Increase the cell's `timeout` (max 3600s) if the workload is legitimate. For stuck native code, cancellation sends `SIGINT` first, then escalates to shutting the subprocess down; the kernel is recreated on the next call.\n- **stdin/input prompts in Python code** — `input()` is not supported; pass data programmatically.\n- **Working directory errors** — The tool validates that `cwd` exists and is a directory before execution.\n\n## Relevant environment variables\n\n- `PI_PY` / `PI_JS` — eval backend exposure overrides\n- `PI_PYTHON_SKIP_CHECK=1` — bypass Python preflight/warm checks\n- `PI_PYTHON_INTEGRATION=1` — enable gated integration tests that spawn a real Python\n- `PI_PYTHON_IPC_TRACE=1` — log NDJSON frames exchanged with the runner subprocess\n",
55
57
  "resolve-tool-runtime.md": "# Resolve tool runtime internals\n\nThis document explains how preview/apply workflows are modeled in coding-agent and how built-in or custom tools can participate via the tool-choice queue and `pushPendingAction`.\n\n## Scope and key files\n\n- [`src/tools/resolve.ts`](../packages/coding-agent/src/tools/resolve.ts)\n- [`src/tools/ast-edit.ts`](../packages/coding-agent/src/tools/ast-edit.ts)\n- [`src/extensibility/custom-tools/types.ts`](../packages/coding-agent/src/extensibility/custom-tools/types.ts)\n- [`src/extensibility/custom-tools/loader.ts`](../packages/coding-agent/src/extensibility/custom-tools/loader.ts)\n- [`src/sdk.ts`](../packages/coding-agent/src/sdk.ts)\n\n## What `resolve` does\n\n`resolve` is a hidden tool that finalizes a pending preview action.\n\n- `action: \"apply\"` executes the queued action's `apply(reason, extra)` callback and returns that result with resolve metadata.\n- `action: \"discard\"` invokes `reject(reason, extra)` if provided; otherwise returns `Discarded: <label>. Reason: <reason>`.\n- `extra` is optional free-form metadata. Queue handlers receive it; producers decide whether it has meaning.\n\nIf no pending action exists, `resolve(action=\"apply\")` fails with:\n\n- `No pending action to resolve. 
Nothing to apply or discard.`\n\n`resolve(action=\"discard\")` with no pending action succeeds instead, returning `Nothing to discard; no pending action remains.` — the desired end-state (no staged change) already holds.\n\n## Pending actions use the tool-choice queue\n\nPreview producers call `queueResolveHandler(...)`, which pushes a one-shot forced `resolve` directive onto the session tool-choice queue and adds a `resolve-reminder` steering message.\n\nRuntime behavior:\n\n- the queued handler owns the pending `apply`/`reject` callbacks,\n- `resolve` looks up the current queue invoker with `session.peekQueueInvoker()`,\n- if the model rejects the forced tool choice, the queue directive is requeued,\n- `resolve` does not maintain a separate pending-action stack.\n\n`resolve` also checks a standing resolve handler after the queue invoker; this is used by long-lived approval flows that are not ordinary preview tool calls.\n\nMultiple pending previews therefore follow the active tool-choice queue ordering, not an independent pending-action store. If an apply callback throws, the queued helper re-pushes the same resolve directive and reminder so the preview can still be discarded or retried.\n\n## Built-in producer example (`ast_edit`)\n\n`ast_edit` previews structural replacements first. When the preview has replacements and is not applied yet, it queues a resolve handler that contains:\n\n- label (human-readable summary)\n- `sourceToolName` (`ast_edit`)\n- `apply(reason: string, extra?: Record<string, unknown>)` callback that reruns AST edit with `dryRun: false`\n\n`resolve(action=\"apply\", reason=\"...\")` passes both `reason` and `extra` into this callback, but `ast_edit`'s apply ignores both — its parameter is `_reason`, and the rerun is independent of `reason`/`extra`.\n\n## Custom tools: `pushPendingAction`\n\nCustom tools can register resolve-compatible pending actions through `CustomToolAPI.pushPendingAction(...)`. 
The custom tool loader forwards these actions to `queueResolveHandler(...)` when that hook is available.\n\n`CustomToolPendingAction`:\n\n- `label: string` (required)\n- `apply(reason: string): Promise<AgentToolResult<unknown>>` (required) — invoked on apply; `reason` is the string passed to `resolve`\n- `reject?(reason: string): Promise<AgentToolResult<unknown> | undefined>` (optional) — invoked on discard; return value replaces the default \"Discarded\" message if provided\n- `details?: unknown` exists on the public custom-tool type but is not currently forwarded by the loader into resolve metadata\n- `sourceToolName?: string` (optional, defaults to `\"custom_tool\"`)\n\n### Minimal usage example\n\n```ts\nimport type { CustomToolFactory } from \"@oh-my-pi/pi-coding-agent\";\n\nconst factory: CustomToolFactory = (pi) => ({\n name: \"batch_rename_preview\",\n label: \"Batch Rename Preview\",\n description: \"Previews renames and defers commit to resolve\",\n parameters: pi.zod.object({\n files: pi.zod.array(pi.zod.string()),\n }),\n\n async execute(_toolCallId, params) {\n const previewSummary = `Prepared rename plan for ${params.files.length} files`;\n\n pi.pushPendingAction({\n label: `Batch rename: ${params.files.length} files`,\n sourceToolName: \"batch_rename_preview\",\n apply: async (reason) => {\n // apply writes here\n return {\n content: [\n { type: \"text\", text: `Applied batch rename. Reason: ${reason}` },\n ],\n };\n },\n reject: async (reason) => {\n // optional: cleanup or notify on discard\n return {\n content: [\n { type: \"text\", text: `Discarded batch rename. Reason: ${reason}` },\n ],\n };\n },\n });\n\n return {\n content: [\n {\n type: \"text\",\n text: `${previewSummary}. 
Call resolve to apply or discard.`,\n },\n ],\n };\n },\n});\n\nexport default factory;\n```\n\n## Runtime availability and failures\n\n`pushPendingAction` is wired by the custom tool loader through the active session's resolve queue hook.\n\nIf the runtime did not provide the resolve queue hook, `pushPendingAction` throws:\n\n- `Pending action store unavailable for custom tools in this runtime.`\n\n## Tool-choice behavior\n\nWhen `queueResolveHandler(...)` registers a preview, the agent runtime forces a one-shot `resolve` tool choice so pending previews are explicitly finalized before normal tool flow continues.\n\n## Developer guidance\n\n- Use pending actions only for destructive or high-impact operations that should support explicit apply/discard.\n- Keep `label` concise and specific; it is shown in resolve renderer output.\n- Ensure `apply(reason)` is deterministic and idempotent enough for one-shot execution; `reason` is informational and should not change behavior.\n- Implement `reject(reason)` when the discard needs cleanup (temp state, locks, notifications); omit it for stateless previews where the default message suffices.\n- If your tool can stage multiple previews, remember they are mediated by the tool-choice queue rather than a separate pending-action stack.\n",
56
58
  "rpc.md": "# RPC Protocol Reference\n\nRPC mode runs the coding agent as a newline-delimited JSON protocol over stdio.\n\n- **stdin**: commands (`RpcCommand`), extension UI responses, and host-tool updates/results\n- **stdout**: a ready frame, command responses (`RpcResponse`), session/agent events, extension UI requests, host-tool requests/cancellations\n\nPrimary implementation:\n\n- `src/modes/rpc/rpc-mode.ts`\n- `src/modes/rpc/rpc-types.ts`\n- `src/session/agent-session.ts`\n- `packages/agent/src/agent.ts`\n- `packages/agent/src/agent-loop.ts`\n\n## Startup\n\n```bash\nomp --mode rpc [regular CLI options]\n```\n\nBehavior notes:\n\n- `@file` CLI arguments are rejected in RPC mode.\n- RPC mode disables automatic session title generation by default to avoid an extra model call.\n- RPC mode resets workflow-altering `todo.*`, `task.*`, `memory.backend`/`memories.enabled`, `advisor.*`, `async.*`, and `bash.autoBackground.*` settings to their built-in defaults instead of inheriting user overrides.\n- The process reads stdin as JSONL (`readJsonl(Bun.stdin.stream())`).\n- At startup it writes `{ \"type\": \"ready\" }` before processing commands.\n- When stdin closes, pending host-tool calls and host-URI requests are rejected and the process exits with code `0`.\n- Responses/events are written as one JSON object per line.\n\n## Transport and Framing\n\nEach frame is a single JSON object followed by `\\n`.\n\nThere is no envelope beyond the object shape itself.\n\n### Outbound frame categories (stdout)\n\n1. Ready frame (`{ type: \"ready\" }`)\n2. `RpcResponse` (`{ type: \"response\", ... }`)\n3. `AgentSessionEvent` objects (`agent_start`, `message_update`, etc.)\n4. `RpcExtensionUIRequest` (`{ type: \"extension_ui_request\", ... }`)\n5. Host tool requests/cancellations (`host_tool_call`, `host_tool_cancel`)\n6. Host URI requests/cancellations (`host_uri_request`, `host_uri_cancel`)\n7. Extension errors (`{ type: \"extension_error\", extensionPath, event, error }`)\n8. 
Available-commands updates (`{ type: \"available_commands_update\", commands }`), emitted at startup and whenever command metadata changes\n9. Prompt lifecycle hints (`{ type: \"prompt_result\", id?, agentInvoked }`) for scheduled prompts that later resolve without invoking the agent\n10. Subagent frames (`subagent_lifecycle`, `subagent_progress`, `subagent_event`), gated by `set_subagent_subscription`\n11. Builtin slash-command side channels (`command_output`, `session_info_update`, `config_update`)\n\n### Inbound frame categories (stdin)\n\n1. `RpcCommand`\n2. `RpcExtensionUIResponse` (`{ type: \"extension_ui_response\", ... }`)\n3. Host tool updates/results (`host_tool_update`, `host_tool_result`)\n4. Host URI results (`host_uri_result`)\n\n## Request/Response Correlation\n\nAll commands accept optional `id?: string`.\n\n- If provided, normal command responses echo the same `id`.\n- `RpcClient` relies on this for pending-request resolution.\n\nImportant edge behavior from runtime:\n\n- Unknown command responses are emitted with `id: undefined` (even if the request had an `id`).\n- Parse/handler exceptions in the input loop emit `command: \"parse\"` with `id: undefined`.\n- `prompt` and `abort_and_prompt` return immediate success, then may emit a later error response with the **same** id if async prompt scheduling fails.\n- `prompt` success responses may include `data.agentInvoked`. 
`false` means the prompt completed locally without an agent turn; `true` means the prompt produced agent lifecycle events; omitted means the host must rely on session events for completion.\n- `abort_and_prompt` does not currently emit `data.agentInvoked` or `prompt_result`; hosts should treat it as the legacy abort-then-schedule path and rely on session events or same-id scheduling errors.\n\n## Command Schema (canonical)\n\n`RpcCommand` is defined in `src/modes/rpc/rpc-types.ts`:\n\n### Prompting\n\n- `{ id?, type: \"prompt\", message: string, images?: ImageContent[], streamingBehavior?: \"steer\" | \"followUp\" }`\n- `{ id?, type: \"steer\", message: string, images?: ImageContent[] }`\n- `{ id?, type: \"follow_up\", message: string, images?: ImageContent[] }`\n- `{ id?, type: \"abort\" }`\n- `{ id?, type: \"abort_and_prompt\", message: string, images?: ImageContent[] }`\n- `{ id?, type: \"new_session\", parentSession?: string }`\n\n### State\n\n- `{ id?, type: \"get_state\" }`\n- `{ id?, type: \"get_available_commands\" }`\n- `{ id?, type: \"set_todos\", phases: TodoPhase[] }`\n- `{ id?, type: \"set_host_tools\", tools: RpcHostToolDefinition[] }`\n- `{ id?, type: \"set_host_uri_schemes\", schemes: RpcHostUriSchemeDefinition[] }`\n- `{ id?, type: \"set_subagent_subscription\", level: \"off\" | \"progress\" | \"events\" }`\n- `{ id?, type: \"get_subagents\" }`\n- `{ id?, type: \"get_subagent_messages\", subagentId?: string, sessionFile?: string, fromByte?: number }`\n\n### Model\n\n- `{ id?, type: \"set_model\", provider: string, modelId: string }`\n- `{ id?, type: \"cycle_model\" }`\n- `{ id?, type: \"get_available_models\" }`\n\n### Thinking\n\n- `{ id?, type: \"set_thinking_level\", level: ThinkingLevel }`\n- `{ id?, type: \"cycle_thinking_level\" }`\n\n### Queue modes\n\n- `{ id?, type: \"set_steering_mode\", mode: \"all\" | \"one-at-a-time\" }`\n- `{ id?, type: \"set_follow_up_mode\", mode: \"all\" | \"one-at-a-time\" }`\n- `{ id?, type: 
\"set_interrupt_mode\", mode: \"immediate\" | \"wait\" }`\n\n### Compaction\n\n- `{ id?, type: \"compact\", customInstructions?: string }`\n- `{ id?, type: \"set_auto_compaction\", enabled: boolean }`\n\n### Retry\n\n- `{ id?, type: \"set_auto_retry\", enabled: boolean }`\n- `{ id?, type: \"abort_retry\" }`\n\n### Bash\n\n- `{ id?, type: \"bash\", command: string }`\n- `{ id?, type: \"abort_bash\" }`\n\n### Session\n\n- `{ id?, type: \"get_session_stats\" }`\n- `{ id?, type: \"export_html\", outputPath?: string }`\n- `{ id?, type: \"switch_session\", sessionPath: string }`\n- `{ id?, type: \"branch\", entryId: string }`\n- `{ id?, type: \"get_branch_messages\" }`\n- `{ id?, type: \"get_last_assistant_text\" }`\n- `{ id?, type: \"set_session_name\", name: string }`\n- `{ id?, type: \"handoff\", customInstructions?: string }`\n\n### Messages\n\n- `{ id?, type: \"get_messages\" }`\n\n### Login\n\n- `{ id?, type: \"get_login_providers\" }`\n- `{ id?, type: \"login\", providerId: string }`\n\n## Response Schema\n\nAll command results use `RpcResponse`:\n\n- Success: `{ id?, type: \"response\", command: <command>, success: true, data?: ... }`\n- Failure: `{ id?, type: \"response\", command: string, success: false, error: string }`\n\nData payloads are command-specific and defined in `rpc-types.ts`.\n\n### `prompt` payload\n\n`prompt` is acknowledged after the command is accepted, not after a model turn finishes:\n\n```json\n{\n \"id\": \"req_1\",\n \"type\": \"response\",\n \"command\": \"prompt\",\n \"success\": true,\n \"data\": { \"agentInvoked\": false }\n}\n```\n\n`data.agentInvoked: false` is a completion signal for local-only prompts, including slash commands that produce output without starting an agent turn. `data.agentInvoked: true` means the prompt produced agent lifecycle events; those events can be emitted before or after the prompt response depending on the command path. 
Older runtimes may omit `data`; hosts should then rely on `agent_end`, custom message completion, or `prompt_result`.\n\n`prompt_result` is emitted when a prompt was accepted immediately but later resolves as local-only:\n\n```json\n{ \"type\": \"prompt_result\", \"id\": \"req_1\", \"agentInvoked\": false }\n```\n\nLocal-only slash commands may emit `command_output` frames before completing via `data.agentInvoked: false` or a later `prompt_result`. They do not emit `agent_end`.\n\n### `get_state` payload\n\n```json\n{\n \"model\": { \"provider\": \"...\", \"id\": \"...\" },\n \"thinkingLevel\": \"off|minimal|low|medium|high|xhigh\",\n \"isStreaming\": false,\n \"isCompacting\": false,\n \"steeringMode\": \"all|one-at-a-time\",\n \"followUpMode\": \"all|one-at-a-time\",\n \"interruptMode\": \"immediate|wait\",\n \"sessionFile\": \"...\",\n \"sessionId\": \"...\",\n \"sessionName\": \"...\",\n \"autoCompactionEnabled\": true,\n \"messageCount\": 0,\n \"queuedMessageCount\": 0,\n \"todoPhases\": [\n {\n \"id\": \"phase-1\",\n \"name\": \"Todos\",\n \"tasks\": [\n {\n \"id\": \"task-1\",\n \"content\": \"Map the tool surface\",\n \"status\": \"in_progress\"\n }\n ]\n }\n ],\n \"systemPrompt\": [\"...\"],\n \"dumpTools\": [\n {\n \"name\": \"read\",\n \"description\": \"Read files and URLs\",\n \"parameters\": {}\n }\n ],\n \"contextUsage\": {\n \"tokens\": 1100,\n \"contextWindow\": 200000,\n \"percent\": 0.55\n }\n}\n```\n\n### `set_todos` payload\n\nReplaces the in-memory todo state for the current session and returns the normalized phase list:\n\n```json\n{\n \"id\": \"req_2\",\n \"type\": \"set_todos\",\n \"phases\": [\n {\n \"id\": \"phase-1\",\n \"name\": \"Evaluation\",\n \"tasks\": [\n {\n \"id\": \"task-1\",\n \"content\": \"Map the read tool surface\",\n \"status\": \"in_progress\"\n },\n {\n \"id\": \"task-2\",\n \"content\": \"Exercise edit operations\",\n \"status\": \"pending\"\n }\n ]\n }\n ]\n}\n```\n\nThis is useful for hosts that want to pre-seed a 
plan before the first prompt.\n\n### `set_host_tools` payload\n\nReplaces the current set of host-owned tools that the RPC server may call back\ninto over stdio:\n\n```json\n{\n \"id\": \"req_3\",\n \"type\": \"set_host_tools\",\n \"tools\": [\n {\n \"name\": \"echo_host\",\n \"label\": \"Echo Host\",\n \"description\": \"Echo a value from the embedding host\",\n \"parameters\": {\n \"type\": \"object\",\n \"properties\": {\n \"message\": { \"type\": \"string\" }\n },\n \"required\": [\"message\"],\n \"additionalProperties\": false\n }\n }\n ]\n}\n```\n\nThe response payload is:\n\n```json\n{\n \"toolNames\": [\"echo_host\"]\n}\n```\n\nThese tools are added to the active session tool registry before the next model\ncall. Re-sending `set_host_tools` replaces the previous host-owned set.\n\n### `set_host_uri_schemes` payload\n\nReplaces the current set of host-owned URL schemes the RPC server should\ndispatch reads/writes through:\n\n```json\n{\n \"id\": \"req_4\",\n \"type\": \"set_host_uri_schemes\",\n \"schemes\": [\n {\n \"scheme\": \"db\",\n \"description\": \"Virtual db row files\",\n \"writable\": true,\n \"immutable\": false\n }\n ]\n}\n```\n\nThe response payload is:\n\n```json\n{\n \"schemes\": [\"db\"]\n}\n```\n\nSchemes are case-insensitive on the wire and normalized to lowercase before\nthe response is sent. 
Re-sending `set_host_uri_schemes` replaces the entire\nprevious set — schemes missing from the new list are unregistered.\n\n## Event Stream Schema\n\nRPC mode forwards `AgentSessionEvent` objects from `AgentSession.subscribe(...)`.\n\nCommon event types:\n\n- `agent_start`, `agent_end`\n- `turn_start`, `turn_end`\n- `message_start`, `message_update`, `message_end`\n- `tool_execution_start`, `tool_execution_update`, `tool_execution_end`\n- `auto_compaction_start`, `auto_compaction_end`\n- `auto_retry_start`, `auto_retry_end`\n- `ttsr_triggered`\n- `todo_reminder`\n- `todo_auto_clear`\n\nExtension runner errors are emitted separately as:\n\n```json\n{\n \"type\": \"extension_error\",\n \"extensionPath\": \"...\",\n \"event\": \"...\",\n \"error\": \"...\"\n}\n```\n\n`message_update` includes streaming deltas in `assistantMessageEvent` (text/thinking/toolcall deltas).\n\n## Prompt/Queue Concurrency and Ordering\n\nThis is the most important operational behavior.\n\n### Immediate ack vs completion\n\n`prompt` and `abort_and_prompt` are **acknowledged immediately**:\n\n```json\n{ \"id\": \"req_1\", \"type\": \"response\", \"command\": \"prompt\", \"success\": true }\n```\n\nThat means:\n\n- command acceptance != run completion\n- agent turns complete via `agent_end`\n- local-only prompts complete via `data.agentInvoked: false` on the response or via a later `prompt_result`\n\n### While streaming\n\n`AgentSession.prompt()` requires `streamingBehavior` during active streaming:\n\n- `\"steer\"` => queued steering message (interrupt path)\n- `\"followUp\"` => queued follow-up message (post-turn path)\n\nIf omitted during streaming, prompt fails.\n\n### Queue defaults\n\nFrom `packages/agent/src/agent.ts` defaults:\n\n- `steeringMode`: `\"one-at-a-time\"`\n- `followUpMode`: `\"one-at-a-time\"`\n- `interruptMode`: `\"immediate\"`\n\n### Mode semantics\n\n- `set_steering_mode` / `set_follow_up_mode`\n - `\"one-at-a-time\"`: dequeue one queued message per turn\n - `\"all\"`: 
dequeue entire queue at once\n- `set_interrupt_mode`\n - `\"immediate\"`: tool execution checks steering between tool calls; pending steering can abort remaining tool calls in the turn\n - `\"wait\"`: defer steering until turn completion\n\n## Extension UI Sub-Protocol\n\nExtensions in RPC mode use request/response UI frames.\n\n### Outbound request\n\n`RpcExtensionUIRequest` (`type: \"extension_ui_request\"`) methods:\n\n- `select`, `confirm`, `input`, `editor`, `cancel`\n- `notify`, `setStatus`, `setWidget`, `setTitle`, `set_editor_text`\n- `open_url` (emitted by RPC login flows)\n\nRuntime note:\n\n- Automatic session title generation is disabled in RPC mode, and `setTitle` UI\n requests are also suppressed by default because most hosts do not have a\n meaningful terminal-title surface. Set `PI_RPC_EMIT_TITLE=1` to opt back in to\n the UI event only.\n\nExample:\n\n```json\n{\n \"type\": \"extension_ui_request\",\n \"id\": \"123\",\n \"method\": \"confirm\",\n \"title\": \"Confirm\",\n \"message\": \"Continue?\",\n \"timeout\": 30000\n}\n```\n\n### Inbound response\n\n`RpcExtensionUIResponse` (`type: \"extension_ui_response\"`):\n\n- `{ type: \"extension_ui_response\", id: string, value: string }`\n- `{ type: \"extension_ui_response\", id: string, confirmed: boolean }`\n- `{ type: \"extension_ui_response\", id: string, cancelled: true, timedOut?: boolean }`\n\nIf a dialog has a timeout, RPC mode resolves to a default value when timeout/abort fires.\n\n## Host Tool Sub-Protocol\n\nRPC hosts can expose custom tools to the agent by sending `set_host_tools`, then\nserving execution requests over the same transport.\n\n### Outbound request\n\nWhen the agent wants the host to execute one of those tools, RPC mode emits:\n\n```json\n{\n \"type\": \"host_tool_call\",\n \"id\": \"host_1\",\n \"toolCallId\": \"toolu_123\",\n \"toolName\": \"echo_host\",\n \"arguments\": { \"message\": \"hello\" }\n}\n```\n\nIf the tool execution is later aborted, RPC mode 
emits:\n\n```json\n{\n \"type\": \"host_tool_cancel\",\n \"id\": \"host_cancel_1\",\n \"targetId\": \"host_1\"\n}\n```\n\n### Inbound updates and completion\n\nHosts can optionally stream progress:\n\n```json\n{\n \"type\": \"host_tool_update\",\n \"id\": \"host_1\",\n \"partialResult\": {\n \"content\": [{ \"type\": \"text\", \"text\": \"working\" }]\n }\n}\n```\n\nCompletion uses:\n\n```json\n{\n \"type\": \"host_tool_result\",\n \"id\": \"host_1\",\n \"result\": {\n \"content\": [{ \"type\": \"text\", \"text\": \"done\" }]\n }\n}\n```\n\nSet top-level `isError: true` on `host_tool_result` to reject the pending host tool call and surface the returned text content as a tool error.\n\n## Host URI Sub-Protocol\n\nRPC hosts can also own custom URL schemes (virtual files). After\n`set_host_uri_schemes`, every read of `<scheme>://…` and write of\n`<scheme>://…` (when registered as `writable`) is bounced back to the host\nover the same transport.\n\n### Outbound request\n\nWhen a session tool resolves a host-owned URL, RPC mode emits:\n\n```json\n{\n \"type\": \"host_uri_request\",\n \"id\": \"uri_1\",\n \"operation\": \"read\",\n \"url\": \"db://users/42\"\n}\n```\n\nWrites look the same with `\"operation\": \"write\"` and an additional\n`\"content\": \"...\"` field carrying the full replacement bytes.\n\nIf the request is later aborted (caller cancels, session ends), RPC mode\nemits:\n\n```json\n{\n \"type\": \"host_uri_cancel\",\n \"id\": \"uri_cancel_1\",\n \"targetId\": \"uri_1\"\n}\n```\n\n### Inbound result\n\nFor successful reads:\n\n```json\n{\n \"type\": \"host_uri_result\",\n \"id\": \"uri_1\",\n \"content\": \"id=42\\nname=Alice\\n\",\n \"contentType\": \"text/plain\",\n \"notes\": [\"fresh from cache\"],\n \"immutable\": false\n}\n```\n\nFor successful writes, omit content:\n\n```json\n{ \"type\": \"host_uri_result\", \"id\": \"uri_1\" }\n```\n\nTo reject the request, set `isError: true` and either populate `error` with\na message or fall back to `content` 
for textual error surfacing:\n\n```json\n{\n \"type\": \"host_uri_result\",\n \"id\": \"uri_1\",\n \"isError\": true,\n \"error\": \"row 42 not found\"\n}\n```\n\n### Constraints\n\n- The agent's `edit` tool does not target host URIs. Hosts that want to\n mutate virtual files expose `write` and let the model use the `write` tool\n with replacement content.\n- Schemes are global to the process; `set_host_uri_schemes` replaces the\n previous set, unregistering anything not in the new list.\n- Schemes are normalized to lowercase before registration.\n\n## Error Model and Recoverability\n\n### Command-level failures\n\nFailures are `success: false` with string `error`.\n\n```json\n{\n \"id\": \"req_2\",\n \"type\": \"response\",\n \"command\": \"set_model\",\n \"success\": false,\n \"error\": \"Model not found: provider/model\"\n}\n```\n\n### Recoverability expectations\n\n- Most command failures are recoverable; process remains alive.\n- Malformed JSONL / parse-loop exceptions emit a `parse` error response and continue reading subsequent lines.\n- Empty `set_session_name` is rejected (`Session name cannot be empty`).\n- Extension UI responses with unknown `id` are ignored.\n- Process termination conditions are stdin close or explicit extension-triggered shutdown after the current command.\n\n## Compact Command Flows\n\n### 1) Prompt and stream\n\nstdin:\n\n```json\n{ \"id\": \"req_1\", \"type\": \"prompt\", \"message\": \"Summarize this repo\" }\n```\n\nstdout sequence (typical):\n\n```json\n{ \"id\": \"req_1\", \"type\": \"response\", \"command\": \"prompt\", \"success\": true }\n{ \"type\": \"agent_start\" }\n{ \"type\": \"message_update\", \"assistantMessageEvent\": { \"type\": \"text_delta\", \"delta\": \"...\" }, \"message\": { \"role\": \"assistant\", \"content\": [] } }\n{ \"type\": \"agent_end\", \"messages\": [] }\n```\n\n### 2) Prompt during streaming with explicit queue policy\n\nstdin:\n\n```json\n{\n \"id\": \"req_2\",\n \"type\": \"prompt\",\n 
\"message\": \"Also include risks\",\n \"streamingBehavior\": \"followUp\"\n}\n```\n\n### 3) Inspect and tune queue behavior\n\nstdin:\n\n```json\n{ \"id\": \"q1\", \"type\": \"get_state\" }\n{ \"id\": \"q2\", \"type\": \"set_steering_mode\", \"mode\": \"all\" }\n{ \"id\": \"q3\", \"type\": \"set_interrupt_mode\", \"mode\": \"wait\" }\n```\n\n### 4) Extension UI round trip\n\nstdout:\n\n```json\n{\n \"type\": \"extension_ui_request\",\n \"id\": \"ui_7\",\n \"method\": \"input\",\n \"title\": \"Branch name\",\n \"placeholder\": \"feature/...\"\n}\n```\n\nstdin:\n\n```json\n{ \"type\": \"extension_ui_response\", \"id\": \"ui_7\", \"value\": \"feature/rpc-host\" }\n```\n\n## Notes on `RpcClient` helper\n\n`src/modes/rpc/rpc-client.ts` is a convenience wrapper, not the protocol definition.\n\nCurrent helper characteristics:\n\n- Spawns `bun <cliPath> --mode rpc`\n- Correlates responses by generated `req_<n>` ids\n- Dispatches recognized core `AgentEvent` types to listeners\n- Supports host-owned custom tools via `setCustomTools()` and automatic handling of `host_tool_call` / `host_tool_cancel`\n- Wraps common protocol commands including OAuth `getLoginProviders()` / `login(...)`; use raw protocol frames for any surface not wrapped by the helper.\n\nUse raw protocol frames if you need complete surface coverage.\n",
@@ -61,9 +63,9 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
61
63
  "session-switching-and-recent-listing.md": "# Session switching and recent session listing\n\nThis document describes how coding-agent discovers recent sessions, resolves `--resume` targets, presents session pickers, and switches the active runtime session.\n\nIt focuses on current implementation behavior, including fallback paths and caveats.\n\n## Implementation files\n\n- [`../src/session/session-manager.ts`](../packages/coding-agent/src/session/session-manager.ts)\n- [`../src/session/session-listing.ts`](../packages/coding-agent/src/session/session-listing.ts)\n- [`../src/session/session-paths.ts`](../packages/coding-agent/src/session/session-paths.ts)\n- [`../src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts)\n- [`../src/cli/session-picker.ts`](../packages/coding-agent/src/cli/session-picker.ts)\n- [`../src/modes/components/session-selector.ts`](../packages/coding-agent/src/modes/components/session-selector.ts)\n- [`../src/modes/controllers/selector-controller.ts`](../packages/coding-agent/src/modes/controllers/selector-controller.ts)\n- [`../src/main.ts`](../packages/coding-agent/src/main.ts)\n- [`../src/sdk.ts`](../packages/coding-agent/src/sdk.ts)\n- [`../src/modes/interactive-mode.ts`](../packages/coding-agent/src/modes/interactive-mode.ts)\n- [`../src/modes/utils/ui-helpers.ts`](../packages/coding-agent/src/modes/utils/ui-helpers.ts)\n\n## Recent-session discovery\n\n### Directory scope\n\n`SessionManager` stores sessions under a cwd-scoped directory by default:\n\n- `~/.omp/agent/sessions/<dir-encoded>/*.jsonl` (home-relative `-<rel>` names, `-tmp-<rel>` for temp paths, legacy `--<abs>--` otherwise)\n\n`SessionManager.list(cwd, sessionDir?)` reads only that directory unless an explicit `sessionDir` is provided.\n\n### Two listing paths with different payloads\n\nThere are two different listing pipelines:\n\n1. 
`getRecentSessions(sessionDir, limit)` (welcome/summary view)\n - Reads only a 4KB prefix (`readTextSlices(..., 4096, 0)[0]`) from each file.\n - Parses header + earliest user text preview.\n - Returns lightweight `RecentSessionInfo` (`path`, `name`, `timeAgo`); `name` and `timeAgo` are computed eagerly (`sessionDisplayName` / `formatTimeAgo`), not lazy getters.\n - Sorts by file `mtime` descending.\n\n2. `SessionManager.list(...)` / `SessionManager.listAll()` (resume pickers and ID matching)\n - Reads a 4KB prefix plus a bounded 32 KiB tail in one `readTextSlices(...)` call per file, not the full JSONL file.\n - Builds `SessionInfo` objects (`id`, `cwd`, `title`, `messageCount`, `firstMessage`, `allMessagesText`, timestamps, lifecycle status).\n - Uses prefix parsing plus marker counting for list text, and tail parsing for the final-message lifecycle status; later messages beyond the prefix may not be present in `allMessagesText`.\n - Sorts by `modified` descending.\n\n### Metadata fallback behavior\n\nFor recent summaries (`RecentSessionInfo`):\n\n- display name preference (`sessionDisplayName`): `title` -> first user message -> an `Untitled · <time>` label (the raw `id` is intentionally never used)\n- the welcome screen truncates the rendered name to the available column width (no fixed length)\n- only the first line is kept and control characters are stripped from title/message-derived names (`sanitizeSessionName`)\n\nFor `SessionInfo` list entries:\n\n- `title` is `header.title` or the last compaction `shortSummary` seen in the 4KB prefix\n- `firstMessage` is first user message text discoverable from the prefix or `\"(no messages)\"`\n\n## `--continue` resolution and terminal breadcrumb preference\n\n`SessionManager.continueRecent(cwd, sessionDir?)` resolves the target in this order:\n\n1. Read terminal-scoped breadcrumb (`~/.omp/agent/terminal-sessions/<terminal-id>`)\n2. 
Validate breadcrumb:\n - current terminal can be identified\n - referenced file still exists\n3. If the breadcrumb's cwd differs from the current cwd, that cwd no longer exists (moved/renamed dir), and the current directory has no sessions of its own, the breadcrumb session is re-rooted into the current directory (`SessionManager.open` + `moveTo`) instead of starting fresh\n4. Otherwise, if the breadcrumb cwd matches the current cwd (resolved path compare), use the breadcrumb session; else fall back to newest file by mtime in the session dir (`findMostRecentSession`)\n5. If none found, create a new session\n\nTerminal ID derivation prefers TTY path and falls back to env-based identifiers (`ZELLIJ_PANE_ID`, `TMUX_PANE`, `CMUX_SURFACE_ID`, `KITTY_WINDOW_ID`, `WEZTERM_PANE`, `TERM_SESSION_ID`, `WT_SESSION`).\n\nBreadcrumb writes are best-effort and non-fatal.\n\n## Startup-time resume target resolution (`main.ts`)\n\n### `--resume <value>`\n\n`createSessionManager(...)` handles string-valued `--resume` in two modes:\n\n1. Path-like value (contains `/`, `\\\\`, or ends with `.jsonl`)\n - direct `SessionManager.open(sessionArg, parsed.sessionDir)`\n\n2. Resume key value\n - `resolveResumableSession(...)` searches local sessions first, then all sessions when `sessionDir` is not forced\n - matching is case-insensitive and accepts `id` prefix, full JSONL filename prefix, or the session-id suffix after the timestamp\n - first match in modified-descending order is used (no ambiguity prompt)\n\nCross-project match behavior:\n\n- if the matched session's recorded cwd no longer exists (moved/renamed dir), CLI prompts `Move (re-root) it into the current directory? [Y/n]`; yes opens the session and `moveTo(cwd)` re-roots it (this also applies to local-scope matches whose recorded cwd is gone)\n- otherwise, if a global match's cwd differs from the current cwd, CLI prompts `Fork into current directory? 
[y/N]`\n- fork accepted -> `SessionManager.forkFrom(...)`\n- either prompt declined -> command cancels (`Resume cancelled: session is in another project.`)\n- non-TTY -> throws `SessionResolutionError` instead of prompting\n\nNo match -> throws error (`Session \"...\" not found.`).\n\n### `--resume` (no value)\n\nHandled after initial session-manager construction:\n\n1. list local sessions with `SessionManager.list(cwd, parsed.sessionDir)`\n2. if empty: preload `SessionManager.listAll()` and open the picker in all-projects scope; print `No sessions found` and exit early only when the global list is also empty\n3. open TUI picker (`selectSession`, with optional preloaded `allSessions`/`startInAllScope`)\n4. if canceled: print `No session selected` and exit early\n5. if selected: when the session belongs to another project, switch the process into that project's directory (`setProjectDir`, cache resets, settings reload) first; then `SessionManager.open(selected.path)`\n\n### `--continue`\n\nUses `SessionManager.continueRecent(...)` directly (breadcrumb-first behavior above).\n\n## Picker-based selection internals\n\n### CLI picker (`src/cli/session-picker.ts`)\n\n`selectSession(sessions, { allSessions?, startInAllScope? })` creates a standalone TUI with `SessionSelectorComponent` and resolves exactly once:\n\n- selection -> resolves selected `SessionInfo` (caller uses `.path` / `.cwd`)\n- cancel (Esc) -> resolves `null`\n- hard exit (Ctrl+C path) -> stops TUI and `process.exit(0)`\n- Tab toggles current-folder / all-projects scope; the all-projects list is loaded lazily via `SessionManager.listAll` (or preloaded via `allSessions`)\n- search ranking is augmented with prompt-history matches from `history.db` (`HistoryStorage.matchingSessionIds`) when available\n\n### Interactive in-session picker (`SelectorController.showSessionSelector`)\n\nFlow:\n\n1. 
fetch sessions from current session dir via `SessionManager.list(currentCwd, currentSessionDir)`; if empty, preload `SessionManager.listAll()` and open in all-projects scope\n2. mount `SessionSelectorComponent` in editor area using `showSelector(...)`, wired with `loadAllSessions: () => SessionManager.listAll()` and a `history.db` prompt matcher\n3. callbacks:\n - select -> close selector and call `handleResumeSession(sessionPath)`\n - cancel -> restore editor and rerender\n - exit -> `ctx.shutdown()`\n\n## Session selector component behavior\n\n`SessionList` supports:\n\n- arrow/page navigation\n- Enter to select\n- Delete to delete after confirmation\n- Esc to cancel\n- Ctrl+C to exit\n- Tab to toggle current-folder / all-projects scope\n- ranked fuzzy search across session id/title/cwd/first message/all messages/path, merged with prompt-history matches from `history.db`\n\nEmpty-list render behavior:\n\n- current-folder scope renders `No sessions in current folder. Press Tab to view all.`; all-projects scope renders `No sessions found`\n- Enter/Delete on empty do nothing (no callback)\n- Esc/Ctrl+C still work\n\n## Runtime switch execution (`AgentSession.switchSession`)\n\n`switchSession(sessionPath)` is the core in-process switch path.\n\nLifecycle/state transition:\n\n1. capture `previousSessionFile`\n2. emit `session_before_switch` hook event (`reason: \"resume\"`, cancellable)\n3. if canceled -> return `false` with no switch\n4. disconnect from current agent event stream\n5. abort active generation/tool flow\n6. flush session writer (`sessionManager.flush()`) to persist pending writes, then capture rollback state\n7. clear queued steering/follow-up/next-turn message buffers\n8. `sessionManager.setSessionFile(sessionPath)`\n - updates session file pointer\n - writes terminal breadcrumb\n - loads entries / migrates / blob-resolves / reindexes\n - if missing/invalid file data: initializes a new session at that path and rewrites header\n9. 
update `agent.sessionId`\n10. rebuild display context via `buildDisplaySessionContext()`\n11. restore persisted/discovered MCP tool selections and rebuild active tools/system prompt when discovery is enabled\n12. emit `session_switch` hook event (`reason: \"resume\"`, `previousSessionFile`)\n13. replace agent messages with rebuilt context and sync todos\n14. close provider sessions when switching to a different session or when same-session reload changed replay messages\n15. restore model via `getRestorableSessionModels(sessionContext.models, lastModelChangeRole)` — tries the recorded models in fallback order and uses the first one present in the model registry\n16. restore thinking level and service tier:\n - thinking uses persisted `thinking_level_change`, otherwise the configured default clamped to model capability\n - service tier uses persisted `service_tier_change`, otherwise the configured `serviceTier` setting (`\"none\"` becomes unset)\n17. reconnect agent listeners, run the registered session-switch reconciler if any (interactive mode re-enters persisted modes; errors logged, not fatal), and return `true`\n\n## UI state rebuild after interactive switch\n\n`SelectorController.handleResumeSession` performs UI reset around `switchSession`:\n\n- stop loading animation\n- clear status container\n- clear pending-message UI and pending tool map\n- reset streaming component/message references\n- call `session.switchSession(...)`\n- if the resumed session's cwd differs from the previous one, re-point the process and cwd-derived caches at it (`applyCwdChange`)\n- clear chat container and rerender from session context (`renderInitialMessages`)\n- reload todos from new session artifacts\n- show `Resumed session` (or `Resumed session in <dir>` for a cross-project resume)\n\nSo visible conversation/todo state is rebuilt from the new session file.\n\n## Startup resume vs in-session switch\n\n### Startup resume (`--continue`, `--resume`, direct open)\n\n- Session file is 
chosen before `createAgentSession(...)`.\n- `sdk.ts` builds `existingSession = sessionManager.buildSessionContext()`.\n- Agent messages are restored once during session creation.\n- Model/thinking are selected during creation (including restore/fallback logic).\n- Interactive mode then runs `#reconcileModeFromSession()` to re-enter persisted mode state (e.g. plan mode).\n\n### In-session switch (`/resume`-style selector path)\n\n- Uses `AgentSession.switchSession(...)` on an already-running `AgentSession`.\n- Messages/model/thinking are rebuilt immediately in place.\n- Hook `session_before_switch`/`session_switch` events are emitted.\n- UI chat/todos are refreshed.\n- Mode re-entry is symmetric with startup: interactive mode registers `#reconcileModeFromSession()` as the session-switch reconciler (`setSessionSwitchReconciler`), and `switchSession()` invokes it after reconnecting.\n\n## Failure and edge-case behavior\n\n### Cancellation paths\n\n- CLI picker cancel -> returns `null`, caller prints `No session selected`, process exits early.\n- Interactive picker cancel -> editor restored, no session change.\n- Hook cancellation (`session_before_switch`) -> `switchSession()` returns `false`.\n\n### Empty list paths\n\n- CLI `--resume` (no value): an empty local list falls back to the all-projects list (`SessionManager.listAll()`); `No sessions found` is printed and the process exits early only when the global list is also empty.\n- Interactive selector: an empty current-folder list opens the selector in all-projects scope; an empty list renders its message and remains cancellable.\n\n### Missing/invalid target session file\n\nWhen opening/switching to a specific path (`setSessionFile`):\n\n- ENOENT -> treated as empty -> new session initialized at that exact path and persisted.\n- malformed/invalid header (or effectively unreadable parsed entries) -> treated as empty -> new session initialized and persisted.\n\nThis is recovery behavior, not hard failure.\n\n### Hard failures\n\nSwitch/open can still throw on true I/O failures (permission errors, rewrite failures, etc.), which propagate to callers.\n\n### ID prefix matching caveats\n\n- Matching uses `startsWith` on the 
lowercased session id, lowercased JSONL filename, and lowercased id suffix after the filename timestamp.\n- First match in modified-descending order wins; there is no ambiguity UI if multiple sessions share a prefix.\n- Prefix-listing metadata is intentionally lightweight, so search text may not include messages outside the first 4KB of the session file.\n",
62
64
  "session-tree-plan.md": "# Session tree architecture (current)\n\nReference: [session.md](../docs/session.md)\n\nThis document describes how session tree navigation works today: in-memory tree model, leaf movement rules, branching behavior, and extension/event integration.\n\n## What this subsystem is\n\nThe session is stored as an append-only entry log, but runtime behavior is tree-based:\n\n- Every non-header entry has `id` and `parentId`.\n- The active position is `leafId` in `SessionManager`.\n- Appending an entry always creates a child of the current leaf.\n- Branching does **not** rewrite history; it only changes where the leaf points before the next append.\n\nKey files:\n\n- `src/session/session-manager.ts` — tree data model, traversal, leaf movement, branch/session extraction\n- `src/session/session-context.ts` — `buildSessionContext` context reconstruction (resolved root→leaf LLM context, compaction/branch-summary replay)\n- `src/session/agent-session.ts` — `/tree` navigation flow, summarization, hook/event emission\n- `src/modes/components/tree-selector.ts` — interactive tree UI behavior and filtering\n- `src/modes/controllers/selector-controller.ts` — selector orchestration for `/tree` and `/branch`\n- `src/slash-commands/builtin-registry.ts` — command routing (`/tree`, `/branch`)\n- `src/modes/controllers/input-controller.ts` — double-escape behavior and `app.session.tree`/`app.session.fork` keybinding wiring\n- `src/session/messages.ts` — conversion of `branch_summary`, `compaction`, and `custom_message` entries into LLM context messages\n\n## Tree data model in `SessionManager`\n\nRuntime indices live in a `SessionEntryIndex` helper, held as `#index` on `SessionManager` and kept in lockstep with the journal array `#entries`:\n\n- `#entriesById: Map<string, SessionEntry>` — fast lookup for any entry\n- `#children: Map<string | null, SessionEntry[]>` — parent→children adjacency\n- `#labels: Map<string, string>` — resolved labels by target entry id\n- 
`#leaf: string | null` — current position in the tree\n- `#usage` — running usage totals\n\nTree APIs:\n\n- `getBranch(fromId?)` walks parent links to root and returns root→node path\n- `getTree()` returns `SessionTreeNode[]` (`entry`, `children`, `label`)\n - parent links become children arrays\n - entries with missing parents are treated as roots\n - children are sorted oldest→newest by timestamp\n- `getChildren(parentId)` returns direct children\n- `getLabel(id)` resolves current label from the index's `#labels` map\n\n`getTree()` is a runtime projection; persistence remains append-only JSONL entries.\n\n## Leaf movement semantics\n\nThere are three leaf movement primitives:\n\n1. `branch(entryId)`\n - Validates entry exists\n - Sets `leafId = entryId`\n - No new entry is written\n\n2. `resetLeaf()`\n - Sets `leafId = null`\n - Next append creates a new root entry (`parentId = null`)\n\n3. `branchWithSummary(branchFromId, summary, details?, fromExtension?)`\n - Accepts `branchFromId: string | null`\n - Sets `leafId = branchFromId`\n - Appends a `branch_summary` entry as child of that leaf\n - When `branchFromId` is `null`, `fromId` is persisted as `\"root\"`\n\n## `/tree` navigation behavior (same session file)\n\n`AgentSession.navigateTree()` is navigation, not file forking.\n\nFlow:\n\n1. Validate target and compute abandoned path (`collectEntriesForBranchSummary`)\n2. Emit `session_before_tree` with `TreePreparation`\n3. Optionally summarize abandoned entries (hook-provided summary or built-in summarizer)\n4. Compute new leaf target:\n - selecting a **user** message: leaf moves to its parent, and message text is returned for editor prefill\n - selecting a **custom_message**: same rule as user message (leaf = parent, text prefills editor)\n - selecting any other entry: leaf = selected entry id\n5. 
Apply leaf move:\n - with summary: `branchWithSummary(newLeafId, ...)`\n - without summary and `newLeafId === null`: `resetLeaf()`\n - otherwise: `branch(newLeafId)`\n6. Rebuild agent context from new leaf and emit `session_tree`\n\nImportant: summary entries are attached at the **new navigation position**, not on the abandoned branch tail.\n\n## `/branch` behavior (new session file)\n\n`/branch` and `/tree` are intentionally different:\n\n- `/tree` navigates within the current session file.\n- `/branch` creates a new session branch file (or in-memory replacement for non-persistent mode).\n\nUser-facing `/branch` flow (`SelectorController.showUserMessageSelector` → `AgentSession.branch`):\n\n- Branch source must be a **user message**.\n- Selected user text is extracted for editor prefill.\n- If selected user message is root (`parentId === null`): start a new session via `newSession({ parentSession: previousSessionFile })`.\n- Otherwise: `createBranchedSession(selectedEntry.parentId)` to fork history up to the selected prompt boundary.\n\n`SessionManager.createBranchedSession(leafId)` specifics:\n\n- Builds root→leaf path via `getBranch(leafId)`; throws if missing.\n- Excludes existing `label` entries from copied path.\n- Rebuilds fresh label entries from the resolved label map (`labelsInEffect()`) for entries that remain in path.\n- Persistent mode: writes new JSONL file and switches manager to it; returns new file path.\n- In-memory mode: replaces in-memory entries; returns `undefined`.\n\n## Context reconstruction and summary/custom integration\n\n`buildSessionContext()` (in `session-context.ts`, exposed via `SessionManager.buildSessionContext()`) resolves the active root→leaf path and builds effective LLM context state:\n\n- Tracks latest thinking/model/service-tier/mode/TTSR/MCP-selection state on path.\n- Handles latest compaction on path:\n - emits compaction summary first\n - replays kept messages from `firstKeptEntryId` to compaction point\n - then replays 
post-compaction messages\n- Includes `branch_summary` and `custom_message` entries as `AgentMessage` objects.\n\n`session/messages.ts` then maps these message types for model input:\n\n- `branchSummary` and `compactionSummary` become user-role templated context messages\n- `custom`/`hookMessage` become developer-role content messages (via agent-core's `convertMessageToLlm`)\n\nSo tree movement changes context by changing the active leaf path, not by mutating old entries.\n\n## Labels and tree UI behavior\n\nLabel persistence:\n\n- `appendLabelChange(targetId, label?)` writes `label` entries on the current leaf chain.\n- `#labels` (in `SessionEntryIndex`) is updated immediately (set or delete).\n- `getTree()` resolves current label onto each returned node.\n\nTree selector behavior (`tree-selector.ts`):\n\n- Flattens tree for navigation, keeps active-path highlighting, and prioritizes displaying the active branch first.\n- Supports filter modes: `default`, `no-tools`, `user-only`, `labeled-only`, `all`.\n - `default` suppresses `label`, `custom`, `model_change`, and `thinking_level_change`; it is not a complete \"hide all internal entries\" filter.\n- Supports free-text search over rendered semantic content.\n- `Shift+L` opens inline label editing and writes via `appendLabelChange`.\n\nCommand routing:\n\n- `/tree` always opens tree selector.\n- `/branch` opens user-message selector unless `doubleEscapeAction=tree`, in which case it also uses tree selector UX.\n\n## Extension and hook touchpoints for tree operations\n\nCommand-time extension API (`ExtensionCommandContext`):\n\n- `branch(entryId)` — create branched session file\n- `navigateTree(targetId, { summarize? 
})` — move within current tree/file\n\nEvents around tree navigation:\n\n- `session_before_tree`\n - receives `TreePreparation`:\n - `targetId`\n - `oldLeafId`\n - `commonAncestorId`\n - `entriesToSummarize`\n - `userWantsSummary`\n - may cancel navigation\n - may provide summary payload used instead of built-in summarizer\n - receives abort `signal` (Escape cancellation path)\n- `session_tree`\n - emits `newLeafId`, `oldLeafId`\n - includes `summaryEntry` when a summary was created\n - `fromExtension` indicates summary origin\n\nAdjacent but related lifecycle hooks:\n\n- `session_before_branch` / `session_branch` for `/branch` flow\n- `session_before_compact`, `session.compacting`, `session_compact` for compaction entries that later affect tree-context reconstruction\n\n## Real constraints and edge conditions\n\n- `branch()` cannot target `null`; use `resetLeaf()` for root-before-first-entry state.\n- `branchWithSummary()` supports `null` target and records `fromId: \"root\"`.\n- Selecting current leaf in tree selector is a no-op.\n- Summarization requires an active model; if absent, summarize navigation fails fast.\n- If summarization is aborted, navigation is cancelled and leaf is unchanged.\n- In-memory sessions never return a branch file path from `createBranchedSession`.\n- Tree context reconstruction includes service-tier and MCP tool-selection state, but those entries do not become LLM messages.\n\n## Plan approval session naming\n\nWhen a user approves a plan from plan mode (`InteractiveMode.#approvePlan`), the approval handler seeds the session name from the plan's title so the resulting (fresh or compacted) session does not stay unnamed.\n\nTrigger:\n\n- Plan approval reaches `#approvePlan(...)` with `options.title` populated from the plan-approval details.\n- This runs for every approval choice (`Approve and execute`, `Approve and compact context`, `Approve and keep context`); the synthetic `plan-approved` prompt is what otherwise bypasses the 
input-controller's title-generation path.\n\nNaming source:\n\n- The normalized plan title is humanized via `humanizePlanTitle(title)` (`packages/coding-agent/src/plan-mode/approved-plan.ts`):\n - replaces runs of `-`/`_` with a single space\n - trims whitespace\n - capitalizes the first character\n - returns `\"\"` for whitespace-only / separator-only input\n- The humanized name is applied only when the current session has no name (`!sessionManager.getSessionName()`). The approval handler then calls `sessionManager.setSessionName(name, \"auto\")`, which itself also refuses to overwrite user-named sessions.\n- On successful apply, the terminal title (`setSessionTerminalTitle`) and the editor border color are refreshed to reflect the new name.\n\nExamples (from `humanizePlanTitle`):\n\n- `migrate-mcp-loader` → `Migrate mcp loader`\n- `fix_session_naming` → `Fix session naming`\n- `foo--bar__baz` → `Foo bar baz`\n- `RefactorRouter` → `RefactorRouter` (no separators to expand)\n- `\"\"` / `\"---\"` → `\"\"` (no name applied)\n\n## Legacy compatibility still present\n\nSession migrations still run on load:\n\n- v1→v2 adds `id`/`parentId` and converts compaction index anchor to id anchor\n- v2→v3 migrates legacy `hookMessage` role to `custom`\n\nCurrent runtime behavior is version-3 tree semantics after migration.\n",
63
65
  "session.md": "# Session Storage and Entry Model\n\nThis document is the source of truth for how coding-agent sessions are represented, persisted, migrated, and reconstructed at runtime.\n\n## Scope\n\nCovers:\n\n- Session JSONL format and versioning\n- Entry taxonomy and tree semantics (`id`/`parentId` + leaf pointer)\n- Migration/compatibility behavior when loading old or malformed files\n- Context reconstruction (`buildSessionContext`)\n- Persistence guarantees, failure behavior, truncation/blob externalization\n- Storage abstractions (`FileSessionStorage`, `MemorySessionStorage`) and related utilities\n\nDoes not cover `/tree` UI rendering behavior beyond semantics that affect session data.\n\n## Implementation Files\n\n- [`src/session/session-manager.ts`](../packages/coding-agent/src/session/session-manager.ts) — orchestration: tree/leaf, appends, persistence, blobs, lifecycle factories\n- [`src/session/session-entries.ts`](../packages/coding-agent/src/session/session-entries.ts) — entry/header types, `SessionEntry` union, `CURRENT_SESSION_VERSION`\n- [`src/session/session-migrations.ts`](../packages/coding-agent/src/session/session-migrations.ts) — version migrations\n- [`src/session/session-loader.ts`](../packages/coding-agent/src/session/session-loader.ts) — file load + blob-ref resolution\n- [`src/session/session-context.ts`](../packages/coding-agent/src/session/session-context.ts) — `buildSessionContext`\n- [`src/session/session-persistence.ts`](../packages/coding-agent/src/session/session-persistence.ts) — truncation + image blob externalization\n- [`src/session/session-paths.ts`](../packages/coding-agent/src/session/session-paths.ts) — on-disk layout, dir encoding, terminal breadcrumbs\n- [`src/session/session-listing.ts`](../packages/coding-agent/src/session/session-listing.ts) — discovery (list/recent/resolve)\n- [`src/session/session-storage.ts`](../packages/coding-agent/src/session/session-storage.ts) — storage abstractions\n- 
[`src/session/messages.ts`](../packages/coding-agent/src/session/messages.ts) — custom-message transformers\n- [`src/session/blob-store.ts`](../packages/coding-agent/src/session/blob-store.ts) — content-addressed blob store\n- [`src/session/history-storage.ts`](../packages/coding-agent/src/session/history-storage.ts) — prompt history (separate subsystem)\n\n## On-Disk Layout\n\nDefault session file location:\n\n```text\n~/.omp/agent/sessions/<dir-encoded>/<timestamp>_<sessionId>.jsonl\n```\n\n`<dir-encoded>` depends on where the canonicalized cwd lives:\n\n- inside the home directory: `-<relative-path>` with `/`, `\\\\`, and `:` replaced by `-` (bare `-` for home itself)\n- inside the OS temp root: `-tmp-<relative-path>` with the same replacement\n- anywhere else: legacy absolute form `--<cwd-without-leading-slash-with-same-replacement>--`\n\nOld `--<home-encoded>-*--` directories are migrated to the new home-relative names once per sessions root on first access (best-effort).\n\nBlob store location:\n\n```text\n~/.omp/agent/blobs/<sha256>\n```\n\nTerminal breadcrumb files are written under:\n\n```text\n~/.omp/agent/terminal-sessions/<terminal-id>\n```\n\nBreadcrumb content is two lines: original cwd, then session file path. 
`continueRecent()` prefers this terminal-scoped pointer before scanning most-recent mtime.\n\n## File Format\n\nSession files are JSONL: one JSON object per line.\n\n- Line 1 is always the session header (`type: \"session\"`).\n- Remaining lines are `SessionEntry` values.\n- Entries are append-only at runtime; branch navigation moves a pointer (`leafId`) rather than mutating existing entries.\n\n### Header (`SessionHeader`)\n\n```json\n{\n \"type\": \"session\",\n \"version\": 3,\n \"id\": \"1f9d2a6b9c0d1234\",\n \"timestamp\": \"2026-02-16T10:20:30.000Z\",\n \"cwd\": \"/work/pi\",\n \"title\": \"optional session title\",\n \"titleSource\": \"auto\",\n \"parentSession\": \"optional lineage marker\"\n}\n```\n\nNotes:\n\n- `version` is optional in v1 files; absence means v1.\n- `parentSession` is an opaque lineage string. Current code writes either a session id or a session path depending on flow (`fork`, `forkFrom`, `createBranchedSession`, or explicit `newSession({ parentSession })`). Treat as metadata, not a typed foreign key.\n\n### Entry Base (`SessionEntryBase`)\n\nAll non-header entries include:\n\n```json\n{\n \"type\": \"...\",\n \"id\": \"8-char-id\",\n \"parentId\": \"previous-or-branch-parent\",\n \"timestamp\": \"2026-02-16T10:20:30.000Z\"\n}\n```\n\n`parentId` can be `null` for a root entry (first append, or after `resetLeaf()`).\n\n## Entry Taxonomy\n\n`SessionEntry` is the union of:\n\n- `message`\n- `thinking_level_change`\n- `model_change`\n- `service_tier_change`\n- `compaction`\n- `branch_summary`\n- `custom`\n- `custom_message`\n- `label`\n- `ttsr_injection`\n- `session_init`\n- `mode_change`\n- `mcp_tool_selection`\n\n### `message`\n\nStores an `AgentMessage` directly.\n\n```json\n{\n \"type\": \"message\",\n \"id\": \"a1b2c3d4\",\n \"parentId\": null,\n \"timestamp\": \"2026-02-16T10:21:00.000Z\",\n \"message\": {\n \"role\": \"assistant\",\n \"provider\": \"anthropic\",\n \"model\": \"claude-sonnet-4-5\",\n \"content\": [{ \"type\": \"text\", 
\"text\": \"Done.\" }],\n \"usage\": {\n \"input\": 100,\n \"output\": 20,\n \"cacheRead\": 0,\n \"cacheWrite\": 0,\n \"cost\": {\n \"input\": 0,\n \"output\": 0,\n \"cacheRead\": 0,\n \"cacheWrite\": 0,\n \"total\": 0\n }\n },\n \"timestamp\": 1760000000000\n }\n}\n```\n\n### `model_change`\n\n```json\n{\n \"type\": \"model_change\",\n \"id\": \"b1c2d3e4\",\n \"parentId\": \"a1b2c3d4\",\n \"timestamp\": \"2026-02-16T10:21:30.000Z\",\n \"model\": \"openai/gpt-4o\",\n \"role\": \"default\"\n}\n```\n\n`role` is optional; missing is treated as `default` in context reconstruction.\n\n### `service_tier_change`\n\n```json\n{\n \"type\": \"service_tier_change\",\n \"id\": \"c1d2e3f4\",\n \"parentId\": \"b1c2d3e4\",\n \"timestamp\": \"2026-02-16T10:21:45.000Z\",\n \"serviceTier\": \"flex\"\n}\n```\n\n`serviceTier` can also be `null`.\n\n### `thinking_level_change`\n\n```json\n{\n \"type\": \"thinking_level_change\",\n \"id\": \"c1d2e3f4\",\n \"parentId\": \"b1c2d3e4\",\n \"timestamp\": \"2026-02-16T10:22:00.000Z\",\n \"thinkingLevel\": \"high\"\n}\n```\n\n### `compaction`\n\n```json\n{\n \"type\": \"compaction\",\n \"id\": \"d1e2f3a4\",\n \"parentId\": \"c1d2e3f4\",\n \"timestamp\": \"2026-02-16T10:23:00.000Z\",\n \"summary\": \"Conversation summary\",\n \"shortSummary\": \"Short recap\",\n \"firstKeptEntryId\": \"a1b2c3d4\",\n \"tokensBefore\": 42000,\n \"details\": { \"readFiles\": [\"src/a.ts\"] },\n \"preserveData\": { \"hookState\": true },\n \"fromExtension\": false\n}\n```\n\n### `branch_summary`\n\n```json\n{\n \"type\": \"branch_summary\",\n \"id\": \"e1f2a3b4\",\n \"parentId\": \"a1b2c3d4\",\n \"timestamp\": \"2026-02-16T10:24:00.000Z\",\n \"fromId\": \"a1b2c3d4\",\n \"summary\": \"Summary of abandoned path\",\n \"details\": { \"note\": \"optional\" },\n \"fromExtension\": true\n}\n```\n\nIf branching from root (`branchFromId === null`), `fromId` is the literal string `\"root\"`.\n\n### `custom`\n\nExtension state persistence; ignored by 
`buildSessionContext`.\n\n```json\n{\n \"type\": \"custom\",\n \"id\": \"f1a2b3c4\",\n \"parentId\": \"e1f2a3b4\",\n \"timestamp\": \"2026-02-16T10:25:00.000Z\",\n \"customType\": \"my-extension\",\n \"data\": { \"state\": 1 }\n}\n```\n\n### `custom_message`\n\nExtension-provided message that does participate in LLM context. `content` can be a string or text/image content blocks, and `attribution` records whether the user or agent initiated it.\n\n```json\n{\n \"type\": \"custom_message\",\n \"id\": \"a2b3c4d5\",\n \"parentId\": \"f1a2b3c4\",\n \"timestamp\": \"2026-02-16T10:26:00.000Z\",\n \"customType\": \"my-extension\",\n \"content\": \"Injected context\",\n \"display\": true,\n \"details\": { \"debug\": false },\n \"attribution\": \"agent\"\n}\n```\n\n### `label`\n\n```json\n{\n \"type\": \"label\",\n \"id\": \"b2c3d4e5\",\n \"parentId\": \"a2b3c4d5\",\n \"timestamp\": \"2026-02-16T10:27:00.000Z\",\n \"targetId\": \"a1b2c3d4\",\n \"label\": \"checkpoint\"\n}\n```\n\n`label: undefined` clears a label for `targetId`.\n\n### `ttsr_injection`\n\n```json\n{\n \"type\": \"ttsr_injection\",\n \"id\": \"c2d3e4f5\",\n \"parentId\": \"b2c3d4e5\",\n \"timestamp\": \"2026-02-16T10:28:00.000Z\",\n \"injectedRules\": [\"ruleA\", \"ruleB\"]\n}\n```\n\n### `mcp_tool_selection`\n\n```json\n{\n \"type\": \"mcp_tool_selection\",\n \"id\": \"d2e3f4a5\",\n \"parentId\": \"c2d3e4f5\",\n \"timestamp\": \"2026-02-16T10:28:30.000Z\",\n \"selectedToolNames\": [\"server.tool\"]\n}\n```\n\n### `session_init`\n\n```json\n{\n \"type\": \"session_init\",\n \"id\": \"d2e3f4a5\",\n \"parentId\": \"c2d3e4f5\",\n \"timestamp\": \"2026-02-16T10:29:00.000Z\",\n \"systemPrompt\": \"...\",\n \"task\": \"...\",\n \"tools\": [\"read\", \"edit\"],\n \"outputSchema\": { \"type\": \"object\" },\n \"spawns\": \"*\",\n \"readSummarize\": false\n}\n```\n\n### `mode_change`\n\n```json\n{\n \"type\": \"mode_change\",\n \"id\": \"e2f3a4b5\",\n \"parentId\": \"d2e3f4a5\",\n \"timestamp\": 
\"2026-02-16T10:30:00.000Z\",\n \"mode\": \"plan\",\n \"data\": { \"planFile\": \"/tmp/plan.md\" }\n}\n```\n\n## Versioning and Migration\n\nCurrent session version: `3`.\n\n### v1 -> v2\n\nApplied when header `version` is missing or `< 2`:\n\n- Adds `id` and `parentId` to each non-header entry.\n- Reconstructs a linear parent chain using file order.\n- Migrates compaction field `firstKeptEntryIndex` -> `firstKeptEntryId` when present.\n- Sets header `version = 2`.\n\n### v2 -> v3\n\nApplied when header `version < 3`:\n\n- For `message` entries: rewrites legacy `message.role === \"hookMessage\"` to `\"custom\"`.\n- Sets header `version = 3`.\n\n### Migration Trigger and Persistence\n\n- Migrations run during session load (`setSessionFile`).\n- If any migration ran, the session is flagged for a full rewrite (`#rewriteRequired`) rather than rewritten immediately.\n- Migration mutates in-memory entries first; the flagged rewrite persists the updated JSONL on the next write (a synchronous full rewrite on the next append).\n\n## Load and Compatibility Behavior\n\n`loadEntriesFromFile(path)` behavior:\n\n- Missing file (`ENOENT`) -> returns `[]`.\n- Non-parseable lines are handled by lenient JSONL parser (`parseJsonlLenient`).\n- If first parsed entry is not a valid session header (`type !== \"session\"` or missing string `id`) -> returns `[]`.\n\n`SessionManager.setSessionFile()` behavior:\n\n- `[]` from loader is treated as empty/nonexistent session and replaced with a new initialized session file at that path.\n- Valid files are loaded, migrated if needed, blob refs resolved, then indexed.\n\n## Tree and Leaf Semantics\n\nThe underlying model is append-only tree + mutable leaf pointer:\n\n- Every append method creates exactly one new entry whose `parentId` is current `leafId`.\n- The new entry becomes the new `leafId`.\n- `branch(entryId)` moves only `leafId`; existing entries remain unchanged.\n- `resetLeaf()` sets `leafId = null`; next append creates a new root 
entry (`parentId: null`).\n- `branchWithSummary()` sets leaf to branch target and appends a `branch_summary` entry.\n\n`getEntries()` returns all non-header entries in insertion order. Existing entries are not deleted in normal operation; rewrites preserve logical history while updating representation (migrations, move, targeted rewrite helpers).\n\n## Context Reconstruction (`buildSessionContext`)\n\n`buildSessionContext(entries, leafId?, byId?, options?)` resolves what is sent to the model. Passing `options.transcript: true` instead builds the full-history display transcript (compactions emitted inline at the position they fired) — display-only, never sent to a provider.\n\nAlgorithm:\n\n1. Determine leaf:\n - `leafId === null` -> return empty context.\n - explicit `leafId` -> use that entry if found.\n - otherwise fallback to last entry.\n2. Walk `parentId` chain from leaf to root and reverse to root->leaf path.\n3. Derive runtime state across path:\n - `thinkingLevel` from latest `thinking_level_change` (default `\"off\"`)\n - `serviceTier` from latest `service_tier_change`\n - model map from `model_change` entries (`role ?? \"default\"`)\n - fallback `models.default` from assistant message provider/model if no explicit model change\n - deduplicated `injectedTtsrRules` from all `ttsr_injection` entries\n - selected MCP discovery tools from latest `mcp_tool_selection`\n - mode/modeData from latest `mode_change` (default mode `\"none\"`)\n4. 
Build message list:\n - `message` entries pass through\n - `custom_message` entries become `custom` AgentMessages via `createCustomMessage`\n - `branch_summary` entries become `branchSummary` AgentMessages via `createBranchSummaryMessage`\n - if a `compaction` exists on path:\n - emit compaction summary first (`createCompactionSummaryMessage`)\n - emit path entries starting at `firstKeptEntryId` up to the compaction boundary\n - emit entries after the compaction boundary\n\n`custom`, `session_init`, `service_tier_change`, `mcp_tool_selection`, and `ttsr_injection` entries do not inject model context directly.\n\n## Persistence Guarantees and Failure Model\n\n### Persist vs in-memory\n\n- `SessionManager.create/open/continueRecent/forkFrom` -> persistent mode (`persist = true`).\n- `SessionManager.inMemory` -> non-persistent mode (`persist = false`) with `MemorySessionStorage`.\n\n### Write pipeline\n\nAppends are written synchronously in-body through a `SessionStorageWriter` (from `storage.openWriter`), so an entry is durable the instant the append returns. 
Async disk work (flush, close, atomic rewrite) is serialized through an internal promise chain (`#diskTail`); appends bypass it.\n\n- `append*` updates in-memory state immediately.\n- Persistence is deferred until at least one assistant message exists.\n - Before first assistant: entries are retained in memory; no file append occurs.\n - When first assistant exists: full in-memory session is flushed to file.\n - Afterwards: new entries append incrementally.\n\nRationale in code: avoid persisting sessions that never produced an assistant response.\n\n### Durability operations\n\n- `flush()` drains the async disk chain and the open writer's queued appends (no `fsync`); `flushSync()` performs a synchronous full rewrite for exit paths that cannot await.\n- Atomic full rewrites (`#rewriteAtomically`) delegate to `storage.writeTextAtomic`: temp-write then rename over the target (with an EPERM-safe move-aside fallback).\n- Used for `setSessionName`, `rewriteEntries` (tool-output pruning/supersede passes), and move/fork operations. 
Load-time migrations and other in-memory divergence (`#rewriteRequired`) instead trigger a synchronous full rewrite (`#rewriteSynchronously`) on the next persist.\n\n### Error behavior\n\n- Persistence errors are latched (`#diskFailure`) and rethrown on subsequent operations.\n- First error is logged once with session file context.\n- Writer close is best-effort but propagates the first meaningful error.\n\n## Data Size Controls and Blob Externalization\n\nBefore persisting entries:\n\n- Large strings are truncated to `MAX_PERSIST_CHARS` (500,000 chars) with notice:\n - `\"[Session persistence truncated large content]\"`\n- Transient fields `partialJson` and `jsonlEvents` are removed.\n- If object has both `content` and `lineCount`, line count is recomputed after truncation.\n- Image blocks in `content` arrays with base64 length >= 1024 are externalized to blob refs:\n - stored as `blob:sha256:<hash>`\n - raw bytes written to blob store (`BlobStore.put`)\n\nOn load, blob refs are resolved back to base64 for message/custom_message image blocks.\n\n## Storage Abstractions\n\n`SessionStorage` interface provides all filesystem operations used by `SessionManager`:\n\n- sync: `ensureDirSync`, `existsSync`, `writeTextSync`, `statSync`, `listFilesSync`\n- async: `exists`, `readText`, `readTextSlices`, `writeText`, `writeTextAtomic`, `rename`, `unlink`, `deleteSessionWithArtifacts`, `openWriter`\n\nImplementations:\n\n- `FileSessionStorage`: real filesystem (Bun + node fs)\n- `MemorySessionStorage`: map-backed in-memory implementation for tests/non-persistent sessions\n\n`SessionStorageWriter` exposes `append`, `flush`, `isOpen`, `close`, `getError`.\n\n## Session Discovery Utilities\n\nDiscovery helpers live in `session-listing.ts`; `SessionManager` re-exposes the project-scoped lists as thin static wrappers:\n\n- `getRecentSessions(sessionDir, limit?)` -> lightweight metadata for UI/session picker, capped by `limit` (default 4)\n- `findMostRecentSession(sessionDir)` -> 
newest by mtime\n- `listSessions(sessionDir, storage)` (a.k.a. `SessionManager.list(cwd, sessionDir?)`) -> sessions in one project scope\n- `listAllSessions(storage)` (a.k.a. `SessionManager.listAll()`) -> sessions across all project scopes under `~/.omp/agent/sessions`\n- `resolveResumableSession(sessionArg, cwd, sessionDir?)` -> local then global resume/fork target lookup\n\nMetadata extraction for `getRecentSessions` reads a 4 KiB prefix via `readTextSlices(..., 4096, 0)`. `listSessions`/`listAllSessions` read a 4 KiB prefix plus a bounded 32 KiB tail through one `readTextSlices(...)` call per file, using the prefix for metadata and the tail for lifecycle status. Resume matching is case-insensitive and accepts session id prefixes, full filename prefixes, or the id suffix after the timestamp in `<timestamp>_<sessionId>.jsonl`.\n\n## Related but Distinct: Prompt History Storage\n\n`HistoryStorage` (`history-storage.ts`) is a separate SQLite subsystem for prompt recall/search, not session replay.\n\n- DB: `~/.omp/agent/history.db`\n- Table: `history(id, prompt, created_at, cwd, session_id)`\n- FTS5 index: `history_fts` with trigger-maintained sync\n- Deduplicates consecutive identical prompts using in-memory last-prompt cache\n- Inserts are batched through an async drain queue (~100 ms delay) so prompt capture does not block turn execution\n\nUse session files for conversation graph/state replay; use `HistoryStorage` for prompt history UX.\n",
64
- "settings.md": "# Settings\n\n`omp` resolves settings from built-in defaults, a persistent global config file, optional project-local config, one-shot CLI overlays, and in-memory runtime overrides. Reach for project settings when one repository needs a different provider set, model role, tool policy, memory backend, or UI behavior than your global defaults — without touching your machine-wide configuration.\n\nSettings are stored as plain YAML mappings. Every key, its type, default, and enum values come from the settings schema, and you can inspect or change any of them with `omp config` or the interactive `/settings` panel.\n\n- For model/provider credentials, `.env` files, and the env-var table that resolves API keys, see [Providers](./providers.md).\n- For custom model definitions in `models.yml`, see [Models](./models.md).\n- For instruction files discovered into the agent context (`AGENTS.md`, `.omp/`, etc.), see [Context files](./context-files.md).\n- For the full catalog of environment variables, see [Environment variables](./environment-variables.md).\n\n## Where settings live\n\n| Scope | Path | Read behavior | Write behavior |\n|---|---|---|---|\n| Global | `~/.omp/agent/config.yml` | The main persistent settings file. Always loaded. | `/settings`, `omp config set`, and `omp config reset` write here. |\n| Global legacy | `~/.omp/agent/settings.json` | Migrated into `config.yml` once, only when `config.yml` does not yet exist. | Not written after migration; the original is renamed to `settings.json.bak`. |\n| Project | `<cwd>/.omp/config.yml` (plus `.omp/settings.json`) | Loaded when the process working directory has a non-empty `.omp/`. | Read-only from settings commands; edit the file by hand. |\n| Project legacy | `<cwd>/.omp/settings.json` | Still read; project `config.yml` is merged on top of it. | Not written by settings commands. 
|\n| CLI overlay | Any file passed with `--config <file>` | Loaded after global and project settings, for that one process. Repeatable. | Never persisted. |\n| Runtime overrides | In-memory only | Set by dedicated CLI flags (`--model`, `--approval-mode`, …) and feature env vars. | Never persisted. |\n\n`PI_CODING_AGENT_DIR` relocates the `~/.omp/agent` base directory. When it is set, the global `config.yml`, the auth store (`agent.db`), and everything else under the agent directory move with it. Use `omp config path` to print the active agent directory.\n\nNative project settings are intentionally scoped to the process working directory's `.omp/` folder — settings discovery does **not** walk ancestor directories looking for the nearest `.omp/`. Other discovery providers (Claude, Codex, Gemini, Cursor, OpenCode) can also contribute project-level settings from their own files; those are read-only from `omp` settings commands and can be turned off by provider id (see [Provider and source disabling](#provider-and-source-disabling)).\n\n## Config file formats\n\nThe global `config.yml` is always YAML. The generic config loader used for other files (for example `models.yml`) accepts `.yml`, `.yaml`, `.json`, and `.jsonc`:\n\n- When a `.yml`/`.yaml` path is requested and only a sibling `.json` exists, it is migrated to YAML automatically (idempotent, once per process).\n- `.json` and `.jsonc` configs are read as-is, with no migration.\n- A file whose top level is not a mapping (a bare array or scalar) is treated as empty for persistent settings, and is a hard error for `--config` overlays.\n\n## Reading and writing settings\n\nUse the interactive `/settings` panel inside a session, or the `omp config` command from a shell. 
Both operate on the merged effective settings, but every persistent write lands in the **global** file only.\n\n```bash\nomp config list # all settings with current effective values\nomp config list --json # same, machine-readable\nomp config get theme.dark # one value\nomp config get theme.dark --json\nomp config set compaction.enabled false\nomp config set defaultThinkingLevel medium\nomp config reset steeringMode # restore a key to its schema default\nomp config path # print the active agent directory\n```\n\n### Subcommands\n\n| Command | Effect |\n|---|---|\n| `omp config list` | Print every setting grouped by tab, with its current value and type. `--json` emits an object keyed by setting path with `{ value, type, description }`. |\n| `omp config get <key>` | Print the effective value of one key. Unknown keys exit non-zero. `--json` emits `{ key, value, type, description }`. |\n| `omp config set <key> <value>` | Parse `<value>` against the key's schema type and write it to the global `config.yml`. |\n| `omp config reset <key>` | Write the key's schema **default** back to the global config (this persists the default, it does not delete the key). |\n| `omp config path` | Print the active agent directory (honors `PI_CODING_AGENT_DIR`). |\n\n`omp config` with no subcommand, or `--help`, prints the help and lists settings. The `--json` flag is accepted by `list`, `get`, `set`, and `reset`.\n\n### Value parsing\n\n`omp config set` parses the value string according to the target key's schema type. The string is trimmed first.\n\n| Type | Accepted input | Notes |\n|---|---|---|\n| boolean | `true`, `false`, `yes`, `no`, `on`, `off`, `1`, `0` | Case-insensitive. Anything else is rejected. |\n| number | Any finite JavaScript number | `Infinity`/`NaN` are rejected. |\n| enum | One of the key's allowed values | Must match exactly; the error lists the valid values. |\n| array | A JSON array | e.g. `'[\"anthropic\",\"openai\"]'`. Must parse and be an array. 
|\n| record | A JSON object | e.g. `'{\"bash\":\"prompt\"}'`. Must parse and be a non-array object. |\n| string | Stored as given (trimmed) | Multi-word values are joined with spaces. |\n\nKeys must match a real schema path exactly. There is no shorthand — set `theme.dark`, not `theme`.\n\n### Where writes go\n\n`omp config set`, `omp config reset`, `/settings`, and any runtime settings change all write to the global `config.yml` under the active agent directory. They never write to `<cwd>/.omp/config.yml`. To create a project-local override, edit that file directly (see [Project-local config](#project-local-config)). Saves are debounced and re-read the file under a lock, so external edits made while a session is open are preserved.\n\n## Precedence\n\nFrom lowest to highest priority, the effective value of a setting is built as:\n\n```text\nbuilt-in defaults <- global config <- project config <- CLI overlays <- runtime overrides\n```\n\nFrom highest to lowest:\n\n1. **Runtime overrides** — dedicated CLI flags and feature env vars applied in memory for the current process: `--model`, `--smol`, `--slow`, `--plan`, `--approval-mode`, `--auto-approve`/`--yolo`, `--hide-thinking`, `--advisor`, `--no-pty`, `--api-key`, and protocol-mode defaults. Never persisted.\n2. **CLI config overlays** — each `--config <file>`; later overlay files override earlier ones.\n3. **Project settings** — `<cwd>/.omp/settings.json` then `<cwd>/.omp/config.yml` (and contributions from other discovery providers at project level).\n4. **Global settings** — `~/.omp/agent/config.yml`.\n5. **Built-in defaults** — from the settings schema.\n\nA key that is unset at every layer resolves to its schema default at read time.\n\n### Environment overrides\n\nEnvironment variables are **not** a single settings layer. Each is read by the feature that owns the value, usually as a per-machine override or fallback, and is never written back to `config.yml`. 
The ones that map directly onto a setting:\n\n| Env var | Overrides setting | Notes |\n|---|---|---|\n| `PI_SMOL_MODEL` | `modelRoles.smol` | Also exposed as `--smol`. |\n| `PI_SLOW_MODEL` | `modelRoles.slow` | Also exposed as `--slow`. |\n| `PI_PLAN_MODEL` | `modelRoles.plan` | Also exposed as `--plan`. |\n| `PI_NO_PTY=1` | (disables PTY bash) | Equivalent to `--no-pty` for the process. |\n| `PI_PY` | `eval.py` | `PI_PY=0` disables the Python eval backend. |\n| `PI_JS` | `eval.js` | `PI_JS=0` disables the JavaScript eval backend. |\n| `PI_TINY_DEVICE` | `providers.tinyModelDevice` | ONNX execution provider for local tiny models. |\n| `PI_TINY_DTYPE` | `providers.tinyModelDtype` | ONNX precision for local tiny models. |\n| `OMP_AUTH_BROKER_URL` | `auth.broker.url` | Env value takes precedence over config. |\n| `OMP_AUTH_BROKER_TOKEN` | `auth.broker.token` | Env value takes precedence over config. |\n| `PI_CODING_AGENT_DIR` | (relocates agent dir) | Moves `config.yml`, `agent.db`, and the whole agent base. |\n\nProvider API keys are resolved separately (stored auth, OAuth, `models.yml`, environment, and `.env` files); see [Providers](./providers.md) and the full [Environment variables](./environment-variables.md) reference.\n\n## Merge rules\n\nLayers are combined with a deep merge:\n\n- **Objects are deep-merged** — keys present only in a lower layer are kept; keys present in a higher layer override.\n- **Scalars and arrays are replaced wholesale** by the higher-precedence layer. A higher layer's array does not append to a lower layer's array.\n\nUse nested YAML mappings for dotted setting paths:\n\n```yaml\ntheme:\n dark: titanium\n light: light\n\ntools:\n approvalMode: write\n approval:\n bash: prompt\n read: allow\n```\n\n### Worked example: global vs. 
project\n\n```yaml\n# ~/.omp/agent/config.yml\ntools:\n approvalMode: write\n approval:\n bash: prompt\n read: allow\ndisabledProviders:\n - anthropic\n - openai\n - gemini\n\n# <repo>/.omp/config.yml\ntools:\n approval:\n bash: allow\ndisabledProviders:\n - groq\n```\n\nEffective settings inside `<repo>`:\n\n```yaml\ntools:\n approvalMode: write # kept from global (object deep-merge)\n approval:\n bash: allow # overridden by project\n read: allow # kept from global\ndisabledProviders:\n - groq # project array REPLACES the global array\n```\n\nArray replacement is the most common surprise: the project's `disabledProviders` does not extend the global list — it becomes the entire list for that project. The same applies to `enabledModels`, `cycleOrder`, `extensions`, and every other array-typed setting.\n\n## Project-local config\n\nCreate `<repo>/.omp/config.yml` when a repository needs its own settings:\n\n```yaml\n# <repo>/.omp/config.yml\nmodelRoles:\n default: anthropic/claude-sonnet-4-5\n smol: openai/gpt-4.1-mini\n slow: anthropic/claude-opus-4-5:high\n\ntools:\n approvalMode: write\n approval:\n bash: prompt\n\ncompaction:\n strategy: context-full\n thresholdPercent: 80\n\ntheme:\n dark: titanium\n```\n\nKeep secrets out of committed project config unless your repository policy allows it. Prefer environment variables, stored auth, an auth broker, or an untracked `--config` overlay for credentials.\n\n### One-shot overlays\n\nUse `--config` for a temporary layer that should not persist:\n\n```bash\nomp --config ./local/ci-settings.yml \"check this failure\"\nomp --config ./base.yml --config ./experiment.yml \"try this model\"\n```\n\nOverlay paths are resolved relative to the process working directory (and `~` is expanded). 
Each overlay must parse as a YAML mapping; a missing file, invalid YAML, or a top-level array/scalar is a hard error — it does **not** silently fall back to lower-precedence settings.\n\n## Path-scoped arrays\n\nTwo array settings — `enabledModels` and `disabledProviders` — accept path-scoped entries in addition to bare strings, so a single global config can behave differently per directory:\n\n```yaml\nenabledModels:\n - claude-sonnet-4-5 # applies everywhere\n - path: ~/work/high-context\n models:\n - anthropic/claude-opus-4-5\n\ndisabledProviders:\n - ollama # applies everywhere\n - paths:\n - ~/projects/sensitive\n - ~/clients/acme\n providers:\n - anthropic\n - openai\n```\n\nBare string entries apply everywhere. A scoped entry applies when the current working directory **is** the configured path or is **under** it. `~` expands to your home directory and relative paths are resolved before matching.\n\nAccepted **path** keys (any of them, combined): `path`, `paths`, `pathPrefix`, `pathPrefixes`.\n\nAccepted **value** keys:\n\n- `models` (for `enabledModels`) or `providers` (for `disabledProviders`)\n- `values` or `items` (for either setting)\n\nOnly string values are kept; malformed scoped entries are ignored. Path scoping is resolved **after** the layer merge, so it reads the final effective array.\n\n## Provider and source disabling\n\n`disabledProviders` is a single shared id namespace that gates two different subsystems, before any credential check:\n\n| Entry kind | Example ids | Effect |\n|---|---|---|\n| Model providers | `anthropic`, `openai`, `gemini`, `groq`, `ollama`, `openrouter` | Removes those backends from model selection, even when credentials are available. See [Providers](./providers.md). |\n| Discovery sources | `native`, `claude`, `codex`, `gemini`, `github`, `opencode`, `cursor`, `agents-md` | Stops that source from contributing context files, MCP servers, commands, skills, hooks, tools, prompts, or settings. 
See [Context files](./context-files.md). |\n\nMost provider-control use cases list model provider ids. Disabling the `claude` discovery source is different from disabling the `anthropic` model provider — one stops Claude-format config discovery, the other stops the Anthropic model backend.\n\nBecause arrays replace rather than append, a project that sets `disabledProviders` must list the complete desired set:\n\n```yaml\n# ~/.omp/agent/config.yml\ndisabledProviders:\n - anthropic\n - openai\n\n# <repo>/.omp/config.yml — inside this repo ONLY groq is disabled\ndisabledProviders:\n - groq\n```\n\nThe default is an empty array (nothing disabled). For the two subsystems' provider ids and ordering, see [Providers](./providers.md) and [Context files](./context-files.md).\n\n## Settings catalog\n\nEvery key below is defined in the settings schema; `omp config list` shows the full set with current values. Defaults and enum values are taken from the schema. Settings that accept an env or flag override are noted; those overrides are process-local and not persisted.\n\n### Models\n\n`modelRoles`, `modelTags`, and `cycleOrder` work together to define the models you can switch between. Role values may carry a thinking suffix (`:minimal`, `:low`, `:medium`, `:high`, `:xhigh`).\n\n```yaml\nmodelRoles:\n default: anthropic/claude-sonnet-4-5\n smol: openai/gpt-4.1-mini\n slow: anthropic/claude-opus-4-5:high\n vision: gemini/gemini-3-pro-preview\n plan: anthropic/claude-opus-4-5\n advisor: anthropic/claude-sonnet-4-5:medium\n\ncycleOrder:\n - smol\n - default\n - slow\n\nmodelProviderOrder:\n - anthropic\n - openai\n\nenabledModels:\n - claude-sonnet-4-5\n```\n\n| Key | Type | Default | Notes |\n|---|---|---|---|\n| `modelRoles` | record | `{}` | Map of role name -> model id. Built-in roles: `default`, `smol`, `slow`, `vision`, `plan`, `designer`, `commit`, `title`, `task`, `advisor`. 
The only per-role CLI flags are `--model`, `--smol`, `--slow`, and `--plan` (some roles also have env-var overrides); no flag selects the advisor model, so configure it with `modelRoles.advisor`. |\n| `modelTags` | record | `{}` | Custom role/tag metadata; can introduce additional roles. |\n| `modelProviderOrder` | array | `[]` | Preferred provider order when a model id is ambiguous. |\n| `cycleOrder` | array | `[\"smol\",\"default\",\"slow\"]` | Roles cycled by the model switcher. |\n| `enabledModels` | array | `[]` | Allow-list of models; supports [path-scoped entries](#path-scoped-arrays). Empty means all available models. |\n| `disabledProviders` | array | `[]` | Disabled model/discovery providers; supports path-scoped entries. See [above](#provider-and-source-disabling). |\n| `includeModelInPrompt` | boolean | `true` | Include the active model name in the system prompt. |\n\nSee [Models](./models.md) for the `models.yml` schema and custom-provider definitions.\n\n### Advisor\n\nThe advisor is a second model that reviews each completed turn and can inject advice into the primary session. Assign a model with `modelRoles.advisor`, then enable it with `advisor.enabled`, `/advisor on`, or by launching with the `--advisor` flag.\n\nSee [Advisor and WATCHDOG.md](./advisor-watchdog.md) for runtime behavior, `WATCHDOG.md` discovery, and bounded catch-up semantics.\n\n| Key | Type | Default | Notes |\n|---|---|---|---|\n| `advisor.enabled` | boolean | `false` | Enable the advisor runtime when `modelRoles.advisor` resolves to an available model. |\n| `advisor.subagents` | boolean | `false` | Also enable advisor runtimes for spawned task/eval subagents. |\n| `advisor.syncBacklog` | enum | `off` | Bounded advisor catch-up delay: `off`, `1`, `3`, or `5`. The primary waits up to 30 seconds only while advisor backlog is at or above the threshold. 
|\n\n### Thinking\n\n```yaml\ndefaultThinkingLevel: high\nhideThinkingBlock: false\nthinkingBudgets:\n minimal: 1024\n low: 2048\n medium: 8192\n high: 16384\n xhigh: 32768\n```\n\n| Key | Type | Default | Values |\n|---|---|---|---|\n| `defaultThinkingLevel` | enum | `high` | `minimal`, `low`, `medium`, `high`, `xhigh`, `auto`. Override per run with `--thinking`. |\n| `hideThinkingBlock` | boolean | `false` | Hide thinking blocks in output. `--hide-thinking` sets it for the run (display only). |\n| `thinkingBudgets.minimal` | number | `1024` | Token budget for the `minimal` level. |\n| `thinkingBudgets.low` | number | `2048` | Token budget for `low`. |\n| `thinkingBudgets.medium` | number | `8192` | Token budget for `medium`. |\n| `thinkingBudgets.high` | number | `16384` | Token budget for `high`. |\n| `thinkingBudgets.xhigh` | number | `32768` | Token budget for `xhigh`. |\n\n### Sampling\n\nA value of `-1` means \"use the provider/model default\" — `omp` does not send that parameter.\n\n| Key | Type | Default | Notes |\n|---|---|---|---|\n| `temperature` | number | `-1` | Sampling temperature. |\n| `topP` | number | `-1` | Nucleus sampling. |\n| `topK` | number | `-1` | Top-K sampling. |\n| `minP` | number | `-1` | Minimum-probability cutoff. |\n| `presencePenalty` | number | `-1` | Presence penalty. |\n| `repetitionPenalty` | number | `-1` | Repetition penalty. |\n| `serviceTier` | enum | `none` | `none`, `auto`, `default`, `flex`, `scale`, `priority`, `openai-only`, `claude-only`. |\n| `personality` | enum | `default` | `default`, `friendly`, `pragmatic`, `none`. |\n\n### Retry and fallback\n\n```yaml\nretry:\n enabled: true\n maxRetries: 10\n baseDelayMs: 500\n maxDelayMs: 300000\n modelFallback: true\n fallbackRevertPolicy: cooldown-expiry\n```\n\n| Key | Type | Default | Notes |\n|---|---|---|---|\n| `retry.enabled` | boolean | `true` | Retry transient provider errors. |\n| `retry.maxRetries` | number | `10` | Max retries per request. 
|\n| `retry.baseDelayMs` | number | `500` | Initial backoff. |\n| `retry.maxDelayMs` | number | `300000` | Backoff ceiling (5 min). |\n| `retry.modelFallback` | boolean | `true` | Fall back to another model when one is unavailable. |\n| `retry.fallbackChains` | record | `{}` | Per-model fallback chains. |\n| `retry.fallbackRevertPolicy` | enum | `cooldown-expiry` | `cooldown-expiry`, `never`. |\n\n### Tools and approvals\n\n```yaml\ntools:\n approvalMode: yolo # default\n approval:\n bash: prompt\n edit: allow\n discoveryMode: auto\n maxTimeout: 0\n intentTracing: true\n```\n\n| Key | Type | Default | Notes |\n|---|---|---|---|\n| `tools.approvalMode` | enum | `yolo` | `always-ask` (auto-approve read-only), `write` (auto-approve read + workspace-write), `yolo` (auto-approve all tiers). `--approval-mode` and `--auto-approve`/`--yolo` override per run. |\n| `tools.approval` | record | `{}` | Per-tool policy keyed by tool name; each value is `allow`, `deny`, or `prompt`. e.g. `omp config set tools.approval '{\"bash\":\"prompt\"}'`. |\n| `tools.discoveryMode` | enum | `auto` | `auto`, `off`, `mcp-only`, `all`. Controls dynamic tool discovery. |\n| `tools.essentialOverride` | array | `[]` | Tool names kept available even when tools are narrowed. |\n| `tools.maxTimeout` | number | `0` | Max tool runtime in seconds; `0` = no cap. |\n| `tools.intentTracing` | boolean | `true` | Record per-call intent strings. |\n| `tools.outputMaxColumns` | number | `768` | Per-line byte cap for streaming output; `0` disables. |\n| `tools.artifactSpillThreshold` | number | `50` | KB of tool output above which output spills to an artifact. |\n| `tools.artifactHeadBytes` | number | `20` | KB of head kept inline on spill; `0` = tail-only. |\n| `tools.artifactTailBytes` | number | `20` | KB of tail kept inline on spill. |\n| `tools.artifactTailLines` | number | `500` | Max tail lines kept inline on spill. |\n\nIndividual built-in tools are toggled by their own keys, e.g. 
`bash.enabled`, `eval.py`, `eval.js`, `find.enabled`, `search.enabled`, `fetch.enabled`, `browser.enabled`, `astEdit.enabled`, `astGrep.enabled`, `web_search.enabled`, `inspect_image.enabled`.\n\n### Shell, eval, and LSP\n\n```yaml\nbash:\n enabled: true\n stripTrailingHeadTail: true\n autoBackground:\n enabled: false\n thresholdMs: 60000\n\neval:\n py: true\n js: true\n\npython:\n kernelMode: session # session, per-call\n interpreter: \"\"\n\nlsp:\n enabled: true\n lazy: true\n diagnosticsOnWrite: true\n diagnosticsOnEdit: false\n formatOnWrite: false\n```\n\n| Key | Type | Default | Notes |\n|---|---|---|---|\n| `bash.enabled` | boolean | `true` | Enable the bash tool. |\n| `bash.stripTrailingHeadTail` | boolean | `true` | Strip trailing head/tail noise from output. |\n| `bash.autoBackground.enabled` | boolean | `false` | Auto-background long-running commands. |\n| `bash.autoBackground.thresholdMs` | number | `60000` | Threshold before auto-backgrounding. |\n| `eval.py` | boolean | `true` | Python eval backend. `PI_PY=0` disables for the process. |\n| `eval.js` | boolean | `true` | JavaScript eval backend. `PI_JS=0` disables for the process. |\n| `python.kernelMode` | enum | `session` | `session` (persistent kernel) or `per-call`. |\n| `python.interpreter` | string | `\"\"` | Path to a Python interpreter; empty = auto-detect. |\n| `lsp.enabled` | boolean | `true` | Language-server integration. `--no-lsp` disables for the run. |\n| `lsp.lazy` | boolean | `true` | Start servers on demand. |\n| `lsp.diagnosticsOnWrite` | boolean | `true` | Run diagnostics after a write. |\n| `lsp.diagnosticsOnEdit` | boolean | `false` | Run diagnostics after an edit. |\n| `lsp.formatOnWrite` | boolean | `false` | Format files on write. |\n| `lsp.diagnosticsDeduplicate` | boolean | `true` | Collapse duplicate diagnostics. |\n| `shellPath` | string | _(unset)_ | Override the shell binary used by bash. 
|\n\n### Files: editing and reading\n\n```yaml\nedit:\n mode: hashline # apply_patch, hashline, patch, replace\n fuzzyMatch: true\n fuzzyThreshold: 0.95\n blockAutoGenerated: true\n\nread:\n defaultLimit: 300\n toolResultPreview: false\n summarize:\n enabled: true\n prose: false\n```\n\n| Key | Type | Default | Notes |\n|---|---|---|---|\n| `edit.mode` | enum | `hashline` | `apply_patch`, `hashline`, `patch`, `replace`. |\n| `edit.fuzzyMatch` | boolean | `true` | Allow fuzzy anchor matching. |\n| `edit.fuzzyThreshold` | number | `0.95` | Similarity threshold for fuzzy matching. |\n| `edit.blockAutoGenerated` | boolean | `true` | Refuse to edit generated/lockfile-like files. |\n| `edit.streamingAbort` | boolean | `false` | Abort on streaming edit mismatch. |\n| `read.defaultLimit` | number | `300` | Default line count for `read` without a selector. |\n| `read.summarize.enabled` | boolean | `true` | Structural summaries for code reads. |\n| `read.summarize.prose` | boolean | `false` | Summarize prose files too. |\n| `read.toolResultPreview` | boolean | `false` | Inline preview of tool results. |\n| `readHashLines` | boolean | `true` | Show hashline tags in read output. |\n| `readLineNumbers` | boolean | `false` | Show plain line numbers. |\n\n### Context, compaction, and memory\n\n```yaml\ncontextPromotion:\n enabled: true\n\ncompaction:\n enabled: true\n strategy: context-full # context-full, handoff, shake, snapcompact, off\n thresholdPercent: -1 # -1 = default reserve-based behavior\n thresholdTokens: -1 # fixed token limit when > 0\n remoteEnabled: true\n\nmemory:\n backend: off # off, local, hindsight, mnemopi\n```\n\n| Key | Type | Default | Notes |\n|---|---|---|---|\n| `contextPromotion.enabled` | boolean | `true` | Promote relevant earlier context. |\n| `compaction.enabled` | boolean | `true` | Automatic conversation compaction. |\n| `compaction.strategy` | enum | `context-full` | `context-full`, `handoff`, `shake`, `snapcompact`, `off`. 
|\n| `compaction.thresholdPercent` | number | `-1` | Percent-of-context trigger; `-1` = reserve-based default. |\n| `compaction.thresholdTokens` | number | `-1` | Fixed token trigger when `> 0`. |\n| `compaction.reserveTokens` | number | `16384` | Tokens reserved for the next turn. |\n| `compaction.keepRecentTokens` | number | `20000` | Recent tokens always preserved. |\n| `compaction.remoteEnabled` | boolean | `true` | Allow remote compaction service. |\n| `compaction.autoContinue` | boolean | `true` | Continue automatically after compaction. |\n| `memory.backend` | enum | `off` | `off`, `local`, `hindsight`, `mnemopi`. Each backend has its own `hindsight.*` / `mnemopi.*` / `memories.*` tuning keys. |\n| `autolearn.enabled` | boolean | `false` | Experimental: after the agent stops, nudge it to capture lessons to memory and create/enhance isolated managed skills under `~/.omp/agent/managed-skills`. Enables the `manage_skill` tool (and `learn` when a memory backend is active). |\n| `autolearn.autoContinue` | boolean | `false` | Only applies when `autolearn.enabled` is `true`. When `true`, automatically runs one capture turn when the agent stops (uses extra tokens). When `false`, a passive reminder is attached to your next turn instead. |\n| `autolearn.minToolCalls` | number | `5` | Only nudge after a turn that used at least this many tools. |\n\n`compaction` has additional tuning keys (idle compaction, supersede/drop heuristics) visible in `omp config list`. 
See [Compaction](./compaction.md) for the full strategy reference.\n\n### Appearance and terminal\n\n```yaml\ntheme:\n dark: titanium\n light: light\nsymbolPreset: unicode # unicode, nerd, ascii\ncolorBlindMode: false\n\nstatusLine:\n preset: default # default, minimal, compact, full, nerd, ascii, custom\n separator: powerline-thin\n transparent: false\n showHookStatus: true\n\nterminal:\n showImages: true\nimages:\n autoResize: true\n blockImages: false\ntui:\n hyperlinks: auto # off, auto, always\n```\n\n| Key | Type | Default | Values |\n|---|---|---|---|\n| `theme.dark` | string | `titanium` | Theme used on a dark terminal background. |\n| `theme.light` | string | `light` | Theme used on a light terminal background. |\n| `symbolPreset` | enum | `unicode` | `unicode`, `nerd`, `ascii`. |\n| `colorBlindMode` | boolean | `false` | Use blue instead of green for diff additions. |\n| `showHardwareCursor` | boolean | `true` | Show the terminal hardware cursor. |\n| `statusLine.preset` | enum | `default` | `default`, `minimal`, `compact`, `full`, `nerd`, `ascii`, `custom`. |\n| `statusLine.separator` | enum | `powerline-thin` | `powerline`, `powerline-thin`, `slash`, `pipe`, `block`, `none`, `ascii`. |\n| `statusLine.sessionAccent` | boolean | `true` | Tint the editor border with the session color. |\n| `statusLine.transparent` | boolean | `false` | Use the terminal background for the status line. |\n| `statusLine.showHookStatus` | boolean | `true` | Show hook status messages. |\n| `terminal.showImages` | boolean | `true` | Render images inline (when the terminal supports it). |\n| `images.autoResize` | boolean | `true` | Resize large images for model compatibility. |\n| `images.blockImages` | boolean | `false` | Never send images to providers. |\n| `tui.hyperlinks` | enum | `auto` | `off`, `auto`, `always`. 
|\n\nFor a custom status line, set `statusLine.preset: custom` and configure `statusLine.leftSegments`, `statusLine.rightSegments`, and `statusLine.segmentOptions`.\n\n### Interaction\n\n| Key | Type | Default | Values |\n|---|---|---|---|\n| `steeringMode` | enum | `one-at-a-time` | `all`, `one-at-a-time`. How queued steering messages are delivered. |\n| `followUpMode` | enum | `one-at-a-time` | `all`, `one-at-a-time`. |\n| `interruptMode` | enum | `immediate` | `immediate`, `wait`. |\n| `doubleEscapeAction` | enum | `tree` | `branch`, `tree`, `none`. |\n| `autoResume` | boolean | `false` | Auto-resume the most recent session in the cwd. |\n| `ask.timeout` | number | `0` | Seconds before an `ask` prompt times out; `0` = no timeout. (Legacy ms values are migrated to seconds.) |\n| `ask.notify` | enum | `on` | `on`, `off`. |\n\n### Providers and services\n\n```yaml\nproviders:\n webSearch: auto\n image: auto\n fetch: auto\n tinyModel: online\n tinyModelDevice: default\n tinyModelDtype: default\n openaiWebsockets: auto\n openrouterVariant: default\n kimiApiFormat: anthropic\n\nprovider:\n appendOnlyContext: auto # auto, on, off\n\nexa:\n enabled: true\n enableSearch: true\n enableResearcher: false\n enableWebsets: false\n\nsearxng:\n endpoint: https://search.example.com\n token: SEARXNG_TOKEN\n```\n\n| Key | Type | Default | Values / notes |\n|---|---|---|---|\n| `providers.webSearch` | enum | `auto` | `auto` plus the configured search providers (`tavily`, `perplexity`, `brave`, `jina`, `kimi`, `anthropic`, `gemini`, `codex`, `zai`, `exa`, `parallel`, `kagi`, `synthetic`, `searxng`). |\n| `providers.image` | enum | `auto` | `auto`, `openai`, `antigravity`, `xai`, `gemini`, `openrouter`. |\n| `providers.fetch` | enum | `auto` | `auto`, `native`, `trafilatura`, `lynx`, `parallel`, `jina`. |\n| `providers.tinyModel` | enum | `online` | `online` or a local model (`lfm2-350m`, `qwen3-0.6b`, `gemma-270m`, `qwen2.5-0.5b`, `lfm2-700m`). 
|\n| `providers.tinyModelDevice` | enum | `default` | ONNX execution provider for local tiny models. Overridden by `PI_TINY_DEVICE`. |\n| `providers.tinyModelDtype` | enum | `default` | ONNX precision for local tiny models. Overridden by `PI_TINY_DTYPE`. |\n| `providers.openaiWebsockets` | enum | `auto` | `auto`, `off`, `on`. |\n| `providers.openrouterVariant` | enum | `default` | `default`, `nitro`, `floor`, `online`, `exacto`. |\n| `providers.kimiApiFormat` | enum | `anthropic` | `openai`, `anthropic`. |\n| `provider.appendOnlyContext` | enum | `auto` | `auto`, `on`, `off`. |\n| `exa.enabled` | boolean | `true` | Enable Exa integration. |\n| `exa.enableSearch` | boolean | `true` | Exa search. |\n| `exa.enableResearcher` | boolean | `false` | Exa researcher. |\n| `exa.enableWebsets` | boolean | `false` | Exa websets. |\n| `searxng.endpoint` | string | _(unset)_ | SearXNG instance URL. |\n| `searxng.token` | string | _(unset)_ | SearXNG token; also `searxng.basicUsername`/`searxng.basicPassword`/`searxng.categories`/`searxng.language`. |\n| `auth.broker.url` | string | _(unset)_ | Auth-broker URL. Overridden by `OMP_AUTH_BROKER_URL`. |\n| `auth.broker.token` | string | _(unset)_ | Auth-broker token. Overridden by `OMP_AUTH_BROKER_TOKEN`. |\n\nProvider credentials and custom model definitions are configured separately — see [Providers](./providers.md) and [Models](./models.md).\n\n### Other groups\n\n`omp config list` exposes many more grouped settings, including: `task.*` (subagent concurrency, isolation, model overrides), `skills.*` and `commands.*` (discovery toggles), `mcp.*`, `github.*`, `async.*`, `goal.*`, `loop.*`, `todo.*`, `magicKeywords.*`, `ttsr.*` (time-traveling stream rules), `display.*`, `startup.*`, `share.*`, `collab.*`, `stt.*`/`tts.*`, `memories.*`/`hindsight.*`/`mnemopi.*` (memory backends), and `bashInterceptor.*`. Each follows the same type/default rules shown above.\n\n## Legacy migration\n\n`omp` migrates older config shapes automatically. 
None of these require action; they are listed so you know what changes you may see in `config.yml`.\n\n### Startup migration to `config.yml`\n\nWhen `~/.omp/agent/config.yml` does not exist, startup builds it once from legacy sources, then writes the result:\n\n1. `~/.omp/agent/settings.json` (renamed to `settings.json.bak` after a successful migration).\n2. Settings persisted in `agent.db`.\n\nAfter `config.yml` exists, these legacy sources are no longer consulted. The generic config loader also performs `.json` -> `.yml` migration for other config files when only the `.json` form is present.\n\n### Field-level migrations\n\nApplied whenever raw settings are loaded (global, project, overlays, and runtime overrides):\n\n| Old | New |\n|---|---|\n| `queueMode` | `steeringMode` |\n| `ask.timeout` in milliseconds (value `> 1000`) | seconds (divided by 1000) |\n| flat `theme: \"<name>\"` string | `theme.dark` / `theme.light` (slot chosen by luminance; built-in `light`/`dark` are dropped to use defaults) |\n| `task.isolation.enabled: true/false` | `task.isolation.mode: auto/none` |\n| `task.simple` | removed |\n| legacy `task.isolation.mode` (`worktree`, `fuse-overlay`, `fuse-projfs`) | `rcopy`, `overlayfs`, `projfs` |\n| `lastChangelogVersion` | moved to a marker file and stripped from `config.yml` |\n\n## Troubleshooting\n\n### A project setting is not taking effect\n\n- Start `omp` from the directory that contains `.omp/config.yml`. Settings discovery only checks the current working directory's `.omp/`, not ancestor directories.\n- Ensure `.omp/` is non-empty; empty config directories are ignored.\n- Confirm the file is valid YAML and its top level is a mapping.\n- Run `omp config get <key>` from that directory to see the effective value.\n- Remember that `--config` overlays and runtime flags override project config.\n\n### A global array disappeared in a project\n\nArrays replace; they do not append. 
If a project sets `disabledProviders`, `enabledModels`, `cycleOrder`, `extensions`, or any other array, include the **complete** desired value in the project layer — the global array is fully replaced.\n\n### A provider is still available after editing config\n\n- Check whether you disabled the model provider id (e.g. `anthropic`) or a discovery source id (e.g. `claude`) — both kinds of id go in the same `disabledProviders` list, but they gate different subsystems and have different effects.\n- Check for a project (or overlay) `disabledProviders` array replacing your global one.\n- Credentials can still come from environment variables, `.env`, OAuth, stored auth, or `models.yml`; disabling a provider blocks selection regardless, but verify you edited the right layer. See [Providers](./providers.md).\n- Restart the session if the model list was already initialized.\n\n### `omp config set` changed the wrong file\n\n`omp config set` and `omp config reset` always write the global `config.yml` under the active agent directory. Run `omp config path` to print it. For project-local settings, edit `<repo>/.omp/config.yml` directly.\n\n### `omp config reset` did not remove my key\n\n`reset` writes the schema **default** value into the global config — it persists the default rather than deleting the key. To stop overriding a project value from global config, delete the key from `~/.omp/agent/config.yml` by hand.\n\n### A `--config` overlay fails at startup\n\n`--config` files are process-local YAML mappings. A missing file, invalid YAML, or a top-level array/scalar is a hard error — it does not silently fall back to lower-precedence settings. Fix the path or contents.\n\n### An environment variable beats my config\n\nSome settings (model roles, eval backends, tiny-model device/precision, auth broker, PTY) are overridable by env vars or CLI flags for per-machine convenience, and those take precedence over `config.yml`. Unset the variable or drop the flag to let the persisted value win. 
See [Environment overrides](#environment-overrides) and [Environment variables](./environment-variables.md).\n\n### `omp config set <key>` says \"Unknown setting\"\n\nKeys must match a schema path exactly, with no shorthand. Use `theme.dark`, not `theme`. Run `omp config list` to see every valid key.\n",
66
+ "settings.md": "# Settings\n\n`omp` resolves settings from built-in defaults, a persistent global config file, optional project-local config, one-shot CLI overlays, and in-memory runtime overrides. Reach for project settings when one repository needs a different provider set, model role, tool policy, memory backend, or UI behavior than your global defaults — without touching your machine-wide configuration.\n\nSettings are stored as plain YAML mappings. Every key, its type, default, and enum values come from the settings schema, and you can inspect or change any of them with `omp config` or the interactive `/settings` panel.\n\n- For model/provider credentials, `.env` files, and the env-var table that resolves API keys, see [Providers](./providers.md).\n- For custom model definitions in `models.yml`, see [Models](./models.md).\n- For instruction files discovered into the agent context (`AGENTS.md`, `.omp/`, etc.), see [Context files](./context-files.md).\n- For the full catalog of environment variables, see [Environment variables](./environment-variables.md).\n\n## Where settings live\n\n| Scope | Path | Read behavior | Write behavior |\n|---|---|---|---|\n| Global | `~/.omp/agent/config.yml` | The main persistent settings file. Always loaded. | `/settings`, `omp config set`, and `omp config reset` write here. |\n| Global legacy | `~/.omp/agent/settings.json` | Migrated into `config.yml` once, only when `config.yml` does not yet exist. | Not written after migration; the original is renamed to `settings.json.bak`. |\n| Project | `<cwd>/.omp/config.yml` (plus `.omp/settings.json`) | Loaded when the process working directory has a non-empty `.omp/`. | Read-only from settings commands; edit the file by hand. |\n| Project legacy | `<cwd>/.omp/settings.json` | Still read; project `config.yml` is merged on top of it. | Not written by settings commands. 
|\n| CLI overlay | Any file passed with `--config <file>` | Loaded after global and project settings, for that one process. Repeatable. | Never persisted. |\n| Runtime overrides | In-memory only | Set by dedicated CLI flags (`--model`, `--approval-mode`, …) and feature env vars. | Never persisted. |\n\n`PI_CODING_AGENT_DIR` relocates the `~/.omp/agent` base directory. When it is set, the global `config.yml`, the auth store (`agent.db`), and everything else under the agent directory move with it. Use `omp config path` to print the active agent directory.\n\nNative project settings are intentionally scoped to the process working directory's `.omp/` folder — settings discovery does **not** walk ancestor directories looking for the nearest `.omp/`. Other discovery providers (Claude, Codex, Gemini, Cursor, OpenCode) can also contribute project-level settings from their own files; those are read-only from `omp` settings commands and can be turned off by provider id (see [Provider and source disabling](#provider-and-source-disabling)).\n\n## Config file formats\n\nThe global `config.yml` is always YAML. The generic config loader used for other files (for example `models.yml`) accepts `.yml`, `.yaml`, `.json`, and `.jsonc`:\n\n- When a `.yml`/`.yaml` path is requested and only a sibling `.json` exists, it is migrated to YAML automatically (idempotent, once per process).\n- `.json` and `.jsonc` configs are read as-is, with no migration.\n- A file whose top level is not a mapping (a bare array or scalar) is treated as empty for persistent settings, and is a hard error for `--config` overlays.\n\n## Reading and writing settings\n\nUse the interactive `/settings` panel inside a session, or the `omp config` command from a shell. 
Both operate on the merged effective settings, but every persistent write lands in the **global** file only.\n\n```bash\nomp config list # all settings with current effective values\nomp config list --json # same, machine-readable\nomp config get theme.dark # one value\nomp config get theme.dark --json\nomp config set compaction.enabled false\nomp config set defaultThinkingLevel medium\nomp config reset steeringMode # restore a key to its schema default\nomp config path # print the active agent directory\n```\n\nFor users who want the full first-run animation on normal launches, set `startup.showSplash`:\n\n```bash\nomp config set startup.showSplash true\n```\n\nThis only controls the startup splash animation. It does not rerun setup or change setup state, and `startup.quiet: true` still suppresses all startup chrome including the splash.\n\n### Subcommands\n\n| Command | Effect |\n|---|---|\n| `omp config list` | Print every setting grouped by tab, with its current value and type. `--json` emits an object keyed by setting path with `{ value, type, description }`. |\n| `omp config get <key>` | Print the effective value of one key. Unknown keys exit non-zero. `--json` emits `{ key, value, type, description }`. |\n| `omp config set <key> <value>` | Parse `<value>` against the key's schema type and write it to the global `config.yml`. |\n| `omp config reset <key>` | Write the key's schema **default** back to the global config (this persists the default, it does not delete the key). |\n| `omp config path` | Print the active agent directory (honors `PI_CODING_AGENT_DIR`). |\n\n`omp config` with no subcommand, or `--help`, prints the help and lists settings. The `--json` flag is accepted by `list`, `get`, `set`, and `reset`.\n\n### Value parsing\n\n`omp config set` parses the value string according to the target key's schema type. 
The string is trimmed first.\n\n| Type | Accepted input | Notes |\n|---|---|---|\n| boolean | `true`, `false`, `yes`, `no`, `on`, `off`, `1`, `0` | Case-insensitive. Anything else is rejected. |\n| number | Any finite JavaScript number | `Infinity`/`NaN` are rejected. |\n| enum | One of the key's allowed values | Must match exactly; the error lists the valid values. |\n| array | A JSON array | e.g. `'[\"anthropic\",\"openai\"]'`. Must parse and be an array. |\n| record | A JSON object | e.g. `'{\"bash\":\"prompt\"}'`. Must parse and be a non-array object. |\n| string | Stored as given (trimmed) | Multi-word values are joined with spaces. |\n\nKeys must match a real schema path exactly. There is no shorthand — set `theme.dark`, not `theme`.\n\n### Where writes go\n\n`omp config set`, `omp config reset`, `/settings`, and any runtime settings change all write to the global `config.yml` under the active agent directory. They never write to `<cwd>/.omp/config.yml`. To create a project-local override, edit that file directly (see [Project-local config](#project-local-config)). Saves are debounced and re-read the file under a lock, so external edits made while a session is open are preserved.\n\n## Precedence\n\nFrom lowest to highest priority, the effective value of a setting is built as:\n\n```text\nbuilt-in defaults <- global config <- project config <- CLI overlays <- runtime overrides\n```\n\nFrom highest to lowest:\n\n1. **Runtime overrides** — dedicated CLI flags and feature env vars applied in memory for the current process: `--model`, `--smol`, `--slow`, `--plan`, `--approval-mode`, `--auto-approve`/`--yolo`, `--hide-thinking`, `--advisor`, `--no-pty`, `--api-key`, and protocol-mode defaults. Never persisted.\n2. **CLI config overlays** — each `--config <file>`; later overlay files override earlier ones.\n3. **Project settings** — `<cwd>/.omp/settings.json` then `<cwd>/.omp/config.yml` (and contributions from other discovery providers at project level).\n4. 
**Global settings** — `~/.omp/agent/config.yml`.\n5. **Built-in defaults** — from the settings schema.\n\nA key that is unset at every layer resolves to its schema default at read time.\n\n### Environment overrides\n\nEnvironment variables are **not** a single settings layer. Each is read by the feature that owns the value, usually as a per-machine override or fallback, and is never written back to `config.yml`. The ones that map directly onto a setting:\n\n| Env var | Overrides setting | Notes |\n|---|---|---|\n| `PI_SMOL_MODEL` | `modelRoles.smol` | Also exposed as `--smol`. |\n| `PI_SLOW_MODEL` | `modelRoles.slow` | Also exposed as `--slow`. |\n| `PI_PLAN_MODEL` | `modelRoles.plan` | Also exposed as `--plan`. |\n| `PI_NO_PTY=1` | (disables PTY bash) | Equivalent to `--no-pty` for the process. |\n| `PI_PY` | `eval.py` | `PI_PY=0` disables the Python eval backend. |\n| `PI_JS` | `eval.js` | `PI_JS=0` disables the JavaScript eval backend. |\n| `PI_TINY_DEVICE` | `providers.tinyModelDevice` | ONNX execution provider for local tiny models. |\n| `PI_TINY_DTYPE` | `providers.tinyModelDtype` | ONNX precision for local tiny models. |\n| `OMP_AUTH_BROKER_URL` | `auth.broker.url` | Env value takes precedence over config. |\n| `OMP_AUTH_BROKER_TOKEN` | `auth.broker.token` | Env value takes precedence over config. |\n| `PI_CODING_AGENT_DIR` | (relocates agent dir) | Moves `config.yml`, `agent.db`, and the whole agent base. |\n\nProvider API keys are resolved separately (stored auth, OAuth, `models.yml`, environment, and `.env` files); see [Providers](./providers.md) and the full [Environment variables](./environment-variables.md) reference.\n\n## Merge rules\n\nLayers are combined with a deep merge:\n\n- **Objects are deep-merged** — keys present only in a lower layer are kept; keys present in a higher layer override.\n- **Scalars and arrays are replaced wholesale** by the higher-precedence layer. 
A higher layer's array does not append to a lower layer's array.\n\nUse nested YAML mappings for dotted setting paths:\n\n```yaml\ntheme:\n dark: titanium\n light: light\n\ntools:\n approvalMode: write\n approval:\n bash: prompt\n read: allow\n```\n\n### Worked example: global vs. project\n\n```yaml\n# ~/.omp/agent/config.yml\ntools:\n approvalMode: write\n approval:\n bash: prompt\n read: allow\ndisabledProviders:\n - anthropic\n - openai\n - gemini\n\n# <repo>/.omp/config.yml\ntools:\n approval:\n bash: allow\ndisabledProviders:\n - groq\n```\n\nEffective settings inside `<repo>`:\n\n```yaml\ntools:\n approvalMode: write # kept from global (object deep-merge)\n approval:\n bash: allow # overridden by project\n read: allow # kept from global\ndisabledProviders:\n - groq # project array REPLACES the global array\n```\n\nArray replacement is the most common surprise: the project's `disabledProviders` does not extend the global list — it becomes the entire list for that project. The same applies to `enabledModels`, `cycleOrder`, `extensions`, and every other array-typed setting.\n\n## Project-local config\n\nCreate `<repo>/.omp/config.yml` when a repository needs its own settings:\n\n```yaml\n# <repo>/.omp/config.yml\nmodelRoles:\n default: anthropic/claude-sonnet-4-5\n smol: openai/gpt-4.1-mini\n slow: anthropic/claude-opus-4-5:high\n\ntools:\n approvalMode: write\n approval:\n bash: prompt\n\ncompaction:\n strategy: context-full\n thresholdPercent: 80\n\ntheme:\n dark: titanium\n```\n\nKeep secrets out of committed project config unless your repository policy allows it. 
Prefer environment variables, stored auth, an auth broker, or an untracked `--config` overlay for credentials.\n\n### One-shot overlays\n\nUse `--config` for a temporary layer that should not persist:\n\n```bash\nomp --config ./local/ci-settings.yml \"check this failure\"\nomp --config ./base.yml --config ./experiment.yml \"try this model\"\n```\n\nOverlay paths are resolved relative to the process working directory (and `~` is expanded). Each overlay must parse as a YAML mapping; a missing file, invalid YAML, or a top-level array/scalar is a hard error — it does **not** silently fall back to lower-precedence settings.\n\n## Path-scoped arrays\n\nTwo array settings — `enabledModels` and `disabledProviders` — accept path-scoped entries in addition to bare strings, so a single global config can behave differently per directory:\n\n```yaml\nenabledModels:\n - claude-sonnet-4-5 # applies everywhere\n - path: ~/work/high-context\n models:\n - anthropic/claude-opus-4-5\n\ndisabledProviders:\n - ollama # applies everywhere\n - paths:\n - ~/projects/sensitive\n - ~/clients/acme\n providers:\n - anthropic\n - openai\n```\n\nBare string entries apply everywhere. A scoped entry applies when the current working directory **is** the configured path or is **under** it. `~` expands to your home directory and relative paths are resolved before matching.\n\nAccepted **path** keys (any of them, combined): `path`, `paths`, `pathPrefix`, `pathPrefixes`.\n\nAccepted **value** keys:\n\n- `models` (for `enabledModels`) or `providers` (for `disabledProviders`)\n- `values` or `items` (for either setting)\n\nOnly string values are kept; malformed scoped entries are ignored. 
Path scoping is resolved **after** the layer merge, so it reads the final effective array.\n\n## Provider and source disabling\n\n`disabledProviders` is a single shared id namespace that gates two different subsystems, before any credential check:\n\n| Entry kind | Example ids | Effect |\n|---|---|---|\n| Model providers | `anthropic`, `openai`, `gemini`, `groq`, `ollama`, `openrouter` | Removes those backends from model selection, even when credentials are available. See [Providers](./providers.md). |\n| Discovery sources | `native`, `claude`, `codex`, `gemini`, `github`, `opencode`, `cursor`, `agents-md` | Stops that source from contributing context files, MCP servers, commands, skills, hooks, tools, prompts, or settings. See [Context files](./context-files.md). |\n\nMost provider-control use cases list model provider ids. Disabling the `claude` discovery source is different from disabling the `anthropic` model provider — one stops Claude-format config discovery, the other stops the Anthropic model backend.\n\nBecause arrays replace rather than append, a project that sets `disabledProviders` must list the complete desired set:\n\n```yaml\n# ~/.omp/agent/config.yml\ndisabledProviders:\n - anthropic\n - openai\n\n# <repo>/.omp/config.yml — inside this repo ONLY groq is disabled\ndisabledProviders:\n - groq\n```\n\nThe default is an empty array (nothing disabled). For the two subsystems' provider ids and ordering, see [Providers](./providers.md) and [Context files](./context-files.md).\n\n## Settings catalog\n\nEvery key below is defined in the settings schema; `omp config list` shows the full set with current values. Defaults and enum values are taken from the schema. Settings that accept an env or flag override are noted; those overrides are process-local and not persisted.\n\n### Models\n\n`modelRoles`, `modelTags`, and `cycleOrder` work together to define the models you can switch between. 
Role values may carry a thinking suffix (`:minimal`, `:low`, `:medium`, `:high`, `:xhigh`).\n\n```yaml\nmodelRoles:\n default: anthropic/claude-sonnet-4-5\n smol: openai/gpt-4.1-mini\n slow: anthropic/claude-opus-4-5:high\n vision: gemini/gemini-3-pro-preview\n plan: anthropic/claude-opus-4-5\n advisor: anthropic/claude-sonnet-4-5:medium\n\ncycleOrder:\n - smol\n - default\n - slow\n\nmodelProviderOrder:\n - anthropic\n - openai\n\nenabledModels:\n - claude-sonnet-4-5\n```\n\n| Key | Type | Default | Notes |\n|---|---|---|---|\n| `modelRoles` | record | `{}` | Map of role name -> model id. Built-in roles: `default`, `smol`, `slow`, `vision`, `plan`, `designer`, `commit`, `title`, `task`, `advisor`. Per-role env/flags exist only for `--model`/`--smol`/`--slow`/`--plan`; configure the advisor with `modelRoles.advisor`. |\n| `modelTags` | record | `{}` | Custom role/tag metadata; can introduce additional roles. |\n| `modelProviderOrder` | array | `[]` | Preferred provider order when a model id is ambiguous. |\n| `cycleOrder` | array | `[\"smol\",\"default\",\"slow\"]` | Roles cycled by the model switcher. |\n| `enabledModels` | array | `[]` | Allow-list of models; supports [path-scoped entries](#path-scoped-arrays). Empty means all available models. |\n| `disabledProviders` | array | `[]` | Disabled model/discovery providers; supports path-scoped entries. See [above](#provider-and-source-disabling). |\n| `includeModelInPrompt` | boolean | `true` | Include the active model name in the system prompt. |\n\nSee [Models](./models.md) for the `models.yml` schema and custom-provider definitions.\n\n### Advisor\n\nThe advisor is a second model that reviews each completed turn and can inject advice into the primary session. 
Assign a model with `modelRoles.advisor`, then enable it with `advisor.enabled`, `/advisor on`, or by launching with the `--advisor` flag.\n\nSee [Advisor and WATCHDOG.md](./advisor-watchdog.md) for runtime behavior, `WATCHDOG.md` discovery, and bounded catch-up semantics.\n\n| Key | Type | Default | Notes |\n|---|---|---|---|\n| `advisor.enabled` | boolean | `false` | Enable the advisor runtime when `modelRoles.advisor` resolves to an available model. |\n| `advisor.subagents` | boolean | `false` | Also enable advisor runtimes for spawned task/eval subagents. |\n| `advisor.syncBacklog` | enum | `off` | Bounded advisor catch-up delay: `off`, `1`, `3`, or `5`. The primary waits up to 30 seconds only while advisor backlog is at or above the threshold. |\n| `advisor.immuneTurns` | number | `1` | After a `concern`/`blocker` interrupts, route further concerns/blockers as non-interrupting asides for this many completed primary turns. |\n\n### Thinking\n\n```yaml\ndefaultThinkingLevel: high\nhideThinkingBlock: false\nthinkingBudgets:\n minimal: 1024\n low: 2048\n medium: 8192\n high: 16384\n xhigh: 32768\n```\n\n| Key | Type | Default | Values |\n|---|---|---|---|\n| `defaultThinkingLevel` | enum | `high` | `minimal`, `low`, `medium`, `high`, `xhigh`, `auto`. Override per run with `--thinking`. |\n| `hideThinkingBlock` | boolean | `false` | Hide thinking blocks in output. `--hide-thinking` sets it for the run (display only). |\n| `thinkingBudgets.minimal` | number | `1024` | Token budget for the `minimal` level. |\n| `thinkingBudgets.low` | number | `2048` | Token budget for `low`. |\n| `thinkingBudgets.medium` | number | `8192` | Token budget for `medium`. |\n| `thinkingBudgets.high` | number | `16384` | Token budget for `high`. |\n| `thinkingBudgets.xhigh` | number | `32768` | Token budget for `xhigh`. 
|\n\n### Sampling\n\nA value of `-1` means \"use the provider/model default\" — `omp` does not send that parameter.\n\n| Key | Type | Default | Notes |\n|---|---|---|---|\n| `temperature` | number | `-1` | Sampling temperature. |\n| `topP` | number | `-1` | Nucleus sampling. |\n| `topK` | number | `-1` | Top-K sampling. |\n| `minP` | number | `-1` | Minimum-probability cutoff. |\n| `presencePenalty` | number | `-1` | Presence penalty. |\n| `repetitionPenalty` | number | `-1` | Repetition penalty. |\n| `serviceTier` | enum | `none` | `none`, `auto`, `default`, `flex`, `scale`, `priority`, `openai-only`, `claude-only`. |\n| `personality` | enum | `default` | `default`, `friendly`, `pragmatic`, `none`. |\n\n### Retry and fallback\n\n```yaml\nretry:\n enabled: true\n maxRetries: 10\n baseDelayMs: 500\n maxDelayMs: 300000\n modelFallback: true\n fallbackRevertPolicy: cooldown-expiry\n```\n\n| Key | Type | Default | Notes |\n|---|---|---|---|\n| `retry.enabled` | boolean | `true` | Retry transient provider errors. |\n| `retry.maxRetries` | number | `10` | Max retries per request. |\n| `retry.baseDelayMs` | number | `500` | Initial backoff. |\n| `retry.maxDelayMs` | number | `300000` | Backoff ceiling (5 min). |\n| `retry.modelFallback` | boolean | `true` | Fall back to another model when one is unavailable. |\n| `retry.fallbackChains` | record | `{}` | Per-model fallback chains. |\n| `retry.fallbackRevertPolicy` | enum | `cooldown-expiry` | `cooldown-expiry`, `never`. |\n\n### Tools and approvals\n\n```yaml\ntools:\n approvalMode: yolo # default\n approval:\n bash: prompt\n edit: allow\n discoveryMode: auto\n maxTimeout: 0\n intentTracing: true\n```\n\n| Key | Type | Default | Notes |\n|---|---|---|---|\n| `tools.approvalMode` | enum | `yolo` | `always-ask` (auto-approve read-only), `write` (auto-approve read + workspace-write), `yolo` (auto-approve all tiers). `--approval-mode` and `--auto-approve`/`--yolo` override per run. 
|\n| `tools.approval` | record | `{}` | Per-tool policy keyed by tool name; each value is `allow`, `deny`, or `prompt`. e.g. `omp config set tools.approval '{\"bash\":\"prompt\"}'`. |\n| `tools.discoveryMode` | enum | `auto` | `auto`, `off`, `mcp-only`, `all`. Controls dynamic tool discovery. |\n| `tools.essentialOverride` | array | `[]` | Tool names kept available even when tools are narrowed. |\n| `tools.maxTimeout` | number | `0` | Max tool runtime in seconds; `0` = no cap. |\n| `tools.intentTracing` | boolean | `true` | Record per-call intent strings. |\n| `tools.outputMaxColumns` | number | `768` | Per-line byte cap for streaming output; `0` disables. |\n| `tools.artifactSpillThreshold` | number | `50` | KB of tool output above which output spills to an artifact. |\n| `tools.artifactHeadBytes` | number | `20` | KB of head kept inline on spill; `0` = tail-only. |\n| `tools.artifactTailBytes` | number | `20` | KB of tail kept inline on spill. |\n| `tools.artifactTailLines` | number | `500` | Max tail lines kept inline on spill. |\n\nIndividual built-in tools are toggled by their own keys, e.g. `bash.enabled`, `eval.py`, `eval.js`, `find.enabled`, `search.enabled`, `fetch.enabled`, `browser.enabled`, `astEdit.enabled`, `astGrep.enabled`, `web_search.enabled`, `inspect_image.enabled`.\n\n### Shell, eval, and LSP\n\n```yaml\nbash:\n enabled: true\n stripTrailingHeadTail: true\n autoBackground:\n enabled: false\n thresholdMs: 60000\n\neval:\n py: true\n js: true\n\npython:\n kernelMode: session # session, per-call\n interpreter: \"\"\n\nlsp:\n enabled: true\n lazy: true\n diagnosticsOnWrite: true\n diagnosticsOnEdit: false\n formatOnWrite: false\n```\n\n| Key | Type | Default | Notes |\n|---|---|---|---|\n| `bash.enabled` | boolean | `true` | Enable the bash tool. |\n| `bash.stripTrailingHeadTail` | boolean | `true` | Strip trailing head/tail noise from output. |\n| `bash.autoBackground.enabled` | boolean | `false` | Auto-background long-running commands. 
|\n| `bash.autoBackground.thresholdMs` | number | `60000` | Threshold before auto-backgrounding. |\n| `eval.py` | boolean | `true` | Python eval backend. `PI_PY=0` disables for the process. |\n| `eval.js` | boolean | `true` | JavaScript eval backend. `PI_JS=0` disables for the process. |\n| `python.kernelMode` | enum | `session` | `session` (persistent kernel) or `per-call`. |\n| `python.interpreter` | string | `\"\"` | Path to a Python interpreter; empty = auto-detect. |\n| `lsp.enabled` | boolean | `true` | Language-server integration. `--no-lsp` disables for the run. |\n| `lsp.lazy` | boolean | `true` | Start servers on demand. |\n| `lsp.diagnosticsOnWrite` | boolean | `true` | Run diagnostics after a write. |\n| `lsp.diagnosticsOnEdit` | boolean | `false` | Run diagnostics after an edit. |\n| `lsp.formatOnWrite` | boolean | `false` | Format files on write. |\n| `lsp.diagnosticsDeduplicate` | boolean | `true` | Collapse duplicate diagnostics. |\n| `shellPath` | string | _(unset)_ | Override the shell binary used by bash. |\n\n### Files: editing and reading\n\n```yaml\nedit:\n mode: hashline # apply_patch, hashline, patch, replace\n fuzzyMatch: true\n fuzzyThreshold: 0.95\n blockAutoGenerated: true\n\nread:\n defaultLimit: 300\n toolResultPreview: false\n summarize:\n enabled: true\n prose: false\n```\n\n| Key | Type | Default | Notes |\n|---|---|---|---|\n| `edit.mode` | enum | `hashline` | `apply_patch`, `hashline`, `patch`, `replace`. |\n| `edit.fuzzyMatch` | boolean | `true` | Allow fuzzy anchor matching. |\n| `edit.fuzzyThreshold` | number | `0.95` | Similarity threshold for fuzzy matching. |\n| `edit.blockAutoGenerated` | boolean | `true` | Refuse to edit generated/lockfile-like files. |\n| `edit.streamingAbort` | boolean | `false` | Abort on streaming edit mismatch. |\n| `read.defaultLimit` | number | `300` | Default line count for `read` without a selector. |\n| `read.summarize.enabled` | boolean | `true` | Structural summaries for code reads. 
|\n| `read.summarize.prose` | boolean | `false` | Summarize prose files too. |\n| `read.toolResultPreview` | boolean | `false` | Inline preview of tool results. |\n| `readHashLines` | boolean | `true` | Show hashline tags in read output. |\n| `readLineNumbers` | boolean | `false` | Show plain line numbers. |\n\n### Context, compaction, and memory\n\n```yaml\ncontextPromotion:\n enabled: true\n\ncompaction:\n enabled: true\n strategy: context-full # context-full, handoff, shake, snapcompact, off\n thresholdPercent: -1 # -1 = default reserve-based behavior\n thresholdTokens: -1 # fixed token limit when > 0\n remoteEnabled: true\n\nmemory:\n backend: off # off, local, hindsight, mnemopi\n```\n\n| Key | Type | Default | Notes |\n|---|---|---|---|\n| `contextPromotion.enabled` | boolean | `true` | Promote relevant earlier context. |\n| `compaction.enabled` | boolean | `true` | Automatic conversation compaction. |\n| `compaction.strategy` | enum | `context-full` | `context-full`, `handoff`, `shake`, `snapcompact`, `off`. |\n| `compaction.thresholdPercent` | number | `-1` | Percent-of-context trigger; `-1` = reserve-based default. |\n| `compaction.thresholdTokens` | number | `-1` | Fixed token trigger when `> 0`. |\n| `compaction.reserveTokens` | number | `16384` | Tokens reserved for the next turn. |\n| `compaction.keepRecentTokens` | number | `20000` | Recent tokens always preserved. |\n| `compaction.remoteEnabled` | boolean | `true` | Allow remote compaction service. |\n| `compaction.autoContinue` | boolean | `true` | Continue automatically after compaction. |\n| `memory.backend` | enum | `off` | `off`, `local`, `hindsight`, `mnemopi`. Each backend has its own `hindsight.*` / `mnemopi.*` / `memories.*` tuning keys. |\n| `autolearn.enabled` | boolean | `false` | Experimental: after the agent stops, nudge it to capture lessons to memory and create/enhance isolated managed skills under `~/.omp/agent/managed-skills`. 
Enables the `manage_skill` tool (and `learn` when a memory backend is active). |\n| `autolearn.autoContinue` | boolean | `false` | When `autolearn.enabled`, auto-run one capture turn at stop (uses extra tokens). Off = a passive reminder rides your next turn. |\n| `autolearn.minToolCalls` | number | `5` | Only nudge after a turn that used at least this many tools. |\n\n`compaction` has additional tuning keys (idle compaction, supersede/drop heuristics) visible in `omp config list`. See [Compaction](./compaction.md) for the full strategy reference.\n\n### Appearance and terminal\n\n```yaml\ntheme:\n dark: titanium\n light: light\nsymbolPreset: unicode # unicode, nerd, ascii\ncolorBlindMode: false\n\nstatusLine:\n preset: default # default, minimal, compact, full, nerd, ascii, custom\n separator: powerline-thin\n transparent: false\n showHookStatus: true\n\nterminal:\n showImages: true\nimages:\n autoResize: true\n blockImages: false\ntui:\n hyperlinks: auto # off, auto, always\n```\n\n| Key | Type | Default | Values |\n|---|---|---|---|\n| `theme.dark` | string | `titanium` | Theme used on a dark terminal background. |\n| `theme.light` | string | `light` | Theme used on a light terminal background. |\n| `symbolPreset` | enum | `unicode` | `unicode`, `nerd`, `ascii`. |\n| `colorBlindMode` | boolean | `false` | Use blue instead of green for diff additions. |\n| `showHardwareCursor` | boolean | `true` | Show the terminal hardware cursor. |\n| `statusLine.preset` | enum | `default` | `default`, `minimal`, `compact`, `full`, `nerd`, `ascii`, `custom`. |\n| `statusLine.separator` | enum | `powerline-thin` | `powerline`, `powerline-thin`, `slash`, `pipe`, `block`, `none`, `ascii`. |\n| `statusLine.sessionAccent` | boolean | `true` | Tint the editor border with the session color. |\n| `statusLine.transparent` | boolean | `false` | Use the terminal background for the status line. |\n| `statusLine.showHookStatus` | boolean | `true` | Show hook status messages. 
|\n| `terminal.showImages` | boolean | `true` | Render images inline (when the terminal supports it). |\n| `images.autoResize` | boolean | `true` | Resize large images for model compatibility. |\n| `images.blockImages` | boolean | `false` | Never send images to providers. |\n| `tui.hyperlinks` | enum | `auto` | `off`, `auto`, `always`. |\n\nFor a custom status line, set `statusLine.preset: custom` and configure `statusLine.leftSegments`, `statusLine.rightSegments`, and `statusLine.segmentOptions`.\n\n### Interaction\n\n| Key | Type | Default | Values |\n|---|---|---|---|\n| `steeringMode` | enum | `one-at-a-time` | `all`, `one-at-a-time`. How queued steering messages are delivered. |\n| `followUpMode` | enum | `one-at-a-time` | `all`, `one-at-a-time`. |\n| `interruptMode` | enum | `immediate` | `immediate`, `wait`. |\n| `doubleEscapeAction` | enum | `tree` | `branch`, `tree`, `none`. |\n| `autoResume` | boolean | `false` | Auto-resume the most recent session in the cwd. |\n| `ask.timeout` | number | `0` | Seconds before an `ask` prompt times out; `0` = no timeout. (Legacy ms values are migrated to seconds.) |\n| `ask.notify` | enum | `on` | `on`, `off`. |\n\n### Providers and services\n\n```yaml\nproviders:\n webSearch: auto\n image: auto\n fetch: auto\n tinyModel: online\n tinyModelDevice: default\n tinyModelDtype: default\n openaiWebsockets: auto\n openrouterVariant: default\n kimiApiFormat: anthropic\n\nprovider:\n appendOnlyContext: auto # auto, on, off\n\nexa:\n enabled: true\n enableSearch: true\n enableResearcher: false\n enableWebsets: false\n\nsearxng:\n endpoint: https://search.example.com\n token: SEARXNG_TOKEN\n```\n\n| Key | Type | Default | Values / notes |\n|---|---|---|---|\n| `providers.webSearch` | enum | `auto` | `auto` plus the configured search providers (`perplexity`, `gemini`, `anthropic`, `codex`, `zai`, `exa`, `jina`, `kagi`, `tavily`, `brave`, `kimi`, `parallel`, `synthetic`, `searxng`). 
|\n| `providers.image` | enum | `auto` | `auto`, `openai`, `antigravity`, `xai`, `gemini`, `openrouter`. |\n| `providers.fetch` | enum | `auto` | `auto`, `native`, `trafilatura`, `lynx`, `parallel`, `jina`. |\n| `providers.tinyModel` | enum | `online` | `online` or a local model (`lfm2-350m`, `qwen3-0.6b`, `gemma-270m`, `qwen2.5-0.5b`, `lfm2-700m`). |\n| `providers.tinyModelDevice` | enum | `default` | ONNX execution provider for local tiny models. Overridden by `PI_TINY_DEVICE`. |\n| `providers.tinyModelDtype` | enum | `default` | ONNX precision for local tiny models. Overridden by `PI_TINY_DTYPE`. |\n| `providers.openaiWebsockets` | enum | `auto` | `auto`, `off`, `on`. |\n| `providers.openrouterVariant` | enum | `default` | `default`, `nitro`, `floor`, `online`, `exacto`. |\n| `providers.kimiApiFormat` | enum | `anthropic` | `openai`, `anthropic`. |\n| `provider.appendOnlyContext` | enum | `auto` | `auto`, `on`, `off`. |\n| `exa.enabled` | boolean | `true` | Enable Exa integration. |\n| `exa.enableSearch` | boolean | `true` | Exa search. |\n| `exa.enableResearcher` | boolean | `false` | Exa researcher. |\n| `exa.enableWebsets` | boolean | `false` | Exa websets. |\n| `searxng.endpoint` | string | _(unset)_ | SearXNG instance URL. |\n| `searxng.token` | string | _(unset)_ | SearXNG token; also `searxng.basicUsername`/`searxng.basicPassword`/`searxng.categories`/`searxng.language`. |\n| `auth.broker.url` | string | _(unset)_ | Auth-broker URL. Overridden by `OMP_AUTH_BROKER_URL`. |\n| `auth.broker.token` | string | _(unset)_ | Auth-broker token. Overridden by `OMP_AUTH_BROKER_TOKEN`. 
|\n\nProvider credentials and custom model definitions are configured separately — see [Providers](./providers.md) and [Models](./models.md).\n\n### Other groups\n\n`omp config list` exposes many more grouped settings, including: `task.*` (subagent concurrency, isolation, model overrides), `skills.*` and `commands.*` (discovery toggles), `mcp.*`, `github.*`, `async.*`, `goal.*`, `loop.*`, `todo.*`, `magicKeywords.*`, `ttsr.*` (time-traveling stream rules), `display.*`, `startup.*`, `share.*`, `collab.*`, `stt.*`/`tts.*`, `memories.*`/`hindsight.*`/`mnemopi.*` (memory backends), and `bashInterceptor.*`. Each follows the same type/default rules shown above.\n\n## Legacy migration\n\n`omp` migrates older config shapes automatically. None of these require action; they are listed so you know what changes you may see in `config.yml`.\n\n### Startup migration to `config.yml`\n\nWhen `~/.omp/agent/config.yml` does not exist, startup builds it once from legacy sources, then writes the result:\n\n1. `~/.omp/agent/settings.json` (renamed to `settings.json.bak` after a successful migration).\n2. Settings persisted in `agent.db`.\n\nAfter `config.yml` exists, these legacy sources are no longer consulted. 
The generic config loader also performs `.json` -> `.yml` migration for other config files when only the `.json` form is present.\n\n### Field-level migrations\n\nApplied whenever raw settings are loaded (global, project, overlays, and runtime overrides):\n\n| Old | New |\n|---|---|\n| `queueMode` | `steeringMode` |\n| `ask.timeout` in milliseconds (value `> 1000`) | seconds (divided by 1000) |\n| flat `theme: \"<name>\"` string | `theme.dark` / `theme.light` (slot chosen by luminance; built-in `light`/`dark` are dropped to use defaults) |\n| `task.isolation.enabled: true/false` | `task.isolation.mode: auto/none` |\n| `task.simple` | removed |\n| legacy `task.isolation.mode` (`worktree`, `fuse-overlay`, `fuse-projfs`) | `rcopy`, `overlayfs`, `projfs` |\n| `lastChangelogVersion` | moved to a marker file and stripped from `config.yml` |\n\n## Troubleshooting\n\n### A project setting is not taking effect\n\n- Start `omp` from the directory that contains `.omp/config.yml`. Settings discovery only checks the current working directory's `.omp/`, not ancestor directories.\n- Ensure `.omp/` is non-empty; empty config directories are ignored.\n- Confirm the file is valid YAML and its top level is a mapping.\n- Run `omp config get <key>` from that directory to see the effective value.\n- Remember that `--config` overlays and runtime flags override project config.\n\n### A global array disappeared in a project\n\nArrays replace; they do not append. If a project sets `disabledProviders`, `enabledModels`, `cycleOrder`, `extensions`, or any other array, include the **complete** desired value in the project layer — the global array is fully replaced.\n\n### A provider is still available after editing config\n\n- Check whether you disabled the model provider id (e.g. `anthropic`) or a discovery source id (e.g. 
`claude`) — both kinds share the single `disabledProviders` id namespace but gate different subsystems.\n- Check for a project (or overlay) `disabledProviders` array replacing your global one.\n- Credentials can still come from environment variables, `.env`, OAuth, stored auth, or `models.yml`; disabling a provider blocks selection regardless, but verify you edited the right layer. See [Providers](./providers.md).\n- Restart the session if the model list was already initialized.\n\n### `omp config set` changed the wrong file\n\n`omp config set` and `omp config reset` always write the global `config.yml` under the active agent directory. Run `omp config path` to print it. For project-local settings, edit `<repo>/.omp/config.yml` directly.\n\n### `omp config reset` did not remove my key\n\n`reset` writes the schema **default** value into the global config — it persists the default rather than deleting the key. To stop overriding a project value from global config, delete the key from `~/.omp/agent/config.yml` by hand.\n\n### A `--config` overlay fails at startup\n\n`--config` files are process-local YAML mappings. A missing file, invalid YAML, or a top-level array/scalar is a hard error — it does not silently fall back to lower-precedence settings. Fix the path or contents.\n\n### An environment variable beats my config\n\nSome settings (model roles, eval backends, tiny-model device/precision, auth broker, PTY) are overridable by env vars or CLI flags for per-machine convenience, and those take precedence over `config.yml`. Unset the variable or drop the flag to let the persisted value win. See [Environment overrides](#environment-overrides) and [Environment variables](./environment-variables.md).\n\n### `omp config set <key>` says \"Unknown setting\"\n\nKeys must match a schema path exactly, with no shorthand. Use `theme.dark`, not `theme`. Run `omp config list` to see every valid key.\n",
65
67
  "skills.md": "# Skills\n\nSkills are file-backed capability packs discovered at startup and exposed to the model as:\n\n- lightweight metadata in the system prompt (name + description)\n- on-demand content via the `read` tool against `skill://...`\n- optional interactive `/skill:<name>` commands\n\nThis document covers current runtime behavior in `src/extensibility/skills.ts`, `src/discovery/builtin.ts`, `src/internal-urls/skill-protocol.ts`, and `src/discovery/agents-md.ts`.\n\n## What a skill is in this codebase\n\nA discovered skill is represented as:\n\n- `name`\n- `description`\n- `filePath` (the `SKILL.md` path)\n- `baseDir` (skill directory)\n- source metadata (`provider`, `level`, path)\n\nThe runtime only requires `name` and `path` for validity. In practice, matching quality depends on `description` being meaningful.\n\n## Required layout and SKILL.md expectations\n\n### Directory layout\n\nFor provider-based discovery (native/Claude/Codex/Agents/plugin providers), skills are discovered as **one level under `skills/`**:\n\n- `<skills-root>/<skill-name>/SKILL.md`\n\nNested patterns like `<skills-root>/group/<skill>/SKILL.md` are not discovered by provider loaders.\n\nFor `skills.customDirectories`, scanning uses the same non-recursive layout (`*/SKILL.md`).\n\n```text\nProvider-discovered layout (non-recursive under skills/):\n\n<root>/skills/\n ├─ postgres/\n │ └─ SKILL.md ✅ discovered\n ├─ pdf/\n │ └─ SKILL.md ✅ discovered\n └─ team/\n └─ internal/\n └─ SKILL.md ❌ not discovered by provider loaders\n\nCustom-directory scanning is also non-recursive, so nested paths are ignored unless you point `customDirectories` at that nested parent.\n```\n\n### `SKILL.md` frontmatter\n\nSupported frontmatter fields on the skill type:\n\n- `name?: string`\n- `description?: string`\n- `globs?: string[]`\n- `alwaysApply?: boolean`\n- `hide?: boolean`\n- `disableModelInvocation?: boolean` (Agent Skills equivalent of `hide`; normalized from kebab-case 
`disable-model-invocation`)\n- additional keys are preserved as unknown metadata\n\nCurrent runtime behavior:\n\n- `name` defaults to the skill directory name\n- `description` is required for:\n - native `.omp` provider skill discovery (`requireDescription: true`)\n - `omp-plugins` extension-package skills and the `github` provider (`.github/skills/`), which also pass `requireDescription: true`\n - `skills.customDirectories` scans via `scanSkillsFromDir` in `src/discovery/helpers.ts` (non-recursive)\n- the claude/codex/agents/opencode/claude-plugins providers can load skills without description\n\n## Discovery pipeline\n\n`loadSkills()` in `src/extensibility/skills.ts` does three passes:\n\n1. **Capability providers** via `loadCapability(\"skills\")` (the managed/auto-learn provider's skills are skipped here and handled in pass 3)\n2. **Custom directories** via `scanSkillsFromDir(..., { requireDescription: true })` (one-level directory enumeration)\n3. **Managed (auto-learn) skills** (`omp-managed` provider) resolved dead-last with first-wins, so any same-named authored skill from any provider or custom directory takes precedence\n\nIf `skills.enabled` is `false`, discovery returns no skills.\n\n### Built-in skill providers and precedence\n\nProvider ordering is priority-first (higher wins), then registration order for ties.\n\nCurrent registered skill providers:\n\n1. `native` (priority 100) — `.omp` user/project skills via `src/discovery/builtin.ts`\n2. `omp-plugins` (priority 90) — `skills/` bundled next to extension packages loaded through `extensions:`, `--extension`/`-e`, or installed plugins under `~/.omp/plugins/node_modules`\n3. `claude` (priority 80)\n4. priority 70 group (in registration order):\n - `claude-plugins`\n - `agents`\n - `codex`\n5. `opencode` (priority 55)\n6. `github` (priority 30) — `.github/skills/<name>/SKILL.md` (GitHub Agent Skills layout, project-only)\n7. 
`omp-managed` (priority 5) — auto-learn skills under `~/.omp/agent/managed-skills`, registered in `src/discovery/builtin.ts` and discovered unconditionally (only writing/nudging is gated by `autolearn.enabled`); always defers to a same-named authored skill\n\nDedup key is skill name. First item with a given name wins.\n\n### Source toggles and filtering\n\n`loadSkills()` applies these controls:\n\n- source toggles: `enableCodexUser`, `enableClaudeUser`, `enableClaudeProject`, `enablePiUser`, `enablePiProject`, `enableAgentsUser`, `enableAgentsProject`\n- `disabledExtensions` entries with `skill:<name>`\n- `ignoredSkills` (exclude; glob patterns)\n- `includeSkills` (include allowlist; glob patterns; empty means include all)\n\nFilter order is:\n\n1. not disabled by `disabledExtensions`\n2. source enabled\n3. not ignored\n4. included (if include list present)\n\nThe `agents` provider (`.agent[s]/skills`) is the canonical OMP-native location and has its own `enableAgentsUser`/`enableAgentsProject` toggles — disabling Claude/Codex/Pi does **not** turn it off. 
For providers without a dedicated toggle (`claude-plugins`, `opencode`, `gemini`, `github`, …), enablement falls back to: enabled if **any** named source toggle is enabled.\n\n### Collision and duplicate handling\n\n- Capability dedup already keeps first skill per name (highest-precedence provider)\n- `extensibility/skills.ts` additionally:\n - de-duplicates identical files by `realpath` (symlink-safe)\n - emits collision warnings when a later skill name conflicts\n - keeps the convenience `loadSkillsFromDir({ dir, source })` API as a thin adapter over `scanSkillsFromDir`\n- Custom-directory skills are merged after provider skills and follow the same collision behavior\n\n## Runtime usage behavior\n\n### System prompt exposure\n\nSystem prompt construction (`src/system-prompt.ts`) uses discovered skills as follows:\n\n- if `read` tool is available:\n - include discovered skills list in prompt, excluding skills with `hide: true`\n- otherwise:\n - omit discovered list\n\n`hide: true` does not disable the skill. 
Hidden skills are still loaded and remain reachable through `skill://<name>` and `/skill:<name>` when skill commands are enabled.\n\nTask tool subagents receive the session's discovered/provided skills list via normal session creation; there is no per-task skill pinning override.\n\n### Interactive `/skill:<name>` commands\n\nIf `skills.enableSkillCommands` is true, interactive mode registers one slash command per discovered skill.\n\n`/skill:<name> [args]` behavior:\n\n- reads the skill file directly from `filePath`\n- strips frontmatter\n- injects skill body as a custom message\n- delivery mode follows the **submission keybinding**:\n - **Enter** → invokes the skill on the `steer` queue while streaming (matches free-text Enter, which also steers), or as a normal idle prompt when the agent is not streaming\n - **Ctrl+Enter** (`app.message.followUp`) → invokes the skill on the `followUp` queue while streaming, or as a normal idle prompt when the agent is not streaming\n- appends metadata (`Skill: <path>`, optional `User: <args>`)\n\nThere is no flag, mode-selector, or frontmatter knob to override this — the keybinding _is_ the choice, identical to how free text is routed during streaming (`input-controller.ts:562-568` for Enter, `input-controller.ts:961-966` for Ctrl+Enter; both dispatch through `#invokeSkillCommand`).\n\n## `skill://` URL behavior\n\n`src/internal-urls/skill-protocol.ts` supports:\n\n- `skill://<name>` → resolves to that skill's `SKILL.md`\n- `skill://<name>/<relative-path>` → resolves inside that skill directory\n\n```text\nskill:// URL resolution\n\nskill://pdf\n -> <pdf-base>/SKILL.md\n\nskill://pdf/references/tables.md\n -> <pdf-base>/references/tables.md\n\nGuards:\n- reject absolute paths\n- reject `..` traversal\n- reject any resolved path escaping <pdf-base>\n```\n\nResolution details:\n\n- skill name must match exactly\n- relative paths are URL-decoded\n- absolute paths are rejected\n- path traversal (`..`) is rejected\n- resolved path 
must remain within `baseDir`\n- missing files return an explicit `File not found` error\n\nContent type:\n\n- `.md` => `text/markdown`\n- everything else => `text/plain`\n\nNo fallback search is performed for missing assets.\n\n## Skills vs AGENTS.md, commands, tools, hooks\n\n### Skills vs AGENTS.md\n\n- **Skills**: named, optional capability packs selected by task context or explicitly requested\n- **AGENTS.md/context files**: persistent instruction files loaded as context-file capability and merged by level/depth rules\n\n`src/discovery/agents-md.ts` specifically walks ancestor directories from `cwd` to discover standalone `AGENTS.md` files (stopping at the repo root, or home when no repo root is known), skipping files whose containing directory name starts with a dot.\n\n### Skills vs slash commands\n\n- **Skills**: model-readable knowledge/workflow content\n- **Slash commands**: user-invoked command entry points\n- `/skill:<name>` is a convenience wrapper that injects skill text; it does not change skill discovery semantics\n\n### Skills vs custom tools\n\n- **Skills**: documentation/workflow content loaded through prompt context and `read`\n- **Custom tools**: executable tool APIs callable by the model with schemas and runtime side effects\n\n### Skills vs hooks\n\n- **Skills**: passive content\n- **Hooks**: event-driven runtime interceptors that can block/modify behavior during execution\n\n## Practical authoring guidance tied to discovery logic\n\n- Put each skill in its own directory: `<skills-root>/<skill-name>/SKILL.md`\n- Always include explicit `name` and `description` frontmatter\n- Keep referenced assets under the same skill directory and access with `skill://<name>/...`\n- For nested taxonomy (`team/domain/skill`), point `skills.customDirectories` to the nested parent directory; scanning itself remains non-recursive\n- Avoid duplicate skill names across sources; first match wins by provider precedence\n",
66
- "skills/authoring-extensions.md": "---\nname: authoring-extensions\ndescription: Use when creating a new omp extension. Covers ExtensionAPI, factory signature, tool/command/event registration, and local-dev testing.\n---\n\n# Authoring Extensions\n\nExtensions are the primary way to add capabilities to `oh-my-pi`. A single extension module can register tools the LLM can call, slash commands users can invoke, and event handlers that run throughout the session lifecycle — all from one TypeScript file.\n\n## Minimum viable extension\n\n```ts\nimport type { ExtensionAPI } from \"@oh-my-pi/pi-coding-agent\";\n\nexport default function (pi: ExtensionAPI) {\n pi.on(\"session_start\", async (_event, ctx) => {\n ctx.ui.notify(\"My extension loaded!\", \"info\");\n });\n}\n```\n\nThat is a working extension. Drop it into `~/.omp/agent/extensions/hello.ts` and restart omp to see the notification.\n\n## Full example\n\nThe following extension registers a slash command, a tool, and a session-start hook:\n\n```ts\nimport type { ExtensionAPI } from \"@oh-my-pi/pi-coding-agent\";\n\nexport default function myExtension(pi: ExtensionAPI) {\n const z = pi.zod;\n\n // Runs once when the session loads\n pi.on(\"session_start\", async (_event, ctx) => {\n ctx.ui.notify(`Session ready in ${ctx.cwd}`, \"info\");\n });\n\n // Slash command: /greet\n pi.registerCommand(\"greet\", {\n description: \"Send a greeting into the conversation\",\n handler: async (args, ctx) => {\n const name = args.trim() || \"world\";\n pi.sendMessage(\n {\n customType: \"greeting\",\n content: `Hello, ${name}!`,\n display: true,\n attribution: \"user\",\n },\n { triggerTurn: false }\n );\n ctx.ui.notify(`Greeted ${name}`, \"info\");\n },\n });\n\n // LLM-callable tool\n pi.registerTool({\n name: \"word_count\",\n label: \"Word Count\",\n description: \"Count the words in a string\",\n parameters: z.object({\n text: z.string().describe(\"Text to count\"),\n }),\n async execute(_id, params, _signal, _onUpdate, 
_ctx) {\n const count = params.text.split(/\\s+/).filter(Boolean).length;\n return {\n content: [{ type: \"text\", text: String(count) }],\n details: { count },\n };\n },\n });\n}\n```\n\n## Discovery paths\n\nomp loads extension modules from these sources:\n\n1. Native `.omp` locations discovered through the capability system:\n - `<cwd>/.omp/extensions/`\n - `~/.omp/agent/extensions/`\n - legacy extension paths listed in `.omp/settings.json#extensions` or `~/.omp/agent/settings.json#extensions`\n2. Installed plugins under `~/.omp/plugins/node_modules` (`omp plugin install` npm/git specs, or `omp plugin link`) via their `omp.extensions`/`pi.extensions` manifests. Marketplace cache installs do not feed extension modules — they surface skills/commands/hooks/tools/MCP only.\n3. Explicit configured paths passed by the CLI (`omp --extension ./my-ext.ts`, also `-e`; `--hook` is treated as an alias) and by the `extensions:` setting in config.\n\nThe runtime de-duplicates by resolved absolute path — first seen wins.\n\nWhen a path points to a directory, omp resolves the entry point in this order:\n\n1. `package.json` with `omp.extensions` (or legacy `pi.extensions`) field\n2. `index.ts`\n3. `index.js`\n\nWhen scanning an `extensions/` directory, omp also loads direct `*.ts`/`*.js` files and one-level subdirectories that have `index.ts`, `index.js`, or a manifest.\n\nExtension packages can also bundle sibling capability directories. 
When a package is loaded through `extensions:` or `--extension`/`-e`, the `omp-plugins` provider discovers its `skills/`, `hooks/pre|post/`, `tools/`, `commands/`, `rules/`, `prompts/`, and `.mcp.json`.\n\n## package.json manifest\n\nTo package an extension as an installable plugin, add an `omp` field to `package.json`:\n\n```json\n{\n \"name\": \"my-omp-extension\",\n \"omp\": {\n \"extensions\": [\"./src/main.ts\"]\n }\n}\n```\n\nThe legacy `pi` key is also accepted for backwards compatibility:\n\n```json\n{\n \"pi\": {\n \"extensions\": [\"./index.ts\"]\n }\n}\n```\n\nMultiple entry points are supported:\n\n```json\n{\n \"omp\": {\n \"extensions\": [\"./src/safety.ts\", \"./src/tools.ts\"]\n }\n}\n```\n\n## Registering commands\n\n```ts\npi.registerCommand(\"my-cmd\", {\n description: \"What the command does\",\n handler: async (args, ctx) => {\n // args: everything the user typed after /my-cmd\n // ctx: ExtensionCommandContext — includes ctx.ui, ctx.cwd, session controls\n ctx.ui.notify(\"Running!\", \"info\");\n await ctx.waitForIdle();\n await ctx.newSession();\n },\n});\n```\n\n`ExtensionCommandContext` session-control methods (safe to call from commands only):\n\n| Method | Effect |\n|---|---|\n| `waitForIdle()` | Wait for the agent to finish streaming |\n| `newSession(opts?)` | Open a fresh session |\n| `switchSession(path)` | Switch to an existing session file |\n| `branch(entryId)` | Fork from a specific history entry |\n| `navigateTree(id, opts?)` | Jump to a different point in the session tree |\n| `reload()` | Reload the session runtime |\n| `compact(opts?)` | Compact the current context |\n\n## Registering tools\n\nTools are called by the LLM. 
Parameters use [Zod](https://zod.dev) schemas, available at `pi.zod`:\n\n```ts\nconst z = pi.zod;\n\npi.registerTool({\n name: \"search_notes\", // snake_case, unique\n label: \"Search Notes\", // human-readable label for TUI\n description: \"Full-text search through project notes\",\n parameters: z.object({\n query: z.string().describe(\"Search query\"),\n limit: z.number().default(10).describe(\"Max results\").optional(),\n }),\n async execute(toolCallId, params, signal, onUpdate, ctx) {\n if (signal?.aborted) {\n return { content: [{ type: \"text\", text: \"Cancelled\" }] };\n }\n onUpdate?.({ content: [{ type: \"text\", text: \"Searching...\" }] });\n // ... do work ...\n return {\n content: [{ type: \"text\", text: `Found N results for \"${params.query}\"` }],\n details: { query: params.query, count: 0 },\n };\n },\n});\n```\n\n## Subscribing to events\n\n```ts\npi.on(\"tool_call\", async (event, ctx) => {\n // event.toolName, event.input, event.toolCallId\n if (event.toolName !== \"bash\") return;\n\n const command = String((event.input as { command?: unknown }).command ?? \"\");\n if (command.includes(\"rm -rf /\")) {\n return { block: true, reason: \"Blocked by safety policy\" };\n }\n});\n\npi.on(\"turn_end\", async (_event, ctx) => {\n ctx.ui.setStatus(\"tokens\", `~${ctx.getContextUsage()?.tokens ?? 
\"?\"} tokens`);\n});\n```\n\nFull event catalog: see [hooks authoring guide](./authoring-hooks.md).\n\n## Extension vs hook — when to use which\n\n| Need | Use |\n|---|---|\n| Tools + commands + events in one module | **Extension** (`ExtensionAPI`) |\n| Pure event interception (policy, redaction) | **Extension** or **Hook** (both work; extension is preferred) |\n| Legacy hook module already exists | **Hook** (`HookAPI` from `@oh-my-pi/pi-coding-agent/extensibility/hooks`) |\n| Registering a provider, shortcut, or CLI flag | **Extension only** |\n| Shipping as a marketplace plugin | **Extension** (use `package.json` manifest) |\n\nExtensions are a strict superset of hooks. New authoring should use `ExtensionAPI`.\n\n## Debugging\n\nomp writes structured logs to a rotating file under `~/.omp/logs/` (debug level is always on; nothing is written to the console, which would corrupt the TUI). Tail today's log to see extension load diagnostics:\n\n```\ntail -f ~/.omp/logs/omp.$(date +%F).log\n```\n\nFailed extension loads are logged with their path and error. Loaded extensions may also emit their own debug logs via `pi.logger`.\n\nTo temporarily disable a specific extension module by name without removing the file:\n\n```yaml\n# ~/.omp/agent/config.yml\ndisabledExtensions:\n - extension-module:my-ext\n```\n\nThe derived name is the filename stem (or directory name for `index.ts`-style entries): `/path/to/my-ext.ts` → `my-ext`.\n\n## Important constraints\n\n- **Do not call runtime actions during load.** Methods like `pi.sendMessage()` throw `ExtensionRuntimeNotInitializedError` if called synchronously during module evaluation (before a session is active). 
Register handlers/tools/commands during load; perform runtime actions only from event handlers, tools, or commands.\n- **`tool_call` errors are fail-closed.** If a `tool_call` handler throws, the tool is blocked.\n- **Command names must not clash with built-ins.** Conflicts are skipped with a diagnostic log.\n- **Reserved shortcuts are ignored** (`ctrl+c`, `ctrl+d`, `ctrl+z`, `ctrl+k`, `ctrl+p`, `ctrl+l`, `ctrl+o`, `ctrl+t`, `ctrl+g`, `ctrl+q`, `alt+m`, `shift+tab`, `shift+ctrl+p`, `alt+enter`, `escape`, `enter`).\n\n## Further reading\n\n- `docs/extensions.md` — runtime internals and full API surface reference\n- `docs/extension-loading.md` — detailed path resolution rules\n- `docs/hooks.md` — hook subsystem internals\n- `docs/skills/examples/hello-extension/` — complete working example\n",
68
+ "skills/authoring-extensions.md": "---\nname: authoring-extensions\ndescription: Use when creating a new omp extension. Covers ExtensionAPI, factory signature, tool/command/event registration, and local-dev testing.\n---\n\n# Authoring Extensions\n\nExtensions are the primary way to add capabilities to `oh-my-pi`. A single extension module can register tools the LLM can call, slash commands users can invoke, and event handlers that run throughout the session lifecycle — all from one TypeScript file.\n\n## Minimum viable extension\n\n```ts\nimport type { ExtensionAPI } from \"@oh-my-pi/pi-coding-agent\";\n\nexport default function (pi: ExtensionAPI) {\n pi.on(\"session_start\", async (_event, ctx) => {\n ctx.ui.notify(\"My extension loaded!\", \"info\");\n });\n}\n```\n\nThat is a working extension. Drop it into `~/.omp/agent/extensions/hello.ts` and restart omp to see the notification.\n\n## Full example\n\nThe following extension registers a slash command, a tool, and a session-start hook:\n\n```ts\nimport type { ExtensionAPI } from \"@oh-my-pi/pi-coding-agent\";\n\nexport default function myExtension(pi: ExtensionAPI) {\n const z = pi.zod;\n\n // Runs once when the session loads\n pi.on(\"session_start\", async (_event, ctx) => {\n ctx.ui.notify(`Session ready in ${ctx.cwd}`, \"info\");\n });\n\n // Slash command: /greet\n pi.registerCommand(\"greet\", {\n description: \"Send a greeting into the conversation\",\n handler: async (args, ctx) => {\n const name = args.trim() || \"world\";\n pi.sendMessage(\n {\n customType: \"greeting\",\n content: `Hello, ${name}!`,\n display: true,\n attribution: \"user\",\n },\n { triggerTurn: false }\n );\n ctx.ui.notify(`Greeted ${name}`, \"info\");\n },\n });\n\n // LLM-callable tool\n pi.registerTool({\n name: \"word_count\",\n label: \"Word Count\",\n description: \"Count the words in a string\",\n parameters: z.object({\n text: z.string().describe(\"Text to count\"),\n }),\n async execute(_id, params, _signal, _onUpdate, 
_ctx) {\n const count = params.text.split(/\\s+/).filter(Boolean).length;\n return {\n content: [{ type: \"text\", text: String(count) }],\n details: { count },\n };\n },\n });\n}\n```\n\n## Discovery paths\n\nomp loads extension modules from these sources:\n\n1. Native `.omp` locations discovered through the capability system:\n - `<cwd>/.omp/extensions/`\n - `~/.omp/agent/extensions/`\n - legacy extension paths listed in `.omp/settings.json#extensions` or `~/.omp/agent/settings.json#extensions`\n2. Installed plugins under `~/.omp/plugins/node_modules` (`omp plugin install` npm/git specs, or `omp plugin link`) via their `omp.extensions`/`pi.extensions` manifests. Marketplace cache installs do not feed extension modules — they surface skills/commands/hooks/tools/MCP only.\n3. Explicit configured paths passed by the CLI (`omp --extension ./my-ext.ts`, also `-e`; `--hook` is treated as an alias) and by the `extensions:` setting in config.\n\nThe runtime de-duplicates by resolved absolute path — first seen wins.\n\nWhen a path points to a directory, omp resolves the entry point in this order:\n\n1. `package.json` with `omp.extensions` (or legacy `pi.extensions`) field\n2. `index.ts`\n3. `index.js`\n\nWhen scanning an `extensions/` directory, omp also loads direct `*.ts`/`*.js` files and one-level subdirectories that have `index.ts`, `index.js`, or a manifest.\n\nExtension packages can also bundle sibling capability directories. 
When a package is loaded through `extensions:` or `--extension`/`-e`, the `omp-plugins` provider discovers its `skills/`, `hooks/pre|post/`, `tools/`, `commands/`, `rules/`, `prompts/`, and `.mcp.json`.\n\n## package.json manifest\n\nTo package an extension as an installable plugin, add an `omp` field to `package.json`:\n\n```json\n{\n \"name\": \"my-omp-extension\",\n \"omp\": {\n \"extensions\": [\"./src/main.ts\"]\n }\n}\n```\n\nThe legacy `pi` key is also accepted for backwards compatibility:\n\n```json\n{\n \"pi\": {\n \"extensions\": [\"./index.ts\"]\n }\n}\n```\n\nMultiple entry points are supported:\n\n```json\n{\n \"omp\": {\n \"extensions\": [\"./src/safety.ts\", \"./src/tools.ts\"]\n }\n}\n```\n\n## Registering commands\n\n```ts\npi.registerCommand(\"my-cmd\", {\n description: \"What the command does\",\n handler: async (args, ctx) => {\n // args: everything the user typed after /my-cmd\n // ctx: ExtensionCommandContext — includes ctx.ui, ctx.cwd, session controls\n ctx.ui.notify(\"Running!\", \"info\");\n await ctx.waitForIdle();\n await ctx.newSession();\n },\n});\n```\n\n`ExtensionCommandContext` session-control methods (safe to call from commands only):\n\n| Method | Effect |\n|---|---|\n| `waitForIdle()` | Wait for the agent to finish streaming |\n| `newSession(opts?)` | Open a fresh session |\n| `switchSession(path)` | Switch to an existing session file |\n| `branch(entryId)` | Fork from a specific history entry |\n| `navigateTree(id, opts?)` | Jump to a different point in the session tree |\n| `reload()` | Reload the session runtime |\n| `compact(opts?)` | Compact the current context |\n\n## Registering tools\n\nTools are called by the LLM. 
Parameters use [Zod](https://zod.dev) schemas, available at `pi.zod`:\n\n```ts\nconst z = pi.zod;\n\npi.registerTool({\n name: \"search_notes\", // snake_case, unique\n label: \"Search Notes\", // human-readable label for TUI\n description: \"Full-text search through project notes\",\n parameters: z.object({\n query: z.string().describe(\"Search query\"),\n limit: z.number().default(10).describe(\"Max results\").optional(),\n }),\n async execute(toolCallId, params, signal, onUpdate, ctx) {\n if (signal?.aborted) {\n return { content: [{ type: \"text\", text: \"Cancelled\" }] };\n }\n onUpdate?.({ content: [{ type: \"text\", text: \"Searching...\" }] });\n // ... do work ...\n return {\n content: [{ type: \"text\", text: `Found N results for \"${params.query}\"` }],\n details: { query: params.query, count: 0 },\n };\n },\n});\n```\n\n## Subscribing to events\n\n```ts\npi.on(\"tool_call\", async (event, ctx) => {\n // event.toolName, event.input, event.toolCallId\n if (event.toolName !== \"bash\") return;\n\n const command = String((event.input as { command?: unknown }).command ?? \"\");\n if (command.includes(\"rm -rf /\")) {\n return { block: true, reason: \"Blocked by safety policy\" };\n }\n});\n\npi.on(\"turn_end\", async (_event, ctx) => {\n ctx.ui.setStatus(\"tokens\", `~${ctx.getContextUsage()?.tokens ?? 
\"?\"} tokens`);\n});\n\npi.on(\"session_stop\", async (event) => {\n if (event.stop_hook_active) return;\n return { continue: true, additionalContext: `Review final status after turn ${event.turn_id}.` };\n});\n```\n\nFull event catalog: see [extension authoring guide](../extensions.md).\n\n## Extension vs hook — when to use which\n\n| Need | Use |\n|---|---|\n| Tools + commands + events in one module | **Extension** (`ExtensionAPI`) |\n| Pure event interception (policy, redaction) | **Extension** or **Hook** (both work; extension is preferred) |\n| Legacy hook module already exists | **Hook** (`HookAPI` from `@oh-my-pi/pi-coding-agent/extensibility/hooks`) |\n| Registering a provider, shortcut, or CLI flag | **Extension only** |\n| Shipping as a marketplace plugin | **Extension** (use `package.json` manifest) |\n\nExtensions are a strict superset of hooks. New authoring should use `ExtensionAPI`.\n\n## Debugging\n\nomp writes structured logs to a rotating file under `~/.omp/logs/` (debug level is always on; nothing is written to the console, which would corrupt the TUI). Tail today's log to see extension load diagnostics:\n\n```\ntail -f ~/.omp/logs/omp.$(date +%F).log\n```\n\nFailed extension loads are logged with their path and error. Loaded extensions may also emit their own debug logs via `pi.logger`.\n\nTo temporarily disable a specific extension module by name without removing the file:\n\n```yaml\n# ~/.omp/agent/config.yml\ndisabledExtensions:\n - extension-module:my-ext\n```\n\nThe derived name is the filename stem (or directory name for `index.ts`-style entries): `/path/to/my-ext.ts` → `my-ext`.\n\n## Important constraints\n\n- **Do not call runtime actions during load.** Methods like `pi.sendMessage()` throw `ExtensionRuntimeNotInitializedError` if called synchronously during module evaluation (before a session is active). 
Register handlers/tools/commands during load; perform runtime actions only from event handlers, tools, or commands.\n- **`tool_call` errors are fail-closed.** If a `tool_call` handler throws, the tool is blocked.\n- **Command names must not clash with built-ins.** Conflicts are skipped with a diagnostic log.\n- **Reserved shortcuts are ignored** (`ctrl+c`, `ctrl+d`, `ctrl+z`, `ctrl+k`, `ctrl+p`, `ctrl+l`, `ctrl+o`, `ctrl+t`, `ctrl+g`, `ctrl+q`, `alt+m`, `shift+tab`, `shift+ctrl+p`, `alt+enter`, `escape`, `enter`).\n\n## Further reading\n\n- `docs/extensions.md` — runtime internals and full API surface reference\n- `docs/extension-loading.md` — detailed path resolution rules\n- `docs/hooks.md` — hook subsystem internals\n- `docs/skills/examples/hello-extension/` — complete working example\n",
67
69
  "skills/authoring-hooks.md": "---\nname: authoring-hooks\ndescription: Use when creating a new omp hook. Covers HookAPI, event catalog, blocking/overriding tool calls, and context modification.\n---\n\n# Authoring Hooks\n\nHooks are event-driven interceptors that run alongside the agent loop. They are best used for cross-cutting concerns: safety policy, secret redaction, context pruning, audit logging. A hook module registers handlers via `pi.on(event, handler)` and can block tool execution, override tool output, or rewrite the message context before each LLM call.\n\n> **Relationship to extensions:** The hook subsystem (`HookAPI`) is the legacy API. The extension runner now handles everything hooks can do plus more. `ExtensionAPI` supports the hook event model plus extension-only events. Use `ExtensionAPI` for new work; use `HookAPI` only if you are maintaining an existing hook module.\n\n## Factory signature\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\nexport default function myHook(omp: HookAPI): void {\n omp.on(\"tool_call\", async (event, ctx) => {\n // intercept every tool call\n });\n}\n```\n\nThe default export must be a plain function (not async, not a class). It receives a `HookAPI` instance and must register all handlers synchronously during execution.\n\nAlternatively, using `ExtensionAPI` (preferred):\n\n```ts\nimport type { ExtensionAPI } from \"@oh-my-pi/pi-coding-agent\";\n\nexport default function myExtension(pi: ExtensionAPI): void {\n pi.on(\"tool_call\", async (event, ctx) => { /* ... 
*/ });\n}\n```\n\n## Event catalog\n\n### Tool lifecycle\n\n| Event | Fires | Can return |\n|---|---|---|\n| `tool_call` | Before every tool execution | `{ block?: boolean; reason?: string }` |\n| `tool_result` | After every tool execution | `{ content?; details?; isError?: boolean }` |\n\n### Session lifecycle\n\n| Event | Fires | Can return |\n|---|---|---|\n| `session_start` | On initial session load | — |\n| `session_before_switch` | Before session switch | `{ cancel?: boolean }` |\n| `session_switch` | After session switch | — |\n| `session_before_branch` | Before session branch | `{ cancel?: boolean; skipConversationRestore?: boolean }` |\n| `session_branch` | After session branch | — |\n| `session_before_compact` | Before compaction | `{ cancel?: boolean; compaction?: CompactionResult }` |\n| `session.compacting` | During compaction (inject context) | `{ context?: string[]; prompt?: string; preserveData?: Record<string, unknown> }` |\n| `session_compact` | After compaction | — |\n| `session_before_tree` | Before tree navigation | `{ cancel?: boolean; summary?: { summary: string; details?: unknown } }` |\n| `session_tree` | After tree navigation | — |\n| `session_shutdown` | On session shutdown | — |\n\n### Agent/turn lifecycle\n\n| Event | Fires | Can return |\n|---|---|---|\n| `before_agent_start` | Before agent starts a turn | `{ message?: { customType; content; display; details; attribution? 
} }` |\n| `agent_start` | Agent streaming starts | — |\n| `agent_end` | Agent streaming ends | — |\n| `turn_start` | Start of a user→agent turn | — |\n| `turn_end` | End of a user→agent turn | — |\n| `context` | Before each LLM API call | `{ messages?: Message[] }` |\n| `auto_compaction_start` | Auto-compaction begins | — |\n| `auto_compaction_end` | Auto-compaction ends | — |\n| `auto_retry_start` | Auto-retry begins | — |\n| `auto_retry_end` | Auto-retry ends | — |\n| `ttsr_triggered` | A TTSR (Time Traveling Streamed Rules) rule was triggered | — |\n| `todo_reminder` | Todo reminder fires | — |\n\nExtension-only events such as `tool_execution_start`, `tool_execution_update`, `tool_execution_end`, `input`, `user_bash`, and `user_python` require `ExtensionAPI`.\n\n## Pre-tool blocking contract\n\nReturn `{ block: true, reason: \"...\" }` from a `tool_call` handler to prevent execution:\n\n```ts\nomp.on(\"tool_call\", async (event, ctx) => {\n if (event.toolName === \"bash\") {\n const cmd = String(event.input.command ?? 
\"\");\n if (/\\brm\\s+-rf\\s+\\//.test(cmd)) {\n return { block: true, reason: \"Refusing to delete root filesystem\" };\n }\n }\n});\n```\n\nContract:\n\n- If **any** handler returns `{ block: true }`, execution stops immediately.\n- `reason` is returned to the LLM as the tool error text.\n- If a handler **throws**, the tool is also blocked (fail-closed).\n- Last non-blocking return wins for non-blocking results; first `block: true` short-circuits.\n\n## Post-tool override contract\n\nReturn `{ content, details, isError }` from a `tool_result` handler to patch what the LLM sees:\n\n```ts\nomp.on(\"tool_result\", async (event, ctx) => {\n if (event.toolName === \"read\" && !event.isError) {\n const redacted = event.content.map(chunk => {\n if (chunk.type !== \"text\") return chunk;\n return {\n ...chunk,\n text: chunk.text.replace(/(?:sk|pk)-[a-zA-Z0-9]{20,}/g, \"[REDACTED_API_KEY]\"),\n };\n });\n return { content: redacted };\n }\n});\n```\n\nContract:\n\n- Handlers run in registration order. For `HookAPI`, each handler receives the original tool result event, and the last returned override wins.\n- `content` replaces the full content array for the LLM.\n- `details` replaces the structured details object.\n- `isError` exists on the shared result type, but `HookToolWrapper` does not propagate it into a successful tool result; on a tool failure, the original error is rethrown after handlers complete.\n- On a tool failure, `tool_result` is still emitted with `isError: true`.\n\n## Context modification contract\n\nReturn `{ messages: [...] 
}` from a `context` handler to rewrite the message list before each LLM API call:\n\n```ts\nomp.on(\"context\", async (event, ctx) => {\n // Remove debug-only custom messages from LLM context\n const filtered = event.messages.filter(\n msg => !(msg.role === \"custom\" && msg.customType === \"debug-only\")\n );\n return { messages: filtered };\n});\n```\n\nContract:\n\n- `event.messages` is the current accumulated list.\n- Handlers run in order; each receives the output of the previous handler.\n- Return `undefined` (or nothing) to pass messages through unmodified.\n\n## Three complete examples\n\n### 1. rm-rf blocker\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\nexport default function rmRfBlocker(omp: HookAPI): void {\n omp.on(\"tool_call\", async (event, ctx) => {\n if (event.toolName !== \"bash\") return;\n\n const cmd = String(event.input.command ?? \"\");\n if (!/\\brm\\s+-rf\\s+\\//.test(cmd)) return;\n\n // Allow if user explicitly confirms (interactive mode only)\n if (ctx.hasUI) {\n const allow = await ctx.ui.confirm(\n \"Dangerous command\",\n `This command deletes from root:\\n${cmd}\\n\\nProceed?`\n );\n if (allow) return;\n }\n\n return { block: true, reason: \"rm -rf / blocked by safety policy\" };\n });\n}\n```\n\n### 2. API-key redactor\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\n// Common API-key shapes. 
Not exhaustive — providers using bespoke formats\n// (Anthropic `sk-ant-…`, JWT-style bearers, gateway-specific prefixes, etc.)\n// need their own entries.\nconst SECRET_PATTERNS = [\n /\\b(sk|pk)-[a-zA-Z0-9]{20,}\\b/g,\n /\\bAKIA[A-Z0-9]{16}\\b/g,\n /\\bghp_[a-zA-Z0-9]{36}\\b/g,\n // Zhipu / GLM Coding Plan: `<id>.<secret>` (no `sk-` prefix).\n /\\b[a-zA-Z0-9]{16,}\\.[a-zA-Z0-9]{16,}\\b/g,\n /\\b[a-zA-Z0-9_-]{20,}\\s*=\\s*[\"']?[a-zA-Z0-9._/+=-]{20,}[\"']?/g,\n];\n\nexport default function apiKeyRedactor(omp: HookAPI): void {\n omp.on(\"tool_result\", async (event) => {\n if (event.isError) return;\n\n let changed = false;\n const redacted = event.content.map(chunk => {\n if (chunk.type !== \"text\") return chunk;\n let text = chunk.text;\n for (const pattern of SECRET_PATTERNS) {\n const next = text.replace(pattern, \"[REDACTED]\");\n if (next !== text) { changed = true; text = next; }\n }\n return { ...chunk, text };\n });\n\n if (changed) return { content: redacted };\n });\n}\n```\n\n### 3. Context filter\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\nexport default function contextFilter(omp: HookAPI): void {\n omp.on(\"context\", async (event) => {\n const MAX_TOOL_OUTPUT_CHARS = 8_000;\n\n const trimmed = event.messages.map(msg => {\n // Truncate very large tool results to keep context manageable\n if (msg.role !== \"toolResult\") return msg;\n const content = msg.content.map(chunk => {\n if (chunk.type !== \"text\" || chunk.text.length <= MAX_TOOL_OUTPUT_CHARS) return chunk;\n return {\n ...chunk,\n text: chunk.text.slice(0, MAX_TOOL_OUTPUT_CHARS) + \"\\n[... truncated by context-filter hook]\",\n };\n });\n return { ...msg, content };\n });\n\n return { messages: trimmed };\n });\n}\n```\n\n## UI methods in hook context\n\n`ctx.ui` is a `HookUIContext`. 
Available methods:\n\n| Method | Description |\n|---|---|\n| `notify(message, type?)` | Show an in-app notification |\n| `setStatus(key, text)` | Set footer status text (keyed, sorted by key) |\n| `select(title, options)` | Show a selection dialog |\n| `confirm(title, message)` | Show a yes/no dialog |\n| `input(title, placeholder?)` | Show a text input dialog |\n| `editor(title, prefill?, { signal }?, { promptStyle }?)` | Show a multi-line editor |\n| `setEditorText(text)` | Set the input editor content |\n| `getEditorText()` | Get current input editor content |\n| `custom(factory)` | Render a custom TUI component |\n| `theme` | Current theme object |\n\nPass `{ promptStyle: true }` as the fourth argument when Enter should submit and Shift+Enter should insert a newline. The default hook editor behavior keeps Enter as newline and Ctrl+Enter as submit.\n\n`ctx.hasUI` is `false` in headless/print/subagent mode — always guard interactive calls.\n\n## Further reading\n\n- `docs/hooks.md` — hook subsystem internals, ordering rules, error propagation\n- `docs/extensions.md` — `ExtensionAPI` (superset of `HookAPI`)\n- `docs/skills/examples/safety-hook/` — complete working example\n",
68
70
  "skills/authoring-marketplaces.md": "---\nname: authoring-marketplaces\ndescription: Use when creating a new omp marketplace. Covers marketplace.json schema, source types, install commands, and publishing.\n---\n\n# Authoring Marketplaces\n\nA marketplace is a Git repository (or local directory) that contains a catalog file at either `.omp-plugin/marketplace.json` (preferred for omp-specific catalogs) or `.claude-plugin/marketplace.json` (Claude Code-compatible; used as the fallback). Anyone can author one. Users add it with `/marketplace add owner/repo` and then install individual plugins from it.\n\n## Minimum viable marketplace\n\n```\nmy-marketplace/\n .claude-plugin/\n marketplace.json\n plugins/\n my-plugin/\n skills/\n my-skill/\n SKILL.md\n```\n\n```json\n{\n \"name\": \"my-marketplace\",\n \"owner\": { \"name\": \"Your Name\" },\n \"plugins\": [\n {\n \"name\": \"my-plugin\",\n \"description\": \"What it does\",\n \"source\": \"./plugins/my-plugin\"\n }\n ]\n}\n```\n\nPush to GitHub. Users install with:\n\n```\n/marketplace add your-github-username/my-marketplace\n/marketplace install my-plugin@my-marketplace\n```\n\n## marketplace.json schema\n\nThe catalog file lives at either `.omp-plugin/marketplace.json` or `.claude-plugin/marketplace.json` in the repository root. omp prefers the `.omp-plugin/` path and falls back to the Claude path; a repository may publish both to expose tool-specific catalogs from a single source tree.\n\n### Top-level fields\n\n| Field | Required | Description |\n|---|---|---|\n| `name` | yes | Marketplace name. Lowercase alphanumeric, hyphens, dots. Must start and end with alphanumeric. Max 64 chars. 
|\n| `owner` | yes | Object with at minimum `owner.name` (string) |\n| `owner.name` | yes | Marketplace owner name |\n| `owner.email` | no | Owner contact email |\n| `plugins` | yes | Array of plugin entries (see below) |\n| `metadata.description` | no | Short description of the marketplace |\n| `metadata.version` | no | Catalog metadata version string |\n| `metadata.pluginRoot` | no | String prepended to all relative plugin source paths |\n| extra top-level fields | no | Preserved by the parser but not used by marketplace install/runtime logic |\n\n### Plugin entry fields\n\n| Field | Required | Description |\n|---|---|---|\n| `name` | yes | Plugin name (same naming rules as marketplace name) |\n| `source` | yes | Where to find the plugin — string or object (see source types below) |\n| `description` | no | Short plugin description |\n| `version` | no | Version string |\n| `author` | no | `{ name, email? }` |\n| `homepage` | no | URL |\n| `category` | no | e.g. `development`, `productivity`, `security` |\n| `tags` / `keywords` | no | Arrays of string tags/keywords |\n| `repository` | no | Repository URL |\n| `license` | no | License string |\n| `strict` | no | Boolean plugin metadata flag |\n| `commands`, `agents`, `hooks`, `mcpServers`, `lspServers` | no | Capability metadata used by plugin tooling and selectors |\n\n### Full catalog example\n\n```json\n{\n \"$schema\": \"https://anthropic.com/claude-code/marketplace.schema.json\",\n \"name\": \"acme-plugins\",\n \"owner\": {\n \"name\": \"Acme Corp\",\n \"email\": \"plugins@acme.example\"\n },\n \"metadata\": {\n \"description\": \"Official Acme plugins for oh-my-pi\"\n },\n \"plugins\": [\n {\n \"name\": \"acme-linter\",\n \"description\": \"Enforce Acme coding standards\",\n \"category\": \"development\",\n \"source\": \"./plugins/linter\"\n },\n {\n \"name\": \"acme-deploy\",\n \"description\": \"One-command deploy to Acme cloud\",\n \"category\": \"devops\",\n \"source\": {\n \"source\": \"github\",\n 
\"repo\": \"acme-corp/omp-deploy-plugin\",\n \"ref\": \"main\"\n }\n }\n ]\n}\n```\n\n## Plugin source types\n\n### 1. Relative path string\n\nPoints to a subdirectory inside the marketplace repository itself. Must start with `./`.\n\n```json\n\"source\": \"./plugins/my-plugin\"\n```\n\nThe path is resolved relative to the marketplace repository root. Path traversal outside the repo root is rejected.\n\nUse `metadata.pluginRoot` to avoid repeating a common prefix:\n\n```json\n{\n \"metadata\": { \"pluginRoot\": \"./plugins\" },\n \"plugins\": [\n { \"name\": \"plugin-a\", \"source\": \"./plugin-a\" },\n { \"name\": \"plugin-b\", \"source\": \"./plugin-b\" }\n ]\n}\n```\n\n### 2. Git URL\n\nA full Git repository URL. Optionally pin to a branch/tag (`ref`) or exact commit (`sha`):\n\n```json\n\"source\": {\n \"source\": \"url\",\n \"url\": \"https://github.com/org/my-plugin.git\",\n \"ref\": \"main\",\n \"sha\": \"a1b2c3d4...\"\n}\n```\n\n### 3. GitHub shorthand\n\nShorthand for GitHub repositories. Functionally equivalent to a Git URL but more concise:\n\n```json\n\"source\": {\n \"source\": \"github\",\n \"repo\": \"org/my-plugin\",\n \"ref\": \"v2.1.0\",\n \"sha\": \"a1b2c3d4...\"\n}\n```\n\n### 4. Git subdirectory (monorepo)\n\nFor plugins living inside a subdirectory of a larger repository. `url` accepts a full HTTPS URL or a GitHub `owner/repo` shorthand:\n\n```json\n\"source\": {\n \"source\": \"git-subdir\",\n \"url\": \"https://github.com/org/monorepo.git\",\n \"path\": \"packages/my-plugin\",\n \"ref\": \"main\",\n \"sha\": \"a1b2c3d4...\"\n}\n```\n\nThe `path` must resolve inside the cloned repository — directory escape is rejected.\n\n### 5. NPM package\n\nDeclares the plugin as an npm package. `version` is optional:\n\n```json\n\"source\": {\n \"source\": \"npm\",\n \"package\": \"@acme/omp-plugin\",\n \"version\": \"1.2.0\"\n}\n```\n\n> Note: npm plugin sources are declared in the schema but installation support is not yet fully implemented. 
Use Git-based sources for plugins that need to work today.\n\n## Plugin structure\n\nA plugin directory (regardless of source type) ships its content in conventional locations, all optional:\n\n```\nmy-plugin/\n skills/<name>/SKILL.md ← skills\n commands/*.md ← slash commands\n agents/*.md ← subagent definitions\n hooks/pre/, hooks/post/ ← hooks\n tools/ ← custom tools\n .mcp.json ← MCP server definitions\n package.json ← optional; its version is a fallback when the catalog entry has no version\n README.md ← recommended: description + usage\n```\n\n> Note: extension modules declared via `package.json` `omp.extensions` are **not** loaded from marketplace installs — that mechanism only applies to npm-installed or `omp plugin link`ed plugins. Ship marketplace plugin behavior through the conventional directories above.\n\n## Install command\n\n```\n/marketplace install name@marketplace-name\n/marketplace install --force name@marketplace-name # reinstall\n/marketplace install --scope project name@marketplace # project-scoped\n```\n\nCLI equivalent:\n\n```\nomp plugin marketplace add owner/repo\nomp plugin install name@marketplace-name\n```\n\nScope behavior:\n\n- **user** (default) — installed in `~/.omp/plugins/installed_plugins.json`, available in all projects\n- **project** — installed in `<project>/.omp/plugins/installed_plugins.json`, available only in that project\n\nProject-scoped installs shadow user-scoped installs of the same plugin name.\n\n## Naming rules\n\nMarketplace names and plugin names must:\n\n- Contain only lowercase letters, digits, hyphens (`-`), and dots (`.`)\n- Start and end with a lowercase letter or digit\n- Be at most 64 characters\n\nPlugin IDs (`name@marketplace`) must be at most 128 characters total.\n\nValid: `my-plugin`, `code-review`, `acme.tools`, `ai-v2`\nInvalid: `-bad-start`, `bad-end-`, `.dot-start`, `Under_score`, `HAS_CAPS`\n\n## Publishing workflow\n\n1. 
Create `marketplace.json` at `.omp-plugin/marketplace.json` (omp-only) or `.claude-plugin/marketplace.json` (shared with Claude Code) in a new Git repo.\n2. Add plugin entries pointing to subdirectories (or external sources).\n3. Push to GitHub.\n4. Share the `owner/repo` string. Users add it with `/marketplace add owner/repo`.\n5. When you update the catalog, users run `/marketplace update your-marketplace-name` to pull the latest.\n\nTo test locally before publishing:\n\n```\n/marketplace add ./path/to/my-marketplace\n```\n\nLocal path sources also accept `~/` and absolute paths.\n\n## Further reading\n\n- `docs/marketplace.md` — marketplace system internals, on-disk layout, command reference\n- `docs/skills/authoring-extensions.md` — how to author the extension modules inside plugins\n- `docs/skills/examples/mini-marketplace/` — minimal working marketplace example\n",
69
71
  "skills/examples/hello-extension/README.md": "# hello-extension\n\nA minimal `oh-my-pi` extension that demonstrates the two most common authoring patterns: subscribing to `session_start` to notify on load, and registering a `/hello` slash command that sends a greeting into the conversation. It is intentionally small — use it as a copy-paste starting point for your own extension.\n\n## Install\n\n**Option A — drop into user extensions directory:**\n\n```\ncp -r . ~/.omp/agent/extensions/hello-extension\n```\n\nRestart `omp`. You will see the startup notification immediately.\n\n**Option B — point the settings `extensions` array at it:**\n\n```yaml\n# ~/.omp/agent/config.yml\nextensions:\n - /path/to/hello-extension\n```\n\n**Option C — load once via CLI flag:**\n\n```\nomp --extension ./hello-extension\n```\n\n## Usage\n\nAfter loading, type `/hello` or `/hello Ada` in the omp prompt. The command sends a visible greeting custom message into the conversation and shows a \"Message sent!\" notification.\n\n## What it demonstrates\n\n- Default export factory receiving `ExtensionAPI`\n- `pi.on(\"session_start\", ...)` — session lifecycle hook\n- `pi.registerCommand(...)` — slash command registration\n- `ctx.ui.notify(...)` — user-facing notification\n- `package.json` with `omp.extensions` manifest field\n",
@@ -97,7 +99,7 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
97
99
  "tools/irc.md": "# irc\n\n> Send and receive messages between agents over a process-global mailbox bus.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/irc.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/irc.md`\n- Key collaborators:\n - `packages/coding-agent/src/irc/bus.ts` — process-global `IrcBus`: per-agent mailboxes, delivery, waiter matching.\n - `packages/coding-agent/src/registry/agent-registry.ts` — process-global agent directory and status.\n - `packages/coding-agent/src/registry/agent-lifecycle.ts` — revival of parked recipients on direct send.\n - `packages/coding-agent/src/session/agent-session.ts` — `deliverIrcMessage(...)`: recipient-side injection and wake turns.\n - `packages/coding-agent/src/prompts/system/irc-incoming.md` — incoming-message rendering for the recipient.\n - `packages/coding-agent/src/prompts/system/irc-autoreply.md` — prompt for the ephemeral auto-reply side turn (busy recipient, async disabled).\n - `packages/coding-agent/src/config/settings-schema.ts` — `irc.timeoutMs`.\n - `packages/coding-agent/src/modes/controllers/event-controller.ts` — renders IRC events into chat UI.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `op` | `\"send\" \\| \"wait\" \\| \"inbox\" \\| \"list\"` | Yes | Operation. |\n| `to` | `string` | `send` | Recipient agent id, or `\"all\"` for broadcast. Whitespace trimmed; self-send rejected. |\n| `message` | `string` | `send` | Message body. Empty-after-trim is rejected. |\n| `replyTo` | `string` | No | `send`: message id being answered. |\n| `await` | `boolean` | No | `send`: after delivery, block until the next message from that peer arrives (round-trip sugar). Invalid with `to: \"all\"`. |\n| `from` | `string` | No | `wait`: only accept a message from this agent id. |\n| `timeoutMs` | `number` | No | `wait` / `send await:true`: timeout in milliseconds; `0` waits indefinitely. Defaults to `irc.timeoutMs`. 
|\n| `peek` | `boolean` | No | `inbox`: list messages without consuming them. |\n\n## Outputs\n- Single-shot `AgentToolResult`; no streaming updates.\n- `content` is one text block:\n - `list`: `No other agents.` or `<n> peer(s):` bullets — `id [displayName · kind · status]` plus unread count, parent, and last-activity age; a footer notes that parked agents are revived automatically when messaged.\n - `send`: per-recipient delivery receipts (`injected` / `woken` / `revived` / `failed — <error>`); with `await: true`, the reply body or a clean no-reply timeout note.\n - `wait`: the consumed message as `[<msgId>] <from>: <body>` (with a reply-to tag), or `No message within <duration>.`\n - `inbox`: `Inbox empty.` or `<n> message(s):` bullets.\n- `details: IrcDetails`: `{ op, from?, to?, receipts?, waited?, inbox?, peers? }`. `waited` is `null` when a wait timed out; `receipts` carry `{ to, outcome, error? }`.\n\n## Flow\n1. `IrcTool.createIf` constructs the tool only when `isIrcEnabled` passes and the session has both an `AgentRegistry` and `getAgentId`. There is no `irc.enabled` setting: availability is derived — true for every subagent (`taskDepth > 0`; a parent always exists) and for any session that can still spawn subagents through the task tool. Only a top-level session with task spawning unavailable has no peers, hence no irc.\n2. `execute` resolves the registry and sender id; missing either returns a text error result instead of throwing.\n3. `op: \"list\"`: `registry.list()` minus self and minus `aborted` agents — `parked` peers ARE listed. Each row includes the unread count from `IrcBus.unreadCount(...)` and last activity.\n4. `op: \"send\"` validates `to`/`message`, rejects self-sends, and rejects `await` with `to: \"all\"`.\n5. Target resolution: broadcasts fan out to `registry.listVisibleTo(senderId)` (live peers only — `running`/`idle`; reviving every parked agent on a broadcast would be a stampede). 
Direct sends go through the bus unfiltered, so a parked recipient is revived.\n6. `IrcBus.send(...)` is fire-and-forget — it never blocks on the recipient generating anything. Delivery by recipient status:\n - `running` → message enqueued and injected as a non-interrupting aside at the recipient's next step boundary (`AgentSession.deliverIrcMessage`, rendered from `irc-incoming.md`, persisted as an `irc:incoming` custom message) — receipt `injected`. If the sender awaits a reply (`expectsReply` from `await: true`) and the recipient has `async.enabled` off, the recipient also generates an ephemeral no-tools auto-reply (`runEphemeralTurn`, the `/btw` pipeline) and sends it back over the bus with `replyTo` set, recording an `irc:autoreply` aside in its own history — a recipient blocked in a synchronous task spawn can never reach a step boundary before the sender's timeout otherwise;\n - `idle` (live session) → enqueued and a real turn is started — the message wakes the agent — receipt `woken`;\n - `parked` → `AgentLifecycleManager.global().ensureLive(to)` revives the session first, then the wake path — receipt `revived`;\n - resolution/revival failure → receipt `failed` with the error; other recipients still complete.\n7. `send` with `await: true` then calls `IrcBus.wait(senderId, { from: to }, timeoutMs, signal)` and appends the reply (or a no-reply note suggesting `inbox`/`wait`) to the result. Awaited sends pass `{ expectsReply: true }` to `IrcBus.send` so a busy recipient can auto-reply (see step 6).\n8. `op: \"wait\"` blocks until a message for the caller (optionally filtered by `from`) arrives, consumes it, and returns it. Timeout returns a clean \"no message\" result, not an error.\n9. `op: \"inbox\"` drains pending messages (or peeks with `peek: true`) without blocking.\n10. Timeouts resolve as `params.timeoutMs ?? 
irc.timeoutMs`, normalized: `0` disables the timeout, negative/non-finite values fall back to the default `120_000`, positive values are truncated and clamped to ≥ 1 ms.\n\n## Modes / Variants\n- `list`: enumerate peers with status (`running`/`idle`/`parked`), unread counts, and last activity.\n- `send` direct: one exact peer id; wakes idle peers, revives parked ones.\n- `send` broadcast: `to: \"all\"` to every live peer; parked peers are skipped.\n- `send` + `await: true`: round-trip convenience — send, then wait for the next message from that peer. Marks the send `expectsReply`, enabling the busy-recipient auto-reply path when async execution is disabled.\n- `wait`: block for an incoming message, optionally filtered by sender.\n- `inbox`: non-blocking drain or peek.\n\n## Side Effects\n- Session state\n - Reads the process-global `AgentRegistry`; direct sends to parked agents revive their sessions through the lifecycle manager.\n - Persists `irc:incoming` custom messages into recipient history; replies are ordinary turns in the recipient's own session.\n - Waking an idle/parked recipient starts a real agent turn (model requests, tool use) in that recipient.\n- User-visible prompts / interactive UI\n - IRC events render as transcript cards in the TUI; the Agent Hub shows per-agent unread counts.\n- Background work / cancellation\n - `send` itself never blocks on reply generation; only `wait` (and `await: true`) blocks, bounded by the resolved timeout and the caller's `AbortSignal`.\n- Network\n - No IRC server connection. 
Woken recipients make their own model-provider calls as part of their turn.\n- Filesystem\n - No direct filesystem writes in the tool itself; recipient turns persist to their session JSONL as usual.\n\n## Limits & Caps\n- Availability gates: `isIrcEnabled` (running as a subagent, or task spawning available — there is no `irc.enabled` setting), an `AgentRegistry`, and a caller agent id.\n- Mailboxes are bounded at 100 messages per agent (`MAILBOX_CAP` in `packages/coding-agent/src/irc/bus.ts`); oldest messages are dropped beyond the cap.\n- `irc.timeoutMs` defaults to `120_000` and is the default `wait` / `send await:true` timeout; `0` disables the timeout, non-finite or negative values fall back to the default, positive values are truncated and clamped to at least `1` ms.\n- Broadcast scope: live peers only (`running`/`idle`) via `listVisibleTo`; direct sends address any non-aborted agent, including parked ones.\n\n## Errors\n- The tool returns text errors (with `isError: true`), not thrown exceptions, for:\n - missing registry: `IRC is unavailable in this session.`\n - missing sender id: `IRC is unavailable: caller has no agent id.`\n - missing `to` / `message` on `send`\n - self-send: `Cannot send an IRC message to yourself.`\n - `await` with `to: \"all\"`\n - unknown op\n- Per-recipient delivery failures surface as `failed` receipts with the error message; `send` is marked `isError` only when no recipient received the message.\n- `wait` timeout is a normal result (`waited: null`), not an error.\n\n## Notes\n- This is IRC-like naming only: no servers, sockets, channels, or join/part state. 
Addressing is by exact registry agent id.\n- Replies are real turns by the recipient, with one exception: an awaited send to a mid-turn recipient with `async.enabled` off triggers an ephemeral no-tools auto-reply (the old `respondAsBackground` path), because a recipient blocked in a synchronous task spawn whose batch includes the sender can never run a real turn before the sender's timeout. A recipient may otherwise keep working before answering; check `inbox` or `wait` again rather than re-sending.\n- Wake-on-message is the only resume primitive: messaging a parked agent revives it (same `ensureLive` path as the Agent Hub). The task tool has no `resume` parameter.\n- Message ids are Snowflakes; pass them as `replyTo` to thread an answer to a specific message.\n- Persistence is per recipient history: the sender gets receipts in the tool result; the recipient sees the injected `irc:incoming` message in its own transcript (visible via `history://<id>`).\n",
98
100
  "tools/job.md": "# job\n\n> Wait for or cancel background jobs managed by the session async runtime.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/job.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/job.md`\n- Key collaborators:\n - `packages/coding-agent/src/async/job-manager.ts` — job registry, cancellation, delivery suppression.\n - `packages/coding-agent/src/tools/bash.ts` — explicit async bash and auto-backgrounded bash jobs.\n - `packages/coding-agent/src/task/index.ts` — async task-job scheduling.\n - `packages/coding-agent/src/sdk.ts` — automatic follow-up delivery for unsuppressed completions.\n - `packages/coding-agent/src/config/settings-schema.ts` — `async.pollWaitDuration` options.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `poll` | `string[]` | No | Job ids to watch. Cannot be combined with `list`. If omitted (and `cancel` is also omitted), the tool watches all running jobs owned by the calling agent. If provided, missing ids — and ids owned by other agents — are silently filtered out before waiting. |\n| `cancel` | `string[]` | No | Job ids to cancel before any polling. Missing ids (and other agents' jobs) are reported as `not_found`; non-running ids as `already_completed`. |\n| `list` | `boolean` | No | Return an immediate snapshot of every job spawned by the calling agent (running + completed within retention) without waiting. Read-only — cannot be combined with `poll` or `cancel`. 
|\n\n## Outputs\nThe tool returns one text block plus `details`.\n\n- `content[0].text`: markdown-like plain text sections assembled by `#buildResult(...)`:\n - `## Cancelled (N)` for cancel outcomes.\n - `## Completed (N)` for non-running jobs, including stored `resultText` and `errorText`.\n - `## Still Running (N)` for jobs still in `running`.\n- `details.jobs`: array of snapshots:\n - `id: string`\n - `type: \"bash\" | \"task\"`\n - `status: \"running\" | \"completed\" | \"failed\" | \"cancelled\"`\n - `label: string`\n - `durationMs: number`\n - optional `resultText`, `errorText`\n- `details.cancelled` appears only when `cancel` was passed; each item is `{ id, status }` where status is `\"cancelled\" | \"not_found\" | \"already_completed\"`.\n\nStreaming behavior:\n- During a polling wait, `execute(...)` emits `onUpdate(...)` every 500 ms with an empty text block and fresh `details.jobs` snapshots.\n- Final return is single-shot after a completion, timeout, abort, or immediate fast path.\n\nRead-only snapshot path:\n- Calling `job` with `list: true` returns a markdown summary of every job spawned by the calling agent (running + completed within retention) without waiting.\n\n## Flow\n1. `JobTool` is registered unconditionally in `packages/coding-agent/src/tools/index.ts`; there is no `async.enabled` gate (the manager may still carry bash or task jobs from before a setting change).\n2. `execute(...)` fetches `session.asyncJobManager`. If absent, it returns `Async execution is disabled; no background jobs are available.`\n3. `cancel` ids are processed first:\n - `manager.getJob(id)` missing → `not_found`.\n - existing job with `status !== \"running\"` → `already_completed`.\n - running job → `manager.cancel(id)`, which sets `job.status = \"cancelled\"`, aborts the controller, and schedules eviction.\n4. 
Polling mode is chosen with `const shouldPoll = requestedPollIds !== undefined || cancelIds.length === 0`:\n - only `cancel` present → return immediately, no wait.\n - explicit `poll`, or no args at all → proceed to watch jobs.\n5. Watch set resolution:\n - explicit `poll` → resolve ids via `#visibleJobs(...)`, dropping missing ids and jobs owned by other agents.\n - no `poll` and no `cancel` → `manager.getRunningJobs(ownerFilter)` (jobs owned by the calling agent).\n6. Empty watch set returns immediately:\n - if cancellations happened, return snapshots for the cancelled ids that still exist.\n - else return either `No matching jobs found for IDs: ...` or `No running background jobs to wait for.`\n7. If every watched job is already non-running, `#buildResult(...)` returns immediately without waiting.\n8. Otherwise the tool waits on `Promise.race(...)` across:\n - every watched running job's `job.promise`,\n - a timeout promise for the poll wait window — `manager.nextPollWaitMs(ownerId)` when `async.pollWaitDuration` is `smart`, otherwise the fixed duration,\n - the tool-call abort signal when present.\n9. Before waiting, it calls `manager.watchJobs(watchedJobIds)`. This suppresses automatic completion delivery for those ids while they are being watched.\n10. If `onUpdate` exists, a 500 ms interval sends progress snapshots from `#snapshotJobs(...)`; one snapshot is emitted immediately before entering the race.\n11. In `finally`, the tool always calls `manager.unwatchJobs(...)`, clears the timeout, and stops the progress interval.\n12. `#buildResult(...)` deduplicates jobs, snapshots current manager state, then calls `manager.acknowledgeDeliveries(...)` for every non-running job in the result. That suppresses later automatic follow-up delivery for the same completions and removes queued deliveries for those ids.\n13. The final text groups jobs by non-running vs still-running state. 
A timeout is not an error path; it simply returns the current snapshot.\n\n## Modes / Variants\n- Poll all running jobs: call with neither `poll` nor `cancel`.\n- Poll explicit ids: call with `poll` only.\n- Cancel only: call with `cancel` only; cancellations happen and the tool returns immediately.\n- Cancel then poll: call with both. Cancellations are applied first, then the tool watches the remaining resolved `poll` ids.\n- Read-only inspection: call with `list: true` for the same snapshot data without waiting on completion.\n\nSpawn paths that produce jobs:\n- `packages/coding-agent/src/tools/bash.ts`\n - `async: true` always registers a `type: \"bash\"` job with `AsyncJobManager.register(...)` and returns a start message.\n - auto-background mode (`bash.autoBackground.enabled`) starts the same managed job path for non-PTY commands, waits up to `min(bash.autoBackground.thresholdMs, timeoutMs - 1000)`, and if the command is still running returns a background-job start result instead of inline command output.\n- `packages/coding-agent/src/task/index.ts`\n - every `task` call registers one `type: \"task\"` job, unless the session has no job manager or the agent definition declares `blocking: true` (sync fallback).\n\nLifecycle and exact state names:\n- Conceptual scheduling path: `pending` (only task-progress bookkeeping before work starts) → `running` → `completed` / `failed`; cancellation changes a running async job to `cancelled`.\n- Exact `AsyncJob.status` values in `packages/coding-agent/src/async/job-manager.ts`: `\"running\" | \"completed\" | \"failed\" | \"cancelled\"`.\n- Exact per-task progress values in `packages/coding-agent/src/task/types.ts`: `\"pending\" | \"running\" | \"completed\" | \"failed\" | \"aborted\"`.\n\n## Side Effects\n- Filesystem\n - None in `job.ts` itself.\n - Jobs being observed may already have written artifacts/results through their own tool runtimes.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Reads 
and mutates `session.asyncJobManager` state.\n - `watchJobs(...)` / `unwatchJobs(...)` toggle delivery suppression for the watched ids.\n - `acknowledgeDeliveries(...)` marks completed ids as suppressed and removes queued deliveries for them.\n - `cancel(...)` aborts running jobs through each job's `AbortController`.\n- User-visible prompts / interactive UI\n - Polling emits periodic `onUpdate` snapshots every 500 ms.\n - Automatic job completion follow-ups are generated by `packages/coding-agent/src/sdk.ts` only for unsuppressed deliveries.\n- Background work / cancellation\n - Waiting uses a timeout plus optional tool-call abort signal.\n - Cancelling a job does not synchronously await teardown; it flips state, aborts, and returns control to the manager/job promise.\n\n## Limits & Caps\n- Poll wait duration comes from `async.pollWaitDuration` (\"Max Poll Time\") in `packages/coding-agent/src/config/settings-schema.ts`:\n - allowed values: `5s`, `10s`, `30s`, `1m`, `5m`, `smart`\n - default: `smart`\n - fixed values wait for at most that long (the wait ends early as soon as a watched job settles or the tool call is aborted); `smart` uses the adaptive ladder `POLL_WAIT_LADDER_MS = [5s, 10s, 30s, 1m, 5m]` in `packages/coding-agent/src/async/job-manager.ts`, climbing one rung per back-to-back poll and resetting to the 5s floor after `POLL_ESCALATION_RESET_MS = 60_000` ms without polling. 
Per-owner state is driven by `nextPollWaitMs(...)` / `recordPollWaitEnd(...)`.\n- Progress update cadence while polling: `PROGRESS_INTERVAL_MS = 500` in `packages/coding-agent/src/tools/job.ts`.\n- Async job retention default: `DEFAULT_RETENTION_MS = 5 * 60 * 1000` in `packages/coding-agent/src/async/job-manager.ts`.\n- Manager fallback max-running limit: `DEFAULT_MAX_RUNNING_JOBS = 15` in `packages/coding-agent/src/async/job-manager.ts`.\n- Session wiring clamps `async.maxJobs` to `1..100` before constructing the manager in `packages/coding-agent/src/sdk.ts`; settings default is `100` in `packages/coding-agent/src/config/settings-schema.ts`.\n- Async completion delivery retry backoff in `packages/coding-agent/src/async/job-manager.ts`:\n - base `500` ms\n - max `30_000` ms\n - jitter `< 200` ms\n - exponent capped at 8 doublings\n\n## Errors\n- Tool-disabled path is returned as normal text, not thrown: `Async execution is disabled; no background jobs are available.`\n- Polling a nonexistent id is not an exception:\n - with `poll` only, missing ids are dropped; if none remain the tool returns `No matching jobs found for IDs: ...`.\n - with `cancel`, each missing id is reported as `not_found` in `details.cancelled` and text.\n- Cancelling a non-running job is not an exception; it reports `already_completed` even if the actual status is `completed`, `failed`, or `cancelled`.\n- Tool-call abort during polling stops waiting and returns a final snapshot through `#buildResult(...)`; it does not cancel watched jobs.\n- Failures inside the underlying async work are stored on the job (`status: \"failed\"`, `errorText`) and reported in normal tool output, not rethrown by `job`.\n- Calling `list: true` against an empty manager returns a normal empty-list result rather than throwing; missing ids passed to `poll` are silently filtered.\n- Combining `list` with `poll` or `cancel` throws a `ToolError`: `` `list` cannot be combined with `poll` or `cancel`. 
``\n\n## Notes\n- `job` waits for the first watched running job to settle, not for all watched jobs. If others remain `running`, they are reported under `## Still Running`; the caller must invoke `job` again to continue waiting.\n- Delivery suppression is the key difference between snapshot and automatic delivery:\n - snapshots (`job` calls with `poll` or `list: true`) read current manager state;\n - follow-up delivery comes from `AsyncJobManager.#enqueueDelivery(...)` and `sdk.ts` `onJobComplete`;\n - watched or acknowledged ids are suppressed via `isDeliverySuppressed(...)`.\n- `manager.cancel(id)` sets `status = \"cancelled\"` before the underlying promise settles. The job function may later populate `resultText` or `errorText`; `job-manager.ts` preserves that text but does not transition the status away from `cancelled`.\n- Retention eviction removes the job record, suppression flags, and watch flag together. After eviction, both `job` calls and `list: true` snapshots behave as if the id never existed.\n",
99
101
  "tools/lsp.md": "# lsp\n\n> Query language servers for diagnostics, navigation, symbols, renames, code actions, capabilities, and raw requests.\n\n## Source\n- Entry: `packages/coding-agent/src/lsp/index.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/lsp.md`\n- Key collaborators:\n - `packages/coding-agent/src/lsp/client.ts` — client process lifecycle and JSON-RPC\n - `packages/coding-agent/src/lsp/config.ts` — config loading, auto-detect, server selection\n - `packages/coding-agent/src/lsp/lspmux.ts` — optional `lspmux` command wrapping\n - `packages/coding-agent/src/lsp/edits.ts` — apply `WorkspaceEdit` and text edits\n - `packages/coding-agent/src/lsp/utils.ts` — URI conversion, symbol resolution, formatting, glob expansion\n - `packages/coding-agent/src/lsp/types.ts` — tool schema and protocol types\n - `packages/coding-agent/src/lsp/clients/index.ts` — custom linter client cache/factory\n - `packages/coding-agent/src/lsp/clients/lsp-linter-client.ts` — LSP-backed linter adapter\n - `packages/coding-agent/src/lsp/clients/biome-client.ts` — Biome CLI diagnostics/formatting adapter\n - `packages/coding-agent/src/lsp/clients/swiftlint-client.ts` — SwiftLint CLI diagnostics adapter\n - `packages/coding-agent/src/tools/index.ts` — tool registration and `lsp.enabled` gating\n - `packages/coding-agent/src/tools/tool-timeouts.ts` — timeout defaults and clamping\n - `packages/coding-agent/src/lsp/defaults.json` — built-in server definitions for auto-detect\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `action` | string enum | Yes | One of `diagnostics`, `definition`, `references`, `hover`, `symbols`, `rename`, `rename_file`, `code_actions`, `type_definition`, `implementation`, `status`, `reload`, `capabilities`, `request`. |\n| `file` | string | No | File path; for `diagnostics` also a glob; for workspace forms use `\"*\"`; for `rename_file` this is the source path. 
|\n| `line` | number | No | 1-indexed line number for position-based actions. Defaults to `1` on the single-file action path. |\n| `symbol` | string | No | Substring used to resolve the column on `line`. Supports `name#N` occurrence selectors; `N` is 1-indexed and defaults to `1`. Required when `line` is given for `definition`/`references`/`rename` against project-aware servers. |\n| `query` | string | No | Workspace symbol query, code-action selector/filter, or LSP method name for `action=request`. |\n| `new_name` | string | No | Required for `rename` and `rename_file`. |\n| `apply` | boolean | No | For `rename`/`rename_file`, apply unless explicitly `false`. For `code_actions`, list unless explicitly `true`. |\n| `timeout` | number | No | Seconds, clamped by `clampTimeout(\"lsp\", ...)` to `5..60`, default `20`. |\n| `payload` | string | No | JSON string for `action=request`; overrides auto-built params. |\n\n## Outputs\n- Single-shot `AgentToolResult`.\n- `content` is always one text block: `[{ type: \"text\", text: string }]`.\n- `details` is `LspToolDetails`: `action`, `success`, optional `serverName`, optional original `request`.\n- No streaming updates.\n- No artifact URIs or background jobs.\n- Many validation failures are returned as ordinary text results with `details.success: false`; aborts throw `ToolAbortError` instead.\n\n## Flow\n1. `packages/coding-agent/src/tools/index.ts` registers `lsp: LspTool.createIf`; session creation also gates it behind `session.enableLsp !== false` and `settings.get(\"lsp.enabled\")`.\n2. `LspTool.execute()` in `packages/coding-agent/src/lsp/index.ts` clamps `timeout` with `clampTimeout(\"lsp\", ...)`, builds an `AbortSignal.timeout(...)`, and combines it with the caller signal.\n3. `getConfig()` loads and caches `LspConfig` per cwd, applies idle-timeout config via `setIdleTimeout()`, and reuses the cached config on later calls.\n4. 
Config loading in `packages/coding-agent/src/lsp/config.ts` merges `defaults.json` with JSON/YAML overrides from project, project config dirs, user config dirs, plugin roots, and home; if there are no overrides it auto-detects servers from root markers plus executable discovery.\n5. Server routing uses `getServersForFile()` / `getServerForFile()` from `config.ts`: extension or basename match, then sort primary servers before linters. `index.ts` further filters custom linter clients out of navigation/refactor paths with `getLspServersForFile()` / `getLspServerForFile()`.\n6. `getOrCreateClient()` in `client.ts` creates one process per `command:cwd`, optionally wraps supported commands with `lspmux`, spawns the server, starts the background message reader, sends `initialize`, stores server capabilities, then sends `initialized`.\n7. The message reader in `client.ts` parses LSP frames, resolves pending requests, caches `publishDiagnostics`, tracks `$/progress` tokens for project-load completion, answers `workspace/configuration`, and applies `workspace/applyEdit` requests through `applyWorkspaceEdit()`.\n8. File-scoped actions call `ensureFileOpen()` before requests. Column resolution uses `resolveSymbolColumn()` from `utils.ts`: read the target file, pick first non-whitespace when `symbol` is omitted, otherwise find the exact or case-insensitive match on the target line and honor `#N` occurrence selectors.\n9. Actions dispatch in `LspTool.execute()` through dedicated branches: workspace-only branches (`status`, some `diagnostics`, workspace `symbols`, workspace `reload`, `capabilities`, `request`) run before the single-file switch; all other single-file actions share one client lookup and `switch(action)`.\n10. Requests go through `sendRequest()` in `client.ts`, which allocates an incrementing JSON-RPC id, installs abort and timeout handling, sends `$/cancelRequest` on abort, and rejects on timeout or process exit.\n11. 
Actions that return edits either preview with `formatWorkspaceEdit()` or apply with `applyWorkspaceEdit()` from `edits.ts`; `rename_file` also performs the filesystem rename and then sends `workspace/didRenameFiles`.\n12. Non-abort failures inside the single-file action block are converted to `LSP error: ...`; many precondition failures return explicit text without throwing.\n\n## Modes / Variants\n### Routing and workspace scope\n- `file: \"*\"` is only special for `diagnostics`, `symbols`, and `reload`.\n- `status` ignores `file`.\n- `capabilities` with omitted `file` or `\"*\"` inspects all non-custom LSP servers; with a concrete file it scopes to matching non-custom servers.\n- `request` with omitted `file` or `\"*\"` chooses the first available non-custom LSP server; with a concrete file it chooses that file's primary non-linter server.\n- `rename_file` sends `workspace/willRenameFiles` and `workspace/didRenameFiles` to every non-custom LSP server from `getLspServers(config)` whose `fileTypes` match the source, destination, or any enumerated rename pair — not just one file-scoped server.\n- Diagnostics are the only tool action that queries both normal LSP servers and custom linter clients (`BiomeClient`, `SwiftLintClient`, or `LspLinterClient`).\n\n### `diagnostics`\n**Inputs**\n- Required: `file`, unless using workspace mode with `file: \"*\"`.\n- Optional: `timeout`.\n\n**Execution**\n- `file: \"*\"`: `runWorkspaceDiagnostics()` detects project type from root markers and runs one subprocess command: Rust `cargo check --message-format=short`, TypeScript `npx tsc --noEmit`, Go `go build ./...`, Python `pyright`.\n- Concrete file or glob: `resolveDiagnosticTargets()` treats non-globs as one target, otherwise expands a `Bun.Glob` up to `MAX_GLOB_DIAGNOSTIC_TARGETS`.\n- Per file, every matching server runs: custom clients call `lint(file)`; real LSP servers optionally wait for project load, capture `diagnosticsVersion`, `refreshFile()`, then 
`waitForDiagnostics()` for fresh `publishDiagnostics` (settles on the latest publish; exact-version match accepted immediately).\n- Results are deduplicated by range+message and severity-sorted.\n\n**Output text**\n- Single target with no issues: `OK`.\n- Single target with issues: `<summary>:\\n<grouped diagnostics>`.\n- Batch/glob target: one section per file, plus an initial truncation warning when the glob exceeds the file cap.\n- Workspace mode: `Workspace diagnostics (<detected description>):\\n<command output>`.\n\n### `definition`\n**Inputs**\n- Required: `file`.\n- Optional: `line`, `symbol`, `timeout`.\n\n**Execution**\n- Sends `textDocument/definition` with `{ textDocument, position }`.\n- Accepts `Location`, `Location[]`, `LocationLink`, or `LocationLink[]`; `normalizeLocationResult()` converts `LocationLink` to `targetSelectionRange ?? targetRange`.\n- Requires `symbol` when `line` is given on project-aware servers (the first-non-whitespace-column fallback is disabled for this action).\n- Waits for project load before the request.\n\n**Output text**\n- `No definition found` or `Found N definition(s):` followed by `file:line:col` and one context line above/below each location.\n\n### `type_definition`\nSame as `definition`, but sends `textDocument/typeDefinition` and reports `type definition(s)`.\n\n### `implementation`\nSame as `definition`, but sends `textDocument/implementation` and reports `implementation(s)`.\n\n### `references`\n**Inputs**\n- Required: `file`.\n- Optional: `line`, `symbol`, `timeout`.\n\n**Execution**\n- Sends `textDocument/references` with `includeDeclaration: true`.\n- Requires `symbol` when `line` is given on project-aware servers (the first-non-whitespace-column fallback is disabled for this action).\n- For project-aware servers, retries up to `REFERENCES_RETRY_COUNT` times when the only hit is the queried declaration; between retries it waits for project load and sleeps `REFERENCES_RETRY_DELAY_MS`.\n- First 
`REFERENCE_CONTEXT_LIMIT` references include surrounding context; the rest are location-only.\n\n**Output text**\n- `No references found` or `Found N reference(s):` with contextual entries first, then `... M additional reference(s) shown without context` when truncated.\n\n### `hover`\n**Inputs**\n- Required: `file`.\n- Optional: `line`, `symbol`, `timeout`.\n\n**Execution**\n- Sends `textDocument/hover`.\n- `extractHoverText()` flattens strings, markup content, marked-string objects, or arrays into plain text.\n\n**Output text**\n- `No hover information` or the extracted hover text.\n\n### `symbols`\n**Inputs**\n- Workspace mode: `file: \"*\"` or omitted file on the early workspace branch, plus required `query`.\n- Document mode: required `file`.\n- Optional: `timeout`.\n\n**Execution**\n- Workspace mode sends `workspace/symbol` to every non-custom LSP server, post-filters matches with `filterWorkspaceSymbols()`, deduplicates with `dedupeWorkspaceSymbols()`, then truncates to `WORKSPACE_SYMBOL_LIMIT`.\n- Document mode sends `textDocument/documentSymbol` to the primary server. 
If the first item has `selectionRange`, it formats hierarchical `DocumentSymbol`s; otherwise it formats flat `SymbolInformation`s.\n\n**Output text**\n- Workspace mode: `Found N symbol(s) matching \"query\":` plus formatted `name @ file:line:col`, with an omission line when over the limit.\n- Document mode: `Symbols in <file>:` plus hierarchical or flat symbol lines.\n\n### `rename`\n**Inputs**\n- Required: `file`, `new_name`.\n- Optional: `line`, `symbol`, `apply`, `timeout`.\n\n**Execution**\n- Requires `symbol` when `line` is given on project-aware servers, then waits for project load, sends `textDocument/rename`, receives a `WorkspaceEdit`.\n- `apply !== false` applies edits immediately with `applyWorkspaceEdit()`.\n- `apply === false` renders a preview with `formatWorkspaceEdit()`.\n\n**Output text**\n- `Rename returned no edits`, `Applied rename:` plus applied change lines, or `Rename preview:` plus summarized edits.\n\n### `rename_file`\n**Inputs**\n- Required: `file` source path, `new_name` destination path.\n- Optional: `apply`, `timeout`.\n\n**Execution**\n- Resolves absolute source and destination, rejects identical paths, missing source, existing destination, empty rename set, or directories with more than `MAX_RENAME_PAIRS` files.\n- `enumerateRenamePairs()` returns one `{oldUri,newUri}` pair for a file or walks every regular file in a directory tree.\n- Sends `workspace/willRenameFiles` with `{ files: pairs }` to every non-custom LSP server whose `fileTypes` match an affected path; collects returned `WorkspaceEdit`s and server notes.\n- Preview mode (`apply === false`) only formats those edits.\n- Apply mode coalesces the returned text edits per URI (a project-aware server's edits win on overlap; overlapping edits from other servers are discarded with a note), applies each URI once from a single snapshot, creates the destination parent directory and renames the source path on disk, sends `textDocument/didClose` for every renamed open file, deletes 
those `openFiles` entries, then sends `workspace/didRenameFiles`.\n\n**Output text**\n- Preview: `Rename preview: <file-count label> → <dest>` plus per-server edit summaries and optional server notes.\n- Apply: `Renamed <file-count label> → <dest>` plus applied edit summaries, filesystem rename line, and optional server notes.\n\n### `code_actions`\n**Inputs**\n- Required: `file`.\n- Optional: `line`, `symbol`, `query`, `apply`, `timeout`.\n\n**Execution**\n- Reads cached diagnostics for the open URI from `client.diagnostics` and sends `textDocument/codeAction` for a zero-width range at the resolved position.\n- When `apply !== true`, `query` is passed as `context.only: [query]`; this is a server-side kind filter.\n- When `apply === true`, `query` becomes a required client-side selector: either a zero-based numeric index or a case-insensitive substring of the action title.\n- Applying a `CodeAction` uses `applyCodeAction()`: optionally `codeAction/resolve`, then `applyWorkspaceEdit(edit)`, then optional `workspace/executeCommand`.\n- Applying a bare `Command` only runs `workspace/executeCommand`.\n\n**Output text**\n- List mode: `N code action(s):` plus `index: [kind] title` lines.\n- Apply mode success: `Applied \"title\":` plus `Workspace edit:` and/or `Executed command(s):` sections.\n- Apply mode miss: `No code action matches \"query\". 
Available actions:`.\n- Apply mode with no edit/command: `Action \"title\" has no workspace edit or command to apply`.\n\n### `status`\n**Inputs**\n- None.\n\n**Execution**\n- Reads configured servers from cached `LspConfig` and cross-references `getActiveClients()` so each server is labelled `(configured, not started)` or with its live client status.\n- Calls `detectLspmux()` and appends status text when `lspmux` is installed.\n\n**Output text**\n- `Language servers: <name (configured, not started) | name (<status>)>` plus an explanatory note line, or `No language servers configured for this project`, optionally followed by `lspmux: active (multiplexing enabled)` or `lspmux: installed but server not running`.\n\n### `reload`\n**Inputs**\n- Workspace mode: `file: \"*\"` or omitted `file`.\n- Single-file mode: required `file`.\n- Optional: `timeout`.\n\n**Execution**\n- Workspace mode reloads every non-custom LSP server.\n- Single-file mode reloads the primary server for that file.\n- `reloadServer()` tries `rust-analyzer/reloadWorkspace`, then `workspace/didChangeConfiguration` with `{ settings: {} }`; if neither works it kills the process so the next request cold-starts a new client.\n\n**Output text**\n- One line per server: `Reloaded <server>`, `Restarted <server>`, or `Failed to reload <server>: ...`.\n\n### `capabilities`\n**Inputs**\n- Optional: `file`, `timeout`.\n\n**Execution**\n- With a concrete `file`, inspects matching non-custom servers for that file.\n- With omitted `file` or `\"*\"`, inspects every non-custom configured server.\n- Starts servers as needed and dumps `client.serverCapabilities ?? {}` as pretty JSON.\n\n**Output text**\n- Per server: `<server>:` followed by indented `capabilities: { ... 
}`, or `<server>: failed to start (...)`.\n\n### `request`\n**Inputs**\n- Required: `query` method name.\n- Optional: `file`, `line`, `symbol`, `payload`, `timeout`.\n\n**Execution**\n- Chooses one non-custom server: file-scoped primary server, otherwise the first configured non-custom server.\n- Param building precedence:\n 1. If `payload` is present, parse JSON and use it verbatim.\n 2. Else if `file` is concrete and `line` is present, build `{ textDocument: { uri }, position: { line: line - 1, character } }` using `resolveSymbolColumn()`.\n 3. Else if `file` is concrete, build `{ textDocument: { uri } }`.\n 4. Else use `{}`.\n- Opens the file first when `file` is concrete.\n\n**Output text**\n- Success: `<server> ← <method>:\\n<formatted result>`, where non-string results are `JSON.stringify(..., null, 2)` and nullish values become `null`.\n- Failure: `LSP error from <server> on <method>: ...` followed by ` params: <preview>` echoing the request params (truncated to 400 chars).\n\n## Side Effects\n- Filesystem\n - Reads config files, target files, and root markers.\n - `rename` and `code_actions` may edit/create/delete/rename files via `applyWorkspaceEdit()`.\n - `rename_file` always renames the source path on disk in apply mode.\n - Server-initiated `workspace/applyEdit` requests also mutate files through `applyWorkspaceEdit()`.\n- Network\n - None directly; communication is local stdio JSON-RPC to subprocesses.\n- Subprocesses / native bindings\n - Spawns language servers with `ptree.spawn()`.\n - Workspace diagnostics spawns `cargo`, `npx`, `go`, or `pyright`.\n - `BiomeClient` and `SwiftLintClient` spawn CLI tools.\n - Optional `lspmux` detection spawns `lspmux status`; supported servers may be wrapped through `lspmux client`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Caches config per cwd in `configCache`.\n - Caches LSP clients per `command:cwd`, with `pendingRequests`, `diagnostics`, `openFiles`, `serverCapabilities`, and 
project-load state.\n - Caches custom linter clients by `serverName:cwd`.\n - Updates client `lastActivity`; optional idle-timeout cleanup is driven by `setIdleTimeout()`.\n- Background work / cancellation\n - Every request has an abortable timeout signal.\n - Aborting an in-flight LSP request sends `$/cancelRequest`.\n - Background message readers persist for each live client until process exit/shutdown.\n\n## Limits & Caps\n- Tool timeout clamp: default `20`, min `5`, max `60` seconds — `TOOL_TIMEOUTS.lsp` in `packages/coding-agent/src/tools/tool-timeouts.ts`.\n- LSP request default timeout inside `sendRequest()`: `30_000ms` — `DEFAULT_REQUEST_TIMEOUT_MS` in `packages/coding-agent/src/lsp/client.ts`.\n- Warmup initialize timeout default: `5_000ms` — `WARMUP_TIMEOUT_MS` in `packages/coding-agent/src/lsp/client.ts`.\n- Project-load wait fallback: `15_000ms` — `PROJECT_LOAD_TIMEOUT_MS` in `packages/coding-agent/src/lsp/client.ts`.\n- Idle-client sweep interval when enabled: `60_000ms` — `IDLE_CHECK_INTERVAL_MS` in `packages/coding-agent/src/lsp/client.ts`.\n- Diagnostic message output cap: first `50` messages — `DIAGNOSTIC_MESSAGE_LIMIT` in `packages/coding-agent/src/lsp/index.ts`.\n- Single-file diagnostics wait: `3_000ms` — `SINGLE_DIAGNOSTICS_WAIT_TIMEOUT_MS`.\n- Batch/glob diagnostics wait per file: `400ms` — `BATCH_DIAGNOSTICS_WAIT_TIMEOUT_MS`.\n- Glob diagnostic target cap: first `20` matches — `MAX_GLOB_DIAGNOSTIC_TARGETS`.\n- Workspace symbol cap: first `200` entries — `WORKSPACE_SYMBOL_LIMIT`.\n- Reference context cap: first `50` references include source context — `REFERENCE_CONTEXT_LIMIT`.\n- References retry count: `2` retries, `250ms` backoff — `REFERENCES_RETRY_COUNT`, `REFERENCES_RETRY_DELAY_MS`.\n- Directory rename cap: `1_000` file pairs — `MAX_RENAME_PAIRS`.\n- `detectLspmux()` state cache TTL: `5 * 60 * 1000ms`; liveness check timeout: `1_000ms` — `STATE_CACHE_TTL_MS`, `LIVENESS_TIMEOUT_MS` in `packages/coding-agent/src/lsp/lspmux.ts`.\n- 
Workspace diagnostics output cap: first `50` lines from the subprocess.\n\n## Errors\n- Missing or invalid inputs are usually returned as text with `details.success: false`, not thrown:\n - missing `file`/`query`/`new_name`\n - invalid JSON in `payload`\n - no matching server\n - invalid `rename_file` source/destination conditions\n- `resolveSymbolColumn()` throws explicit errors for missing files, missing symbols, and out-of-bounds `#N` selectors; these surface as `LSP error: ...` or request-specific error text.\n- `sendRequest()` rejects on timeout with `LSP request <method> timed out after <ms>ms`.\n- Client process exit rejects all pending requests with an exit-code/stderr error assembled in `getOrCreateClient()`.\n- Single-file action failures inside the main `try` become `LSP error: <message>`.\n- `request` has its own error envelope: `LSP error from <server> on <method>: <message>`.\n- Some server failures are intentionally softened:\n - diagnostics continue when one server fails\n - `rename_file` suppresses `workspace/willRenameFiles` “method not found” errors and records other server errors as notes\n - `code_actions` ignores `codeAction/resolve` failures and applies unresolved actions when possible\n- Caller aborts are not converted to text: `ToolAbortError` is rethrown. A wall-clock tool timeout without a caller abort instead throws `ToolError`: `LSP <action> timed out after <N>s on <server>. 
...`.\n\n## Notes\n- `status` reports configured servers from `LspConfig` and labels each one via `getActiveClients()`: `(configured, not started)` means the binary resolves on PATH but no request has spawned it; a live client reports its status.\n- `getLspServerForFile()` excludes `createClient` adapters and linter-only servers; navigation/refactor actions never target Biome/SwiftLint custom clients.\n- `getServersForFile()` matches both file extensions and exact basenames from `fileTypes`; config can target names like `Dockerfile` if present.\n- `symbol` matching is exact first, then case-insensitive, and falls back to the Nth occurrence on the specified line only; it never scans other lines.\n- For `definition`, `references`, and `rename` against project-aware servers, omitting `symbol` while passing `line` is rejected with a `ToolError` instead of silently falling back to the first non-whitespace column.\n- `code_actions` uses `query` in two different ways: server-side `context.only` filter in list mode, client-side title/index selector in apply mode.\n- `rename` and `rename_file` default to apply. Preview requires `apply: false`.\n- `request` with `file: \"*\"` is treated the same as omitted `file`: it does not build workspace-specific params.\n- `reload` does not recreate a client immediately after killing it; the next request triggers reinitialization.\n- `workspace/applyEdit` can apply edits initiated by the server outside the direct tool action result path.\n- `detectLspmux()` can be disabled with `PI_DISABLE_LSPMUX=1`; only `rust-analyzer` is in `DEFAULT_SUPPORTED_SERVERS`.\n- Startup LSP discovery (`discoverStartupLspServers(cwd)` in `sdk.ts`) runs for `enableLsp && options.hasUI`; the background warmup additionally requires `!settings.get(\"lsp.lazy\")`. 
`lsp.lazy` defaults to `true`, so by default discovered servers are surfaced with status `\"available\"` (gray dot in the welcome screen) and cold-start through `getOrCreateClient()` on first use (lsp tool call or edit/write on a matching file type). Print/RPC/ACP/script sessions skip discovery and warmup entirely. See `docs/sdk.md` § Startup performance.\n- `configCache` is per-process and never auto-invalidated; config changes require a fresh process to be observed by `getConfig()` callers.",
100
- "tools/read.md": "# read\n\n> Read files, directories, archives, SQLite databases, internal resources, images, documents, and URLs through one `path` string.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/read.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/read.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/path-utils.ts` — split `path` from trailing selectors; normalize local paths.\n - `packages/coding-agent/src/tools/archive-reader.ts` — detect `archive.ext:inner/path`, index archives, list/read entries.\n - `packages/coding-agent/src/tools/sqlite-reader.ts` — detect SQLite targets, parse selectors, render tables.\n - `packages/coding-agent/src/tools/fetch.ts` — URL parsing, fetch/render pipeline, URL cache/artifacts.\n - `packages/coding-agent/src/internal-urls/router.ts` — resolve `agent://`, `artifact://`, `history://`, `issue://`, `local://`, `mcp://`, `memory://`, `omp://`, `pr://`, `rule://`, `skill://`, and `vault://`.\n - `packages/coding-agent/src/edit/notebook.ts` — convert `.ipynb` to editable `# %% [...] cell:N` text.\n - `packages/coding-agent/src/utils/file-display-mode.ts` — decide hashline vs line-number vs raw display.\n - `packages/coding-agent/src/workspace-tree.ts` — render directory trees.\n - `packages/coding-agent/src/edit/file-snapshot-store.ts` — stores read lines for later hashline edit verification/recovery.\n - `packages/coding-agent/src/tools/index.ts` — registers `read: s => new ReadTool(s)`.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `path` | `string` | Yes | Filesystem path, internal URL, or web URL. May end with a trailing selector such as `:50-100` or `:raw`. |\n\n### Selector grammar\n\nFor normal file-like reads, `splitPathAndSel()` in `packages/coding-agent/src/tools/path-utils.ts` recognizes the final suffix only when it matches one of these forms:\n\n| Suffix | Meaning |\n| --- | --- |\n| `:raw` | Raw/verbatim mode. 
Disables structural summaries and line prefixes. |\n| `:conflicts` | Render unresolved Git merge-conflict regions for a local file. |\n| `:N` / `:LN` / `:N-` / `:N..` | Start at 1-indexed line `N`, open-ended. |\n| `:A-B` / `:LA-LB` / `:A..B` | Inclusive 1-indexed line range (`..` is a forgiving alias normalized to `-`). |\n| `:A+C` / `:LA+LC` | `C` lines starting at `A`; tool converts this to end line `A + C - 1`. |\n| `:R1,R2,...` | Multiple ranges, sorted and merged before reading (for example `:5-16,960-973`). |\n| `:range:raw` or `:raw:range` | Same line selection, but raw output. |\n\nValidation in `parseLineRangeChunk()`:\n- line numbers are 1-indexed; `:0` throws.\n- `+` counts must be `>= 1`.\n- `-` end must be `>= start`.\n\nSelector parsing intentionally falls through for unrecognized trailing `:...`; archive and SQLite paths consume their own colon syntax.\n\nURL selectors are parsed separately in `packages/coding-agent/src/tools/fetch.ts`, but use the same line-range parser for `:raw`, `:N`, `:A-B`, `:A+C`, `:5-10,20-30`, and `:range:raw` / `:raw:range`. Because URL ports also use `:`, add a trailing slash before a selector on a host/port URL, e.g. `https://example.com/:80`.\n\n## Outputs\n- Single-shot `AgentToolResult` built through `toolResult()` in `packages/coding-agent/src/tools/tool-result.ts`.\n- `content` is usually one text block. Image reads may return `[text, image]`.\n- `details` is path-dependent. 
`ReadToolDetails` may include:\n - `kind: \"file\" | \"url\"` (URL path uses `kind: \"url\"`; file reads usually omit `kind`)\n - `isDirectory`\n - `resolvedPath`\n - `suffixResolution`\n - URL fields: `url`, `finalUrl`, `contentType`, `method`, `notes`\n - `truncation`\n - `displayContent` (unprefixed text + starting line for TUI rendering)\n - `summary` (`lines`, `elidedSpans`, `elidedLines`) for structural summaries\n - `meta` from `packages/coding-agent/src/tools/output-meta.ts`\n- `details.meta.source` is set to the backing path, URL, or internal URL.\n- `details.meta.truncation` carries shown range, total lines/bytes, next offset, and optional `artifactId` for cached URL output.\n- Directory/archive listings and SQLite table lists also set `details.meta.limits` when list limits trigger.\n\n## Flow\n1. `ReadTool.execute()` accepts `{ path }`. `file://...` inputs are expanded first with `expandPath()`.\n2. It tries URL handling first via `parseReadUrlTarget()` from `packages/coding-agent/src/tools/fetch.ts`.\n - Plain URL reads call `executeReadUrl()`.\n - URL reads with line selectors load or refresh the URL cache with `loadReadUrlCacheEntry()` and paginate the cached text locally with `#buildInMemoryTextResult()`.\n3. If not a web URL, it checks `InternalUrlRouter.instance().canHandle(...)`.\n - Internal URLs are resolved with `internalRouter.resolve()`.\n - `agent://` query extraction (`/path` or `?q=`) bypasses pagination and returns the extracted content directly.\n - Other internal resources are paginated in-memory by `#buildInMemoryTextResult()`.\n4. It tries archive resolution next with `#resolveArchiveReadPath()`.\n - `parseArchivePathCandidates()` scans for `.tar`, `.tar.gz`, `.tgz`, or `.zip` anywhere before `:sub/path`.\n - On success, `#readArchive()` either lists a directory or decodes an entry as UTF-8 text.\n5. 
It tries SQLite resolution with `#resolveSqliteReadPath()`.\n - `parseSqlitePathCandidates()` scans for `.sqlite`, `.sqlite3`, `.db`, `.db3` before any `:table`, `:key`, or `?query` suffix.\n - `#readSqlite()` dispatches on `parseSqliteSelector()`.\n6. Otherwise it treats the input as a local filesystem path.\n - `resolveReadPath()` expands `~`, resolves relative to session cwd, treats bare `/` as session cwd, and retries macOS screenshot/NFD/curly-quote variants.\n - If the path does not exist, `findUniqueSuffixMatch()` does a workspace glob-based unique suffix lookup (skipped for remote mounts).\n7. Directories go through `#readDirectory()`.\n8. Non-directories branch by content type:\n - image metadata / inline image\n - editable notebook text\n - markit-converted document\n - structural summary for parseable code/prose\n - streamed text/line-range read\n9. Local text reads are streamed by `streamLinesFromFile()` rather than loading the whole file. The tool adds `1` leading and `3` trailing context lines around explicit bounded ranges (constrained sides only).\n10. Hashline-eligible local reads record a whole-file snapshot into the session snapshot store (`getFileSnapshotStore()` on `session.fileSnapshotStore`, `packages/coding-agent/src/edit/file-snapshot-store.ts`) for later hashline edit verification/recovery.\n11. If suffix resolution happened, the first text block is prefixed with `[Path '...' not found; resolved to '...' via suffix match]`.\n\n## Modes / Variants\n\n### Local text files\n- No selector: if summarization is enabled and the file is small enough, `#trySummarize()` calls `summarizeCode()`.\n - Guards: file size `<= 2 MiB` (`MAX_SUMMARY_BYTES`), line count `<= 20_000` (`MAX_SUMMARY_LINES`).\n - Summary output keeps selected declarations and replaces elided spans with `...` or merged brace-pair lines containing `..`. When at least one span is elided, the text content ends with a footer like `[NN lines elided; re-read needed ranges, e.g. 
<path>:5-16,40-80]` using concrete ranges from the actual elisions.\n - When an elided block sits between matching brace lines, `#renderSummary()` may merge them into one anchored line rather than emitting separate opener/closer lines.\n- Explicit selector or summarization miss: streamed text read.\n - Default open-ended limit is `min(session setting read.defaultLimit, DEFAULT_MAX_LINES)`.\n - Explicit ranges expand by `RANGE_LEADING_CONTEXT_LINES = 1` / `RANGE_TRAILING_CONTEXT_LINES = 3` on the constrained sides only.\n - Non-raw output uses `resolveFileDisplayMode()`:\n - hashline numbered output when edit mode is hashline, read is not raw, source is mutable, edit tool exists, and `readHashLines !== false`\n - otherwise optional line numbers when `readLineNumbers === true`\n - raw mode suppresses both\n- Prefix format in hashline mode is a `[PATH#TAG]` header followed by `LINE:TEXT`, e.g. `[src/foo.ts#0A1B]` and `41:def alpha():`, from the session snapshot store plus `formatNumberedLine()` / `formatHashlineHeader()`.\n- The `edit`/hashline path consumes that header plus bare line numbers later; the four-hex tag is a content-derived hash of the whole normalized file, resolvable through the session snapshot store that recorded it. 
Immutable sources and `:raw` intentionally suppress hashline headers.\n\n### Directory listings\n- `#readDirectory()` calls `buildDirectoryTree()` with:\n - `maxDepth = 2`\n - `perDirLimit = 12`\n - `rootLimit = null`\n - `lineCap = limit` when a line selector was present, else unlimited at this layer\n- `buildDirectoryTree()` sorts siblings by recency, shows file sizes and relative ages, and may mark `limits.resultLimit` when the tree truncates.\n- Empty directories render as `(empty directory)`.\n\n### Archives\n- Supported archive containers: `.tar`, `.tar.gz`, `.tgz`, `.zip`.\n- Syntax: `archive.ext`, `archive.ext:path/inside`, `archive.ext:path/inside:50-60`.\n- `openArchive()` branches by format:\n - tar/tgz reads the whole archive into memory (capped at `MAX_TAR_ARCHIVE_BYTES = 256 MiB`) and indexes it with `new Bun.Archive(bytes)`\n - zip is indexed via ranged central-directory reads (`readZipEntries()`); entries are inflated on demand with `fflate.inflateSync()`, with declared member sizes capped at `MAX_ARCHIVE_MEMBER_BYTES = 64 MiB`\n- Archive paths normalize `/`, drop `.` segments, and reject `..`.\n- Directory reads list immediate children; files show `name` plus ` (size)` when size > 0.\n- Directory listing default limit is `500` entries in `#readArchiveDirectory()`.\n- File entries are UTF-8 decoded. Non-UTF-8 entries return `[Cannot read binary archive entry '...' 
(...)]` instead of bytes.\n- Text archive entries reuse the normal in-memory pagination/anchoring path.\n\n### SQLite databases\n- Database detection requires both a matching extension and a valid SQLite file header (`isSqliteFile()`).\n- Selector forms from `parseSqliteSelector()`:\n\n#### `db.sqlite`\n- `kind: \"list\"`\n- Lists non-`sqlite_%` tables with row counts.\n- `#readSqlite()` caps the rendered list to `500` tables via `applyListLimit()`.\n\n#### `db.sqlite:table`\n- `kind: \"schema\"`\n- Returns `sqlite_master.sql` plus sample rows.\n- Sample size is `DEFAULT_SCHEMA_SAMPLE_LIMIT = 5`.\n\n#### `db.sqlite:table:key`\n- `kind: \"row\"`\n- Resolves by primary key when the table has exactly one PK column; otherwise falls back to `rowid` lookup.\n- No query parameters allowed on row lookups.\n\n#### `db.sqlite:table?limit=...&offset=...&order=...&where=...`\n- `kind: \"query\"`\n- Defaults: `limit = 20`, `offset = 0`.\n- `limit` is capped at `500`.\n- `order` accepts `column` or `column:asc|desc` and must name an existing column.\n- `where` is accepted only if it passes `validateWhereClause()`, which rejects comments, semicolons, and control keywords like `LIMIT`, `OFFSET`, `UNION`, `ATTACH`, `PRAGMA`.\n- Unknown query parameters throw.\n\n#### `db.sqlite?q=SELECT ...`\n- `kind: \"raw\"`\n- Cannot be combined with table selectors or any other query param.\n- Empty `q` throws.\n- `executeReadQuery()` prepares the SQL, rejects bound parameters, and collects rows from `statement.iterate()` capped at `MAX_RAW_QUERY_ROWS = 1000`; it does not verify that the SQL starts with `SELECT`.\n\n- Rendering caps in `packages/coding-agent/src/tools/sqlite-reader.ts`:\n - ASCII table width `120` (`MAX_RENDER_WIDTH`)\n - per-column width `40` (`MAX_COLUMN_WIDTH`)\n- `#readSqlite()` opens Bun SQLite in `{ readonly: true, strict: true }` and sets `PRAGMA busy_timeout = 3000`.\n\n### Documents\n- `CONVERTIBLE_EXTENSIONS` in `packages/coding-agent/src/tools/read.ts` covers `.pdf`, `.doc`, 
`.docx`, `.ppt`, `.pptx`, `.xls`, `.xlsx`, `.rtf`, `.epub`.\n- `convertFileWithMarkit()` converts the file to text/markdown.\n- Converted output is then head-truncated with normal shared limits; there is no line selector support inside the source document before conversion.\n- Conversion failures return a text block like `[Cannot read .pdf file: ...]`.\n\n### Jupyter notebooks\n- `.ipynb` goes through `readEditableNotebookText()` unless `:raw` was requested.\n- Output is editable plain text with markers like:\n\n```text\n# %% [code] cell:0\n...\n```\n\n- Raw mode bypasses that conversion and falls back to file-text reading.\n\n### Images\n- Image detection is metadata-based (`readImageMetadata()`).\n- Max accepted image size is `20 MiB` (`MAX_IMAGE_INPUT_BYTES`, re-exported as `MAX_IMAGE_SIZE`). Larger files throw.\n- If `inspect_image.enabled` is true, `read` returns metadata only (MIME, bytes, dimensions, channels, alpha) plus a suggestion to call `inspect_image`.\n- Otherwise it calls `loadImageInput()` and returns:\n - a text note from the image loader\n - an inline image block\n- Unsupported/undecodable image formats throw a `ToolError`.\n\n### Internal URLs\n- `read` does not resolve these itself; it delegates to `InternalUrlRouter.instance().resolve()`.\n- Registered protocols are outside this file, but the router in `packages/coding-agent/src/internal-urls/router.ts` is built for `agent://`, `artifact://`, `history://`, `issue://`, `local://`, `mcp://`, `memory://`, `omp://`, `pr://`, `rule://`, `skill://`, and `vault://`.\n- `#handleInternalUrl()` behavior:\n - parses the URL with `parseInternalUrl()` so colons inside the host segment are legal\n - for `agent://`, treats non-root path extraction or `?q=` extraction as a special no-pagination mode\n - otherwise paginates the resolved text in memory\n - passes `immutable` through to `resolveFileDisplayMode()` so anchors are suppressed for immutable resources such as artifacts, skills, memory, and agent 
outputs\n - sets `ignoreResultLimits: true` for `skill://` so the full skill text is paginated only by explicit selectors, not by the normal default line limit\n- `issue://<N>` / `pr://<N>` (and the long form `issue://<owner>/<repo>/<N>` / `pr://<owner>/<repo>/<N>`) route through the same SQLite cache the `github` tool writes to; `?comments=0` selects the no-comments rendering. Bare `issue://` / `pr://` (and `issue://<owner>/<repo>` / `pr://<owner>/<repo>`) issue a live `gh issue list` / `gh pr list` for browsing, accepting `?state=`, `?limit=`, `?author=`, `?label=`. PR diffs share the same cache through `pr://<N>/diff` (numbered file listing with per-file hints), `pr://<N>/diff/<i>` (single file slice; 1-indexed), and `pr://<N>/diff/all` (verbatim unified diff); the listing and per-file slices are reconstructed from the cached unified-diff payload, so all three variants share one `gh pr diff` invocation per PR. Diff content is served as `text/plain`. Soft TTL `github.cache.softTtlSec` (default 5 minutes), hard TTL `github.cache.hardTtlSec` (default 7 days). Stale-hit returns the cached row and schedules a background refresh.\n\n### Web URLs\n- `parseReadUrlTarget()` accepts `http://`, `https://`, or `www.` targets.\n- Plain URL reads call `executeReadUrl()` in `packages/coding-agent/src/tools/fetch.ts`.\n- `:raw` means raw HTML/body fallback path; plain URL reads prefer rendered/reader-friendly output.\n- `:N`, `:A-B`, `:A+C`, and comma-separated multi-ranges do not refetch when cached output is usable. They page over cached output from the prior or current URL render.\n- URL render pipeline in `renderUrl()`:\n 1. normalize scheme (`https://` added for bare `www.`)\n 2. try special handlers for known sites unless raw\n 3. fetch with `loadPage()`\n 4. if content is image/PDF/DOCX/etc., try binary fetch + markit/image handling\n 5. handle JSON directly, feeds via feed parser, plain text directly\n 6. 
for HTML and non-raw mode, try markdown alternates, `URL.md`, content negotiation, feed alternates, HTML-to-text renderers, extracted linked documents, then `llms.txt`\n 7. fall back to raw body text/html\n- URL output is wrapped with a small header:\n\n```text\nURL: ...\nContent-Type: ...\nMethod: ...\nNotes: ...\n\n---\n```\n\n- `method` records the winning path (`json`, `feed`, `text`, `alternate-markdown`, `md-suffix`, `content-negotiation`, `image`, `markit`, `llms.txt`, `raw`, `raw-html`, etc.).\n- URL reads may return an inline image block when the fetched resource is a supported image and survives resizing.\n\n## Side Effects\n- Filesystem\n - Opens and streams local files.\n - Reads tar/tgz archives fully into memory before indexing (256 MiB cap); ZIP archives are indexed via ranged central-directory reads.\n - May read URL-cache artifact files from the session artifacts directory.\n - Writes URL output artifacts when URL output is truncated or when line-range pagination needs a persisted cache body.\n- Network\n - URL mode performs HTTP fetches, binary refetches, and alternate-endpoint probes.\n- Subprocesses / native bindings\n - Uses Bun SQLite for `.db`/`.sqlite*`.\n - Uses `Bun.Archive` for tar/tgz and `fflate` for zip.\n - URL HTML rendering can delegate into site handlers and HTML-to-text backends from `packages/coding-agent/src/tools/fetch.ts`.\n- Session state\n - Records whole-file snapshots of local text reads into `session.fileSnapshotStore` for later stale-anchor recovery.\n - Passes session `cwd`, `settings`, and `localProtocolOptions` into the process-global `InternalUrlRouter.instance().resolve()` for internal URLs.\n - Uses `session.allocateOutputArtifact()` for cached/truncated URL output.\n- Background work / cancellation\n - Only the deterministic disk reads are non-abortable: plain-file line/range reads (`streamLinesFromFile`, multi-range) and directory listings (`#readDirectory`) are called with `undefined` instead of the 
`AbortSignal`, so an interrupt mid-read can't surface a misleading \"Operation aborted\" on a read that would have finished instantly. Every other branch keeps the signal and its helpers call `throwIfAborted(signal)` to stop promptly: URL/internal-URL reads (network), archive, sqlite, document conversion, image decode, structural summary, conflict scan, and the suffix-glob path resolution.\n\n## Limits & Caps\n- Shared text truncation defaults from `packages/coding-agent/src/session/streaming-output.ts`:\n - `DEFAULT_MAX_LINES = 3000`\n - `DEFAULT_MAX_BYTES = 50 * 1024`\n- Local text open-ended default line limit: `read.defaultLimit`, clamped to `[1, DEFAULT_MAX_LINES]`.\n- Explicit line ranges add `1` leading and `3` trailing context lines on the constrained sides (`RANGE_LEADING_CONTEXT_LINES` / `RANGE_TRAILING_CONTEXT_LINES`).\n- File streaming chunk size: `8 * 1024` bytes (`READ_CHUNK_SIZE`).\n- Local streamed byte budget for line reads: `max(DEFAULT_MAX_BYTES, maxLinesToCollect * 512)`.\n- Structural summaries only run when file size `<= 2 MiB` and line count `<= 20_000`.\n- Image input max: `20 MiB`.\n- Directory tree caps for local directories: depth `2`, per-directory children `12`.\n- Archive directory default list cap: `500` entries.\n- SQLite:\n - default row query limit `20`\n - schema sample limit `5`\n - max query limit `500`\n - raw `?q=` row cap `1000` (`MAX_RAW_QUERY_ROWS`)\n - table list cap `500`\n - render width `120`, column width `40`\n - busy timeout `3000` ms\n- URL read result shown to the model is truncated to `300` lines and `50 KiB` in `executeReadUrl()`; full cached output can be attached as an artifact.\n- Inline fetched URL images:\n - source bytes cap `20 MiB`\n - post-resize inline output cap `300 KiB`\n- Unique suffix auto-resolution glob timeout: `5000` ms.\n- File snapshot store holds `30` paths with up to `4` versions each (`DEFAULT_MAX_PATHS` / `DEFAULT_MAX_VERSIONS_PER_PATH` in `packages/hashline/src/snapshots.ts`); files over 
`4 MiB` (`SNAPSHOT_MAX_BYTES`) are not snapshotted.\n\n## Errors\n- Validation and operational failures surface as `ToolError`.\n- Selector errors include:\n - `Line selector 0 is invalid; lines are 1-indexed. Use :1.`\n - invalid `A+B` / `A-B` shapes\n - `Cannot combine query extraction with line selectors` for `agent://.../path:50`\n- Missing local/archive/sqlite paths first attempt unique suffix resolution; if no unique match exists they error.\n- Out-of-bounds line reads do not throw. They return explanatory text with a suggestion such as `Use :1 ...` or `Use :<last line> ...`.\n- Binary archive entries do not throw; they return a text notice.\n- Document conversion failure returns a text notice.\n- Image oversize/unsupported/invalid cases throw.\n- SQLite parser rejects unsupported parameter combinations early; DB/runtime errors are caught and rethrown as `ToolError(message)`.\n- URL fetch failure does not throw when HTTP fetch succeeds but `response.ok === false`; it returns a failed URL read with `method: \"failed\"` and explanatory notes.\n\n## Notes\n- Hashline anchors are suppressed for raw reads and immutable internal resources because there is no editable backing target for later `edit` consumption.\n- `splitPathAndSel()` intentionally treats unknown trailing `:...` as part of the path so `archive.zip:inner/file` and `db.sqlite:table:key` still work.\n- `resolveReadPath()` contains macOS-specific filename fallbacks for screenshot timestamps, NFD Unicode normalization, and curly apostrophes.\n- A bare `/` resolves to the session cwd, not the filesystem root.\n- URL cache keys are session-scoped and normalized by requested URL + raw/rendered mode; both requested URL and final redirected URL are cached.\n- URL line-range reads request `ensureArtifact: true, preferCached: true` so a later paginated read can reopen the same rendered body from artifact storage.\n- Raw SQLite `q=` execution is not keyword-restricted beyond “no bound parameters”; the read tool 
relies on the surrounding contract to keep it read-only.\n- The file snapshot store is not a read acceleration cache. It exists to verify and recover hashline edits when the file changed after the read.",
102
+ "tools/read.md": "# read\n\n> Read files, directories, archives, SQLite databases, internal resources, images, documents, and URLs through one `path` string.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/read.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/read.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/path-utils.ts` — split `path` from trailing selectors; normalize local paths.\n - `packages/coding-agent/src/tools/archive-reader.ts` — detect `archive.ext:inner/path`, index archives, list/read entries.\n - `packages/coding-agent/src/tools/sqlite-reader.ts` — detect SQLite targets, parse selectors, render tables.\n - `packages/coding-agent/src/tools/fetch.ts` — URL parsing, fetch/render pipeline, URL cache/artifacts.\n - `packages/coding-agent/src/internal-urls/router.ts` — resolve `agent://`, `artifact://`, `history://`, `issue://`, `local://`, `mcp://`, `memory://`, `omp://`, `pr://`, `rule://`, `skill://`, and `vault://`.\n - `packages/coding-agent/src/edit/notebook.ts` — convert `.ipynb` to editable `# %% [...] cell:N` text.\n - `packages/coding-agent/src/utils/file-display-mode.ts` — decide hashline vs line-number vs raw display.\n - `packages/coding-agent/src/workspace-tree.ts` — render directory trees.\n - `packages/coding-agent/src/edit/file-snapshot-store.ts` — stores read lines for later hashline edit verification/recovery.\n - `packages/coding-agent/src/tools/index.ts` — registers `read: s => new ReadTool(s)`.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `path` | `string` | Yes | Filesystem path, internal URL, or web URL. May end with a trailing selector such as `:50-100` or `:raw`. |\n\n### Selector grammar\n\nFor normal file-like reads, `splitPathAndSel()` in `packages/coding-agent/src/tools/path-utils.ts` recognizes the final suffix only when it matches one of these forms:\n\n| Suffix | Meaning |\n| --- | --- |\n| `:raw` | Raw/verbatim mode. 
Disables structural summaries and line prefixes. |\n| `:conflicts` | Render unresolved Git merge-conflict regions for a local file. |\n| `:N` / `:LN` / `:N-` / `:N..` | Start at 1-indexed line `N`, open-ended. |\n| `:A-B` / `:LA-LB` / `:A..B` | Inclusive 1-indexed line range (`..` is a forgiving alias normalized to `-`). |\n| `:A+C` / `:LA+LC` | `C` lines starting at `A`; tool converts this to end line `A + C - 1`. |\n| `:R1,R2,...` | Multiple ranges, sorted and merged before reading (for example `:5-16,960-973`). |\n| `:range:raw` or `:raw:range` | Same line selection, but raw output. |\n\nValidation in `parseLineRangeChunk()`:\n- line numbers are 1-indexed; `:0` throws.\n- `+` counts must be `>= 1`.\n- `-` end must be `>= start`.\n\nSelector parsing intentionally falls through for unrecognized trailing `:...`; archive and SQLite paths consume their own colon syntax.\n\nURL selectors are parsed separately in `packages/coding-agent/src/tools/fetch.ts`, but use the same line-range parser for `:raw`, `:N`, `:A-B`, `:A+C`, `:5-10,20-30`, and `:range:raw` / `:raw:range`. Because URL ports also use `:`, add a trailing slash before a selector on a host/port URL, e.g. `https://example.com/:80`.\n\n## Outputs\n- Single-shot `AgentToolResult` built through `toolResult()` in `packages/coding-agent/src/tools/tool-result.ts`.\n- `content` is usually one text block. Image reads may return `[text, image]`.\n- `details` is path-dependent. 
`ReadToolDetails` may include:\n - `kind: \"file\" | \"url\"` (URL path uses `kind: \"url\"`; file reads usually omit `kind`)\n - `isDirectory`\n - `resolvedPath`\n - `suffixResolution`\n - URL fields: `url`, `finalUrl`, `contentType`, `method`, `notes`\n - `truncation`\n - `displayContent` (unprefixed text + starting line for TUI rendering)\n - `summary` (`lines`, `elidedSpans`, `elidedLines`) for structural summaries\n - `meta` from `packages/coding-agent/src/tools/output-meta.ts`\n- `details.meta.source` is set to the backing path, URL, or internal URL.\n- `details.meta.truncation` carries shown range, total lines/bytes, next offset, and optional `artifactId` for cached URL output.\n- Directory/archive listings and SQLite table lists also set `details.meta.limits` when list limits trigger.\n\n## Flow\n1. `ReadTool.execute()` accepts `{ path }`. `file://...` inputs are expanded first with `expandPath()`.\n2. It tries URL handling first via `parseReadUrlTarget()` from `packages/coding-agent/src/tools/fetch.ts`.\n - Plain URL reads call `executeReadUrl()`.\n - URL reads with line selectors load or refresh the URL cache with `loadReadUrlCacheEntry()` and paginate the cached text locally with `#buildInMemoryTextResult()`.\n3. If not a web URL, it checks `InternalUrlRouter.instance().canHandle(...)`.\n - Internal URLs are resolved with `internalRouter.resolve()`.\n - `agent://` query extraction (`/path` or `?q=`) bypasses pagination and returns the extracted content directly.\n - Other internal resources are paginated in-memory by `#buildInMemoryTextResult()`.\n4. It tries archive resolution next with `#resolveArchiveReadPath()`.\n - `parseArchivePathCandidates()` scans for `.tar`, `.tar.gz`, `.tgz`, or `.zip` anywhere before `:sub/path`.\n - On success, `#readArchive()` either lists a directory or decodes an entry as UTF-8 text.\n5. 
It tries SQLite resolution with `#resolveSqliteReadPath()`.\n - `parseSqlitePathCandidates()` scans for `.sqlite`, `.sqlite3`, `.db`, `.db3` before any `:table`, `:key`, or `?query` suffix.\n - `#readSqlite()` dispatches on `parseSqliteSelector()`.\n6. Otherwise it treats the input as a local filesystem path.\n - `resolveReadPath()` expands `~`, resolves relative to session cwd, treats bare `/` as session cwd, and retries macOS screenshot/NFD/curly-quote variants.\n - If the path does not exist, `findUniqueSuffixMatch()` does a workspace glob-based unique suffix lookup (skipped for remote mounts).\n7. Directories go through `#readDirectory()`.\n8. Non-directories branch by content type:\n - image metadata / inline image\n - editable notebook text\n - markit-converted document\n - structural summary for parseable code/prose\n - streamed text/line-range read\n9. Local text reads are streamed by `streamLinesFromFile()` rather than loading the whole file. The tool adds `1` leading and `3` trailing context lines around explicit bounded ranges (constrained sides only).\n10. Hashline-eligible local reads record a whole-file snapshot into the session snapshot store (`getFileSnapshotStore()` on `session.fileSnapshotStore`, `packages/coding-agent/src/edit/file-snapshot-store.ts`) for later hashline edit verification/recovery.\n11. If suffix resolution happened, the first text block is prefixed with `[Path '...' not found; resolved to '...' via suffix match]`.\n\n## Modes / Variants\n\n### Local text files\n- No selector: if summarization is enabled and the file is small enough, `#trySummarize()` calls `summarizeCode()`.\n - Guards: file size `<= 2 MiB` (`MAX_SUMMARY_BYTES`), line count `<= 20_000` (`MAX_SUMMARY_LINES`).\n - Summary output keeps selected declarations and replaces elided spans with `...` or merged brace-pair lines containing `..`. When at least one span is elided, the text content ends with a footer like `[NN lines elided; re-read needed ranges, e.g. 
<path>:5-16,40-80]` using concrete ranges from the actual elisions.\n - When an elided block sits between matching brace lines, `#renderSummary()` may merge them into one anchored line rather than emitting separate opener/closer lines.\n- Explicit selector or summarization miss: streamed text read.\n - Default open-ended limit is `min(session setting read.defaultLimit, DEFAULT_MAX_LINES)`.\n - Explicit ranges expand by `RANGE_LEADING_CONTEXT_LINES = 1` / `RANGE_TRAILING_CONTEXT_LINES = 3` on the constrained sides only.\n - Non-raw output uses `resolveFileDisplayMode()`:\n - hashline numbered output when edit mode is hashline, read is not raw, source is mutable, edit tool exists, and `readHashLines !== false`\n - otherwise optional line numbers when `readLineNumbers === true`\n - raw mode suppresses both\n- Prefix format in hashline mode is a `[PATH#TAG]` header followed by `LINE:TEXT`, e.g. `[src/foo.ts#0A1B]` and `41:def alpha():`, from the session snapshot store plus `formatNumberedLine()` / `formatHashlineHeader()`.\n- The `edit`/hashline path consumes that header plus bare line numbers later; the four-hex tag is a content-derived hash of the whole normalized file, resolvable through the session snapshot store that recorded it. 
Immutable sources and `:raw` intentionally suppress hashline headers.\n\n### Directory listings\n- `#readDirectory()` calls `buildDirectoryTree()` with:\n - `maxDepth = 2`\n - `perDirLimit = 12`\n - `rootLimit = null`\n - `lineCap = limit` when a line selector was present, else unlimited at this layer\n- `buildDirectoryTree()` sorts siblings by recency, shows file sizes and relative ages, and may mark `limits.resultLimit` when the tree truncates.\n- Empty directories render as `(empty directory)`.\n\n### Archives\n- Supported archive containers: `.tar`, `.tar.gz`, `.tgz`, `.zip`.\n- Syntax: `archive.ext`, `archive.ext:path/inside`, `archive.ext:path/inside:50-60`.\n- `openArchive()` branches by format:\n - tar/tgz reads the whole archive into memory (capped at `MAX_TAR_ARCHIVE_BYTES = 256 MiB`) and indexes it with `new Bun.Archive(bytes)`\n - zip is indexed via ranged central-directory reads (`readZipEntries()`); entries are inflated on demand with `fflate.inflateSync()`, with declared member sizes capped at `MAX_ARCHIVE_MEMBER_BYTES = 64 MiB`\n- Archive paths normalize `/`, drop `.` segments, and reject `..`.\n- Directory reads list immediate children; files show `name` plus ` (size)` when size > 0.\n- Directory listing default limit is `500` entries in `#readArchiveDirectory()`.\n- File entries are UTF-8 decoded. Non-UTF-8 entries return `[Cannot read binary archive entry '...' 
(...)]` instead of bytes.\n- Text archive entries reuse the normal in-memory pagination/anchoring path.\n\n### SQLite databases\n- Database detection requires both a matching extension and a valid SQLite file header (`isSqliteFile()`).\n- Selector forms from `parseSqliteSelector()`:\n\n#### `db.sqlite`\n- `kind: \"list\"`\n- Lists non-`sqlite_%` tables with row counts.\n- `#readSqlite()` caps the rendered list to `500` tables via `applyListLimit()`.\n\n#### `db.sqlite:table`\n- `kind: \"schema\"`\n- Returns `sqlite_master.sql` plus sample rows.\n- Sample size is `DEFAULT_SCHEMA_SAMPLE_LIMIT = 5`.\n\n#### `db.sqlite:table:key`\n- `kind: \"row\"`\n- Resolves by primary key when the table has exactly one PK column; otherwise falls back to `rowid` lookup.\n- No query parameters allowed on row lookups.\n\n#### `db.sqlite:table?limit=...&offset=...&order=...&where=...`\n- `kind: \"query\"`\n- Defaults: `limit = 20`, `offset = 0`.\n- `limit` is capped at `500`.\n- `order` accepts `column` or `column:asc|desc` and must name an existing column.\n- `where` is accepted only after `validateWhereClause()` rejects comments, semicolons, and control keywords like `LIMIT`, `OFFSET`, `UNION`, `ATTACH`, `PRAGMA`.\n- Unknown query parameters throw.\n\n#### `db.sqlite?q=SELECT ...`\n- `kind: \"raw\"`\n- Cannot be combined with table selectors or any other query param.\n- Empty `q` throws.\n- `executeReadQuery()` prepares the SQL, rejects bound parameters, and collects rows from `statement.iterate()` capped at `MAX_RAW_QUERY_ROWS = 1000`; it does not verify that the SQL starts with `SELECT`.\n\n- Rendering caps in `packages/coding-agent/src/tools/sqlite-reader.ts`:\n - ASCII table width `120` (`MAX_RENDER_WIDTH`)\n - per-column width `40` (`MAX_COLUMN_WIDTH`)\n- `#readSqlite()` opens Bun SQLite in `{ readonly: true, strict: true }` and sets `PRAGMA busy_timeout = 3000`.\n\n### Documents\n- `CONVERTIBLE_EXTENSIONS` in `packages/coding-agent/src/tools/read.ts` covers `.pdf`, `.doc`, 
`.docx`, `.ppt`, `.pptx`, `.xls`, `.xlsx`, `.rtf`, `.epub`.\n- `convertFileWithMarkit()` converts the file to text/markdown; line-range and `:raw` selectors then apply to the converted output (`file.pdf:50-100`, `:5-16,40-80`).\n- For PDFs, embedded images are surfaced as browsable handles. markit emits a `<!-- image: <id> (page N, WxHpt) -->` region for each embedded image; `read.ts` rewrites it into a `read <pdf>:<id>.png` hint (as inline code, so spaces/parens in the path can't break markdown). Reading that handle (`doc.pdf:p11-img0.png`) extracts the image — passing markit an `imageDir` that lands in a session-artifact cache (`<artifacts>/pdf-assets/<key>/`, keyed by size+mtime, converted once per file) — and returns it through the normal image-loading path. `doc.pdf:` lists the extractable members; an unknown member errors with the available list. Requested members are matched against extracted basenames, so `..`/separators cannot escape the cache.\n- Conversion failures return a text block like `[Cannot read .pdf file: ...]`.\n\n### Jupyter notebooks\n- `.ipynb` goes through `readEditableNotebookText()` unless `:raw` was requested.\n- Output is editable plain text with markers like:\n\n```text\n# %% [code] cell:0\n...\n```\n\n- Raw mode bypasses that conversion and falls back to file-text reading.\n\n### Images\n- Image detection is metadata-based (`readImageMetadata()`).\n- Max accepted image size is `20 MiB` (`MAX_IMAGE_INPUT_BYTES`, re-exported as `MAX_IMAGE_SIZE`). 
Larger files throw.\n- If `inspect_image.enabled` is true, `read` returns metadata only (MIME, bytes, dimensions, channels, alpha) plus a suggestion to call `inspect_image`.\n- Otherwise it calls `loadImageInput()` and returns:\n - a text note from the image loader\n - an inline image block\n- Unsupported/undecodable image formats throw a `ToolError`.\n\n### Internal URLs\n- `read` does not resolve these itself; it delegates to `InternalUrlRouter.instance().resolve()`.\n- Registered protocols are outside this file, but the router in `packages/coding-agent/src/internal-urls/router.ts` is built for `agent://`, `artifact://`, `history://`, `issue://`, `local://`, `mcp://`, `memory://`, `omp://`, `pr://`, `rule://`, `skill://`, and `vault://`.\n- `#handleInternalUrl()` behavior:\n - parses the URL with `parseInternalUrl()` so colons inside the host segment are legal\n - for `agent://`, treats non-root path extraction or `?q=` extraction as a special no-pagination mode\n - otherwise paginates the resolved text in memory\n - passes `immutable` through to `resolveFileDisplayMode()` so anchors are suppressed for immutable resources such as artifacts, skills, memory, and agent outputs\n - sets `ignoreResultLimits: true` for `skill://` so the full skill text is paginated only by explicit selectors, not by the normal default line limit\n- `issue://<N>` / `pr://<N>` (and the long form `issue://<owner>/<repo>/<N>` / `pr://<owner>/<repo>/<N>`) route through the same SQLite cache the `github` tool writes to; `?comments=0` selects the no-comments rendering. Bare `issue://` / `pr://` (and `issue://<owner>/<repo>` / `pr://<owner>/<repo>`) issue a live `gh issue list` / `gh pr list` for browsing, accepting `?state=`, `?limit=`, `?author=`, `?label=`. 
PR diffs share the same cache through `pr://<N>/diff` (numbered file listing with per-file hints), `pr://<N>/diff/<i>` (single file slice; 1-indexed), and `pr://<N>/diff/all` (verbatim unified diff); the listing and per-file slices are reconstructed from the cached unified-diff payload, so all three variants share one `gh pr diff` invocation per PR. Diff content is served as `text/plain`. Soft TTL `github.cache.softTtlSec` (default 5 minutes), hard TTL `github.cache.hardTtlSec` (default 7 days). Stale-hit returns the cached row and schedules a background refresh.\n\n### Web URLs\n- `parseReadUrlTarget()` accepts `http://`, `https://`, or `www.` targets.\n- Plain URL reads call `executeReadUrl()` in `packages/coding-agent/src/tools/fetch.ts`.\n- `:raw` means raw HTML/body fallback path; plain URL reads prefer rendered/reader-friendly output.\n- `:N`, `:A-B`, `:A+C`, and comma-separated multi-ranges do not refetch when cached output is usable. They page over cached output from the prior or current URL render.\n- URL render pipeline in `renderUrl()`:\n 1. normalize scheme (`https://` added for bare `www.`)\n 2. try special handlers for known sites unless raw\n 3. fetch with `loadPage()`\n 4. if content is image/PDF/DOCX/etc., try binary fetch + markit/image handling\n 5. handle JSON directly, feeds via feed parser, plain text directly\n 6. for HTML and non-raw mode, try markdown alternates, `URL.md`, content negotiation, feed alternates, HTML-to-text renderers, extracted linked documents, then `llms.txt`\n 7. 
fall back to raw body text/html\n- URL output is wrapped with a small header:\n\n```text\nURL: ...\nContent-Type: ...\nMethod: ...\nNotes: ...\n\n---\n```\n\n- `method` records the winning path (`json`, `feed`, `text`, `alternate-markdown`, `md-suffix`, `content-negotiation`, `image`, `markit`, `llms.txt`, `raw`, `raw-html`, etc.).\n- URL reads may return an inline image block when the fetched resource is a supported image and survives resizing.\n\n## Side Effects\n- Filesystem\n - Opens and streams local files.\n - Reads tar/tgz archives fully into memory before indexing (256 MiB cap); ZIP archives are indexed via ranged central-directory reads.\n - May read URL-cache artifact files from the session artifacts directory.\n - Writes URL output artifacts when URL output is truncated or when line-range pagination needs a persisted cache body.\n- Network\n - URL mode performs HTTP fetches, binary refetches, and alternate-endpoint probes.\n- Subprocesses / native bindings\n - Uses Bun SQLite for `.db`/`.sqlite*`.\n - Uses `Bun.Archive` for tar/tgz and `fflate` for zip.\n - URL HTML rendering can delegate into site handlers and HTML-to-text backends from `packages/coding-agent/src/tools/fetch.ts`.\n- Session state\n - Records whole-file snapshots of local text reads into `session.fileSnapshotStore` for later stale-anchor recovery.\n - Passes session `cwd`, `settings`, and `localProtocolOptions` into the process-global `InternalUrlRouter.instance().resolve()` for internal URLs.\n - Uses `session.allocateOutputArtifact()` for cached/truncated URL output.\n- Background work / cancellation\n - Only the deterministic disk reads are non-abortable: plain-file line/range reads (`streamLinesFromFile`, multi-range) and directory listings (`#readDirectory`) are called with `undefined` instead of the `AbortSignal`, so an interrupt mid-read can't surface a misleading \"Operation aborted\" on a read that would have finished instantly. 
Every other branch keeps the signal and its helpers call `throwIfAborted(signal)` to stop promptly: URL/internal-URL reads (network), archive, sqlite, document conversion, image decode, structural summary, conflict scan, and the suffix-glob path resolution.\n\n## Limits & Caps\n- Shared text truncation defaults from `packages/coding-agent/src/session/streaming-output.ts`:\n - `DEFAULT_MAX_LINES = 3000`\n - `DEFAULT_MAX_BYTES = 50 * 1024`\n- Local text open-ended default line limit: `read.defaultLimit`, clamped to `[1, DEFAULT_MAX_LINES]`.\n- Explicit line ranges add `1` leading and `3` trailing context lines on the constrained sides (`RANGE_LEADING_CONTEXT_LINES` / `RANGE_TRAILING_CONTEXT_LINES`).\n- File streaming chunk size: `8 * 1024` bytes (`READ_CHUNK_SIZE`).\n- Local streamed byte budget for line reads: `max(DEFAULT_MAX_BYTES, maxLinesToCollect * 512)`.\n- Structural summaries only run when file size `<= 2 MiB` and line count `<= 20_000`.\n- Image input max: `20 MiB`.\n- Directory tree caps for local directories: depth `2`, per-directory children `12`.\n- Archive directory default list cap: `500` entries.\n- SQLite:\n - default row query limit `20`\n - schema sample limit `5`\n - max query limit `500`\n - raw `?q=` row cap `1000` (`MAX_RAW_QUERY_ROWS`)\n - table list cap `500`\n - render width `120`, column width `40`\n - busy timeout `3000` ms\n- URL read result shown to the model is truncated to `300` lines and `50 KiB` in `executeReadUrl()`; full cached output can be attached as an artifact.\n- Inline fetched URL images:\n - source bytes cap `20 MiB`\n - post-resize inline output cap `300 KiB`\n- Unique suffix auto-resolution glob timeout: `5000` ms.\n- File snapshot store holds `30` paths with up to `4` versions each (`DEFAULT_MAX_PATHS` / `DEFAULT_MAX_VERSIONS_PER_PATH` in `packages/hashline/src/snapshots.ts`); files over `4 MiB` (`SNAPSHOT_MAX_BYTES`) are not snapshotted.\n\n## Errors\n- Validation and operational failures surface as `ToolError`.\n- 
Selector errors include:\n - `Line selector 0 is invalid; lines are 1-indexed. Use :1.`\n - invalid `A+B` / `A-B` shapes\n - `Cannot combine query extraction with line selectors` for `agent://.../path:50`\n- Missing local/archive/sqlite paths first attempt unique suffix resolution; if no unique match exists they error.\n- Out-of-bounds line reads do not throw. They return explanatory text with a suggestion such as `Use :1 ...` or `Use :<last line> ...`.\n- Binary archive entries do not throw; they return a text notice.\n- Document conversion failure returns a text notice.\n- Image oversize/unsupported/invalid cases throw.\n- SQLite parser rejects unsupported parameter combinations early; DB/runtime errors are caught and rethrown as `ToolError(message)`.\n- URL fetch failure does not throw when HTTP fetch succeeds but `response.ok === false`; it returns a failed URL read with `method: \"failed\"` and explanatory notes.\n\n## Notes\n- Hashline anchors are suppressed for raw reads and immutable internal resources because there is no editable backing target for later `edit` consumption.\n- `splitPathAndSel()` intentionally treats unknown trailing `:...` as part of the path so `archive.zip:inner/file` and `db.sqlite:table:key` still work.\n- `resolveReadPath()` contains macOS-specific filename fallbacks for screenshot timestamps, NFD Unicode normalization, and curly apostrophes.\n- A bare `/` resolves to the session cwd, not the filesystem root.\n- URL cache keys are session-scoped and normalized by requested URL + raw/rendered mode; both requested URL and final redirected URL are cached.\n- URL line-range reads request `ensureArtifact: true, preferCached: true` so a later paginated read can reopen the same rendered body from artifact storage.\n- Raw SQLite `q=` execution is not keyword-restricted beyond “no bound parameters”; the read tool relies on the surrounding contract to keep it read-only.\n- The file snapshot store is not a read acceleration cache. 
It exists to verify and recover hashline edits when the file changed after the read.",
101
103
  "tools/recall.md": "# recall\n\n> Search the active long-term memory backend and return matching memories.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/memory-recall.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/recall.md`\n- Hindsight collaborators:\n - `packages/coding-agent/src/hindsight/state.ts` — session state, recall query defaults, prompt-side auto-recall.\n - `packages/coding-agent/src/hindsight/content.ts` — result formatting and UTC timestamp formatting.\n - `packages/coding-agent/src/hindsight/client.ts` — HTTP `recall` call and error mapping.\n - `packages/coding-agent/src/hindsight/bank.ts` — bank id and tag-filter scoping.\n- Mnemopi collaborators:\n - `packages/coding-agent/src/mnemopi/state.ts` — scoped local recall and result formatting with ids.\n - `packages/coding-agent/src/mnemopi/config.ts` — local bank scoping and recall limits.\n - `docs/tools/retain.md` — shared backend, storage, scoping, and retention behavior.\n\n## Inputs\n\n| Field | Type | Required | Description |\n|---|---|---:|---|\n| `query` | `string` | Yes | Natural-language search query. The tool passes it through unchanged except Mnemopi `per-project-tagged` may run an internal shared-bank fallback query. 
|\n\n## Outputs\nReturns a single-shot tool result.\n\nWhen matches exist:\n- `content[0].type = \"text\"`\n- `content[0].text = \"Found <n> relevant memory/memories (as of YYYY-MM-DD HH:MM UTC):\\n\\n<bullet list>\"`\n- `details = {}`\n\nHindsight bullet format comes from `formatMemories(...)`:\n- each bullet is `- <text> [<type>] (<mentioned_at>)`; the type and timestamp suffixes appear only when those fields are present.\n\nMnemopi bullet format comes from `formatScopedRecallWithIds(...)`:\n- each bullet is `- <content> (id: <id>|id unavailable) [<source>] (<YYYY-MM-DD>) c:<score>`; optional source, date, and score suffixes appear only when present.\n\nWhen no matches exist:\n- `content[0].text = \"No relevant memories found.\"`\n- `details = {}`\n\n## Flow\n1. `MemoryRecallTool.createIf(...)` exposes the tool when `memory.backend` is either `\"hindsight\"` or `\"mnemopi\"`.\n2. `execute(...)` wraps the operation in `untilAborted(...)`.\n3. If the backend is `mnemopi`:\n - it reads `session.getMnemopiSessionState()` and throws if the backend was not started;\n - it calls `state.recallResultsScoped(params.query)`;\n - scoped recall queries each configured recall bank with `recallEnhanced(query, recallLimit, { includeFacts: true, channelId: bank })`, merges/deduplicates results by id/content, sorts them, and truncates to `recallLimit`;\n - in `per-project-tagged`, the shared bank may receive one extra fallback query with project-bank literal tokens stripped so broad global memories still match;\n - results are formatted with ids for later `memory_edit` use.\n4. 
If the backend is `hindsight`:\n - it reads `session.getHindsightSessionState()` and throws if the backend was not started;\n - it calls `state.client.recall(...)` with `bankId`, query, configured `budget`, `maxTokens`, `types`, and bank-scope tag filters;\n - `HindsightApi.recall(...)` POSTs `/v1/default/banks/{bank_id}/memories/recall`;\n - results are formatted into a plain-text list with `formatMemories(...)`.\n5. Backend failures are logged with `logger.warn(\"recall failed\", ...)` and rethrown as `Error` instances when needed.\n\n## Modes / Variants\n- Tool path: explicit query-only recall. It does not compose context from recent turns.\n- Backend auto-recall has a richer query-composition path in `HindsightSessionState.beforeAgentStartPrompt(...)` / `maybeRecallOnAgentStart(...)` and `MnemopiSessionState.beforeAgentStartPrompt(...)` / `maybeRecallOnAgentStart(...)`.\n- Hindsight bank scoping:\n - `global` — no tag filter.\n - `per-project` — separate bank id per project label (git primary checkout root basename; cwd basename outside a repo).\n - `per-project-tagged` — shared bank id plus `project:<project label>` filter with `tagsMatch = \"any\"`, so project-tagged and untagged global memories can both surface.\n- Mnemopi bank scoping:\n - `global` — recall reads the shared bank.\n - `per-project` — recall reads the project bank.\n - `per-project-tagged` — recall reads the project bank and shared bank, then merges results.\n- Session scope: reads cross-session memory data, using the active session's cached config and scope.\n\n## Side Effects\n- Network\n - Hindsight: `POST /v1/default/banks/{bank_id}/memories/recall`.\n - Mnemopi: none unless configured local runtime providers perform embedding/LLM work during recall.\n- Session state\n - None on success for the explicit tool path. 
Unlike backend auto-recall, this tool does not update `lastRecallSnippet` or refresh the system prompt.\n- Background work / cancellation\n - Aborts through `untilAborted(...)` if the tool call signal is cancelled.\n\n## Limits & Caps\n- Tool availability requires `memory.backend` to be `\"hindsight\"` or `\"mnemopi\"`; default `memory.backend` is `\"off\"`.\n- Hindsight client default budget for raw `HindsightApi.recall(...)` is `\"mid\"`; this tool overrides from config.\n- Hindsight recall settings:\n - `hindsight.recallBudget = \"mid\"`\n - `hindsight.recallMaxTokens = 1024`\n - `hindsight.recallTypes = [\"world\", \"experience\"]`\n- Mnemopi recall settings:\n - `mnemopi.recallLimit = 8`\n - `mnemopi.scoping` selects which local bank(s) are searched\n- The explicit tool path does not apply `hindsight.recallContextTurns`, `hindsight.recallMaxQueryChars`, `mnemopi.recallContextTurns`, or `mnemopi.recallMaxQueryChars`; those caps only affect backend auto-recall query composition.\n\n## Errors\n- Throws `Mnemopi backend is not initialised for this session.` when `memory.backend == \"mnemopi\"` but no state exists.\n- Throws `Hindsight backend is not initialised for this session.` when `memory.backend == \"hindsight\"` but no state exists.\n- Hindsight HTTP and fetch failures become `HindsightError` with `statusCode` and parsed `details` when available.\n- Mnemopi recall target failures inside `collectScopedRecallResults(...)` are caught per bank and logged only when `mnemopi.debug` is enabled; if all targets fail, the tool can return `No relevant memories found.`\n- Non-`Error` failures caught by the tool are normalized to `new Error(String(err))` before rethrow.\n\n## Notes\n- Shared backend details are in `docs/tools/retain.md`: storage, subagent aliasing, bank scoping, mission setup, and mental-model behavior.\n- Hindsight mental models are not fetched by this tool. 
They may already be present in the agent's developer instructions because the backend caches a `<mental_models>` block separately from recall results.\n- Mnemopi developer instructions may include a `<memories>` block from auto-recall; this explicit tool does not update that block.\n- The tool returns memory hits; it does not synthesize across them. Use `reflect` for that path.\n",
102
104
  "tools/reflect.md": "# reflect\n\n> Synthesize an answer over the active long-term memory backend.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/memory-reflect.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/reflect.md`\n- Hindsight collaborators:\n - `packages/coding-agent/src/hindsight/bank.ts` — best-effort first-use bank/mission setup (`ensureBankExists`).\n - `packages/coding-agent/src/hindsight/state.ts` — session state, shared bank scope, recall/reflect config.\n - `packages/coding-agent/src/hindsight/client.ts` — HTTP `reflect` call and error mapping.\n- Mnemopi collaborators:\n - `packages/coding-agent/src/mnemopi/state.ts` — scoped local recall and context formatting.\n - `docs/tools/retain.md` — shared backend, storage, scoping, and mental-model behavior.\n\n## Inputs\n\n| Field | Type | Required | Description |\n|---|---|---:|---|\n| `query` | `string` | Yes | Question to answer from long-term memory. |\n| `context` | `string` | No | Extra guidance. Hindsight sends it as `context`; Mnemopi appends trimmed context to the recall query under `Additional context:`. |\n\n## Outputs\nReturns a single-shot tool result.\n\nHindsight:\n- `content[0].type = \"text\"`\n- `content[0].text = response.text?.trim() || \"No relevant information found to reflect on.\"`\n- `details = {}`\n- The tool returns the Hindsight server's synthesized text directly; it does not expose raw recall hits.\n\nMnemopi:\n- if no scoped recall results exist: `content[0].text = \"No relevant information found to reflect on.\"`\n- otherwise: `content[0].text = \"Based on recalled memories:\\n\\n<formatted context>\"`\n- `details = {}`\n- The local path performs recall plus formatting; it does not call a separate synthesis endpoint.\n\n## Flow\n1. `MemoryReflectTool.createIf(...)` exposes the tool when `memory.backend` is either `\"hindsight\"` or `\"mnemopi\"`.\n2. `execute(...)` runs under `untilAborted(...)`.\n3. 
If the backend is `mnemopi`:\n - it reads `session.getMnemopiSessionState()` and throws if the backend was not started;\n - if `context` has non-whitespace content, it recalls with `<query>\\n\\nAdditional context:\\n<context>`; otherwise it recalls with `query`;\n - it calls `state.recallResultsScoped(...)` using the same local scoping and merge behavior as `recall`;\n - if results exist, it renders them through `state.formatContextScoped(...)` and prefixes `Based on recalled memories:`.\n4. If the backend is `hindsight`:\n - it reads `session.getHindsightSessionState()` and throws if the backend was not started;\n - it calls `ensureBankExists(...)` with the current `bankId`, config, and the session state's `banksSet`;\n - `ensureBankExists(...)` best-effort `PUT`s `/v1/default/banks/{bank_id}` (`createBank`) with optional `reflect_mission` / `retain_mission` once per bank per session state; failures are swallowed;\n - it calls `state.client.reflect(...)` with `query`, optional `context`, configured recall budget, and bank-scope tag filters;\n - `HindsightApi.reflect(...)` POSTs `/v1/default/banks/{bank_id}/reflect` and defaults its own budget to `\"low\"` when callers omit one; this tool always passes the configured budget;\n - blank or whitespace-only responses are replaced with `No relevant information found to reflect on.`\n5. 
Backend failures are logged with `logger.warn(\"reflect failed\", ...)` and rethrown as `Error` instances when needed.\n\n## Modes / Variants\n- Hindsight tool path: one remote reflect request, optionally focused by `context`.\n- Mnemopi tool path: one local scoped recall followed by context formatting.\n- Hindsight bank scoping:\n - `global` — no tag filter.\n - `per-project` — separate bank id per project label (git primary checkout root basename; cwd basename outside a repo).\n - `per-project-tagged` — shared bank id plus `project:<project label>` filter with `tagsMatch = \"any\"`.\n- Mnemopi bank scoping:\n - `global` — reads the shared bank.\n - `per-project` — reads the project bank.\n - `per-project-tagged` — reads the project bank and shared bank, then merges results.\n- Session scope: reads cross-session memory data, but does not persist local output.\n\n## Side Effects\n- Network\n - Hindsight: optional `PUT /v1/default/banks/{bank_id}` from `ensureBankExists(...)`, then `POST /v1/default/banks/{bank_id}/reflect`.\n - Mnemopi: none unless configured embedding or LLM providers are used by the local runtime during recall.\n- Session state\n - Reads session-held backend scope and config only. 
Does not update `lastRecallSnippet`, Hindsight mental-model cache, or retain queues.\n- Background work / cancellation\n - Aborts through `untilAborted(...)` if the tool call signal is cancelled.\n\n## Limits & Caps\n- Tool availability requires `memory.backend` to be `\"hindsight\"` or `\"mnemopi\"`; default `memory.backend` is `\"off\"`.\n- Tool-level params: only `query` is required; `context` is optional.\n- Hindsight budget setting comes from `hindsight.recallBudget`, default `\"mid\"`.\n- Hindsight `reflect` has no client-side token cap parameter here; unlike `recall`, the tool does not pass `maxTokens`.\n- Hindsight bank initialization tracks up to `MISSION_SET_CAP = 10_000` bank ids per session state, then drops half of the sorted set.\n- Mnemopi result count is capped by `mnemopi.recallLimit`, default `8`.\n\n## Errors\n- Throws `Mnemopi backend is not initialised for this session.` when `memory.backend == \"mnemopi\"` but no state exists.\n- Throws `Hindsight backend is not initialised for this session.` when `memory.backend == \"hindsight\"` but no state exists.\n- Hindsight HTTP and fetch failures become `HindsightError` with `statusCode` and parsed `details` when available.\n- Hindsight `ensureBankExists(...)` failures are silent to the tool caller; only the later reflect request can fail visibly.\n- Mnemopi recall target failures inside `collectScopedRecallResults(...)` are caught per bank and logged only when `mnemopi.debug` is enabled; if all targets fail, the tool can return the no-information text.\n- Non-`Error` failures caught by the tool are normalized to `new Error(String(err))` before rethrow.\n\n## Notes\n- Shared backend details are in `docs/tools/retain.md`: storage, subagent aliasing, bank scoping, seed mental models, and prompt injection.\n- Hindsight `reflect` does not read the cached `<mental_models>` block directly. It queries the Hindsight server over the bank contents. 
The same session may also have separate mental-model context injected into its developer instructions.\n- Hindsight reflect mission and retain mission are bank-level server settings, not per-request payload. The tool just ensures they are present best-effort before reflecting.\n- Mnemopi `reflect` is local recall plus formatting, so its output shape differs from Hindsight's remote synthesized answer.\n",
103
105
  "tools/resolve.md": "# resolve\n\n> Finalizes a pending action by applying or discarding it.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/resolve.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/resolve.md`\n- Key collaborators:\n - `docs/resolve-tool-runtime.md` — preview/apply runtime reference\n - `packages/coding-agent/src/extensibility/custom-tools/loader.ts` — forwards custom pending actions into the queue\n - `packages/coding-agent/src/tools/ast-edit.ts` — built-in preview producer example\n - `packages/coding-agent/src/session/agent-session.ts` — tool-choice queue, standing resolve handler, and invoker access\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `action` | `\"apply\" \\| \"discard\"` | Yes | Whether to commit or reject the pending action. |\n| `reason` | `string` | Yes | Required explanation passed through to the handler. |\n| `extra` | `Record<string, unknown>` | No | Free-form metadata passed through to the handler. Plan approval uses this for data such as a title slug; preview-style actions usually ignore it. |\n\n## Outputs\n- Single-shot result.\n- `execute()` returns whatever the queued or standing invoker returns, with `details` wrapped/augmented to include:\n - `action`\n - `reason`\n - `extra?`\n - `sourceToolName?`\n - `label?`\n - `sourceResultDetails?` — original `result.details` from the apply/reject callback when present\n- If `discard` has no custom reject callback, or the reject callback returns `undefined`, the default success payload is `Discarded: <label>. Reason: <reason>`.\n- The TUI renderer is inline and merges call+result into one block.\n\n## Flow\n1. Preview-producing code can call `queueResolveHandler(...)` with a label, source tool name, `apply(reason, extra?)` callback, and optional `reject(reason, extra?)` callback.\n2. 
Modes can also register a standing resolve handler through `session.setStandingResolveHandler(...)`; `resolve.execute()` consults it only when no queued invoker is active.\n3. `queueResolveHandler(...)` asks the session for a forced `resolve` tool choice and pushes it into the tool-choice queue with `pushOnce(...)`.\n4. The queued entry is marked `now: true`; if the model rejects that forced tool choice, `onRejected` returns `requeue`, so the reminder comes back.\n5. `queueResolveHandler(...)` also injects a `resolve-reminder` steering message:\n\n```text\n<system-reminder>\nThis is a preview. Call the `resolve` tool to apply or discard these changes.\n</system-reminder>\n```\n\n6. When `resolve.execute()` runs, it wraps the call in `untilAborted(...)` and fetches `session.peekQueueInvoker?.() ?? session.peekStandingResolveHandler?.()`.\n7. If no invoker exists, `apply` throws `ToolError(\"No pending action to resolve. Nothing to apply or discard.\")`; `discard` instead returns a success payload `Nothing to discard; no pending action remains.` because the desired end-state (no staged change) already holds.\n8. Otherwise it invokes the current handler with the full params object.\n9. `runResolveInvocation(...)` builds base details from `action`, `reason`, `extra`, `sourceToolName`, and `label`.\n10. For `apply`, it calls the producer's `apply(reason, extra)` callback.\n11. If `apply` throws, `runResolveInvocation(...)` calls `onApplyError` when present. The queued preview integration uses this to re-push the resolve directive and steering reminder so the action remains pending. Non-`ToolError` exceptions are wrapped as `ToolError(\"Apply failed: <message>\")`.\n12. For `discard`, it calls `reject(reason, extra)` when provided. If no reject callback exists or it returns `undefined`, `resolve` fabricates the default discard message.\n13. 
Before returning callback results, it merges resolve metadata into `result.details` so renderer/UI code can show the action, label, and originating tool.\n\n## Modes / Variants\n- `apply`: runs the pending action's `apply(reason, extra?)` callback and returns its content.\n- `discard` with reject callback: runs `reject(reason, extra?)` and returns that callback's content when non-`undefined`.\n- `discard` without reject callback, or with a reject callback returning `undefined`: returns the built-in `Discarded: ...` text payload.\n- `discard` with no pending action at all: returns `Nothing to discard; no pending action remains.` as a success result.\n- Queued handler: one in-flight tool-choice queue invoker, used by preview producers such as `ast_edit`.\n- Standing handler: long-lived mode-owned handler, used as a fallback when no queue invoker is active.\n\n## Side Effects\n- Session state\n - Consumes or invokes the current pending action through the session tool-choice queue or standing handler; `resolve` does not maintain its own stack.\n - Adds a `resolve-reminder` steering message when a queued preview is registered.\n - On queued apply failure, requeues the same pending action before rethrowing so the model can discard or retry instead of losing the pending preview.\n- User-visible prompts / interactive UI\n - The visible effect depends on the preview-producing tool and the resolve renderer.\n - Renderer result blocks show `Accept`, `Discard`, or `Failed`, include the pending action label, and display the reason.\n- Background work / cancellation\n - `untilAborted(...)` lets abort signals interrupt resolution before or while the callback awaits.\n\n## Limits & Caps\n- Hidden tool: `ResolveTool.hidden = true`, and normal requested-tool filtering removes `resolve`; `createTools(...)` adds it separately as a hidden tool.\n- Exactly one active queue invoker is consulted per call via `session.peekQueueInvoker()`; if none exists, one standing handler may be 
consulted via `session.peekStandingResolveHandler()`.\n- There is no independent queue depth cap in this tool; ordering follows the shared tool-choice queue and mode-owned standing handler lifecycle.\n\n## Errors\n- `apply` with no pending action or standing handler: throws `ToolError(\"No pending action to resolve. Nothing to apply or discard.\")`. `discard` in the same situation succeeds with `Nothing to discard; no pending action remains.` instead of erroring.\n- `apply` callback throws `ToolError`: the original `ToolError` propagates.\n- `apply` callback throws any other value: `resolve` wraps it as `ToolError(\"Apply failed: <message>\")` after running `onApplyError` when present.\n- `reject` callback exceptions propagate without the apply-specific wrapper.\n- Aborts during `untilAborted(...)` surface as the underlying abort error from the utility.\n\n## Notes\n- `reason` and `extra` are passed through; `resolve` itself does not interpret them.\n- `queueResolveHandler(...)` is the canonical built-in preview integration point; custom tools use `pushPendingAction(...)`, which the loader forwards into the same mechanism.\n- Standing handlers let modes accept `resolve` invocations without forcing the tool choice every turn.\n- `sourceResultDetails` is added only when the apply/reject callback returned a non-null `details` field; custom pending-action `details` are not forwarded automatically by the loader.\n",
@@ -108,7 +110,7 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
108
110
  "tools/ssh.md": "# ssh\n\n> Execute one remote command on a discovered SSH host.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ssh.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ssh.md`\n- Key collaborators:\n - `packages/coding-agent/src/ssh/ssh-executor.ts` — runs `ssh`, captures output\n - `packages/coding-agent/src/ssh/connection-manager.ts` — master-connection reuse, host probing\n - `packages/coding-agent/src/ssh/sshfs-mount.ts` — optional `sshfs` mount side effect\n - `packages/coding-agent/src/discovery/ssh.ts` — discovers host configs\n - `packages/coding-agent/src/capability/ssh.ts` — canonical host shape\n - `packages/coding-agent/src/session/streaming-output.ts` — tail streaming, truncation, artifacts\n - `packages/coding-agent/src/tools/tool-timeouts.ts` — timeout clamp rules\n - `packages/utils/src/dirs.ts` — user/project ssh config paths\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `host` | `string` | Yes | Host name key from discovered SSH config entries, not an arbitrary hostname/IP. |\n| `command` | `string` | Yes | Remote command string passed to `ssh` as the remote command. |\n| `cwd` | `string` | No | Remote working directory. The tool prepends a shell-specific `cd`/`Set-Location` wrapper. |\n| `timeout` | `number` | No | Timeout in seconds. Default `60`; clamped to `1..3600`. 
|\n\n## Outputs\nThe tool returns a standard text tool result built in `packages/coding-agent/src/tools/ssh.ts`:\n\n- `content`: one text block containing combined remote stdout+stderr, or `\"(no output)\"` when empty.\n- `details.meta.truncation`: present when output exceeded the in-memory tail window; derived from the executor summary.\n\nStreaming behavior:\n\n- While the command runs, `onUpdate` receives tail-only text snapshots built from `TailBuffer` in `packages/coding-agent/src/session/streaming-output.ts`.\n- Final output is single-shot after process exit.\n\nSide-channel artifacts:\n\n- When session artifact allocation is available and output exceeds the spill threshold, full output is written to a session artifact file and the returned summary carries its `artifactId` internally.\n- The ssh tool itself does not print the `artifact://...` URI into the result text.\n\nFailure behavior:\n\n- Unknown host, missing host config, timeout, cancellation, SSH startup failure, key validation failure, or non-zero remote exit all surface as thrown `ToolError`s.\n- Non-zero remote exit includes captured output plus `Command exited with code N`.\n\n## Flow\n1. `loadSshTool()` in `packages/coding-agent/src/tools/ssh.ts` calls `loadCapability(sshCapability.id, { cwd: session.cwd })` to discover hosts.\n2. `packages/coding-agent/src/discovery/ssh.ts` loads host entries from, in this order: project managed ssh config, user managed ssh config, `ssh.json` in the repo root, `.ssh.json` in the repo root.\n3. `getSSHConfigPath(\"project\")` and `getSSHConfigPath(\"user\")` in `packages/utils/src/dirs.ts` resolve those managed files to `.omp/ssh.json` in the project and `~/.omp/agent/ssh.json` in the user config dir. This tool does not read `~/.ssh/config`.\n4. Capability loading deduplicates by host name with first item winning; provider order is priority-sorted and the SSH JSON provider registers at priority `5`.\n5. 
`loadHosts()` in `packages/coding-agent/src/tools/ssh.ts` builds `hostsByName` and drops later duplicates again with `if (!hostsByName.has(host.name))`.\n6. Tool description text is built from `packages/coding-agent/src/prompts/tools/ssh.md` plus an `Available hosts:` list. Each host entry calls `getCachedHostInfoSync()` to show detected shell/OS when cached; otherwise it renders `detecting...`.\n7. On execute, `SshTool.execute()` rejects any `host` not in the discovered host-name set.\n8. `ensureHostInfo()` in `packages/coding-agent/src/ssh/connection-manager.ts` ensures an SSH master connection exists, loads cached host info from disk if present, and probes remote OS/shell when cache is missing or stale.\n9. `buildRemoteCommand()` in `packages/coding-agent/src/tools/ssh.ts` prepends a cwd change when `cwd` is provided:\n - Unix-like or Windows compat shells: `cd -- '<cwd>' && <command>`\n - Windows PowerShell: `Set-Location -Path '<cwd>'; <command>`\n - Windows cmd: `cd /d \"<cwd>\" && <command>`\n10. `clampTimeout(\"ssh\", rawTimeout)` applies the `1..3600` second clamp from `packages/coding-agent/src/tools/tool-timeouts.ts`.\n11. `executeSSH()` in `packages/coding-agent/src/ssh/ssh-executor.ts` calls `ensureConnection(host)` again, opportunistically mounts the remote host root with `sshfs` if available, optionally wraps the command in `bash -c` or `sh -c` for Windows compat mode, then spawns `ssh` with `ptree.spawn`.\n12. Output from both stdout and stderr is piped into one `OutputSink`; chunks are sanitized and forwarded to streaming updates through `streamTailUpdates()`.\n13. On normal exit, the sink returns combined output plus truncation counters. On timeout or abort, `executeSSH()` returns `cancelled: true` and prefixes the output with a notice line such as `[SSH: ...]` or `[Command aborted: ...]`.\n14. 
`SshTool.execute()` converts `cancelled: true` into `ToolError`, converts non-zero exit codes into `ToolError`, otherwise returns the text result with truncation metadata.\n\n## Modes / Variants\n- **Tool unavailable**: `loadSshTool()` returns `null` when discovery finds no hosts, so the tool is not registered for that session.\n- **Unix-like target**: remote command is passed through directly, with optional `cd -- ... &&` prefix.\n- **Windows native shell**: cwd wrapper uses PowerShell `Set-Location` or cmd `cd /d`; command otherwise runs in the remote default Windows shell.\n- **Windows compat shell**: if host probing finds `bash` or `sh` on Windows, `executeSSH()` wraps the remote command as `bash -c '...'` or `sh -c '...'`. Host config can force compat on/off with `compat`.\n- **Cached vs probed host info**: shell/OS detection comes from in-memory cache, persisted JSON under the remote-host dir, or a fresh probe over SSH.\n- **Truncated vs untruncated output**: small output stays in memory; large output keeps only the last 50 KiB in memory and may spill full output to an artifact file.\n\n## Side Effects\n- Filesystem\n - Reads managed SSH config JSON plus legacy `ssh.json` / `.ssh.json`.\n - Validates private-key path existence and permissions before connecting.\n - Persists probed host info as JSON under the remote-host cache dir via `persistHostInfo()`.\n - May create the SSH control socket dir and, when `sshfs` exists, remote mount dirs.\n - May write full command output to a session artifact file.\n- Network\n - Opens SSH connections to the selected host.\n - May issue extra probe commands to detect OS/shell and compat shells.\n- Subprocesses / native bindings\n - Requires `ssh` on `PATH`; spawns it for connection checks, master startup, probing, and command execution.\n - May call `sshfs`, `mountpoint`, `fusermount`/`fusermount3`, or `umount`.\n - Sanitizes streamed text with `@oh-my-pi/pi-natives` text sanitization.\n- Session state (transcript, memory, 
jobs, checkpoints, registries)\n - Uses session artifact allocation when available.\n - Registers postmortem cleanup hooks for SSH master connections and sshfs mounts.\n - Tool concurrency is `exclusive`, so the agent scheduler should not run multiple ssh tool calls concurrently.\n- Background work / cancellation\n - Process spawn receives the tool `AbortSignal`.\n - Cancellation/timeout ends the running ssh process and returns a cancelled result that the tool turns into an error.\n\n## Limits & Caps\n- Timeout defaults/clamps: `default=60`, `min=1`, `max=3600` in `packages/coding-agent/src/tools/tool-timeouts.ts`.\n- Output tail window: `DEFAULT_MAX_BYTES = 50 * 1024` in `packages/coding-agent/src/session/streaming-output.ts`.\n- Output sink spill threshold defaults to the same `50 KiB`; once exceeded, only the tail remains in memory.\n- SSH master reuse persistence: `ControlPersist=3600` in `packages/coding-agent/src/ssh/connection-manager.ts` and `packages/coding-agent/src/ssh/sshfs-mount.ts`.\n- SSH host info schema version: `HOST_INFO_VERSION = 2` in `packages/coding-agent/src/ssh/connection-manager.ts`; stale cache entries are reprobed.\n- Streaming tail buffer compacts after more than `10` pending chunks (`MAX_PENDING`) before trimming.\n\n## Errors\n- `Unknown SSH host: ... 
Available hosts: ...` when the model passes a host name not present in discovery.\n- `SSH host not loaded: ...` if the discovered-name set and `hostsByName` map diverge.\n- `ssh binary not found on PATH` when `ssh` is unavailable.\n- `SSH key not found: ...`, `SSH key is not a file: ...`, or `SSH key permissions must be 600 or stricter: ...` from key validation.\n- `Failed to start SSH master for <target>: <stderr>` when control-master startup fails.\n- Non-zero remote command exit becomes `ToolError` with captured output and `Command exited with code N`.\n- Timeout becomes a cancelled result with output notice `[SSH: <timeout message>]`, then `ToolError`.\n- Abort becomes a cancelled result with output notice `[Command aborted: <message>]`, then `ToolError`.\n- `sshfs` mount failures are logged and ignored in `executeSSH()`; they do not fail the tool call.\n- Discovery parse problems do not fail tool loading; they become capability warnings. If all sources are empty/invalid, the tool simply does not load.\n\n## Notes\n- Host discovery is JSON-based only. The tool does not parse OpenSSH config files.\n- Discovery expands environment variables recursively in the parsed JSON and expands `~` in `key`/`keyPath`.\n- Host names are capability keys; the model must pass the config key, not the raw hostname.\n- Commands run without a PTY. 
`executeSSH()` uses `ptree.spawn(..., { stdin: \"pipe\", stderr: \"full\" })` and does not request an interactive terminal.\n- The tool exposes `cwd` but no `env`, `pty`, upload, download, or explicit file-transfer fields.\n- Lower layers support an `artifactId` for full output and a `remotePath` mount target, but `SshTool.execute()` does not expose those knobs.\n- Both stdout and stderr are merged into one output stream; ordering is whatever arrives through the two streams.\n- `StrictHostKeyChecking=accept-new` and `BatchMode=yes` are always set for connection checks, master startup, and command runs.\n- Connection reuse is keyed by discovered host name, not by raw target tuple alone.\n- `closeAllConnections()` and sshfs unmount cleanup run through postmortem hooks, not per-call teardown.\n",
109
111
  "tools/task.md": "# task\n\n> Spawn subagents — one per call, or a `tasks[]` batch per call (`task.batch`, default on). With `async.enabled=true`, spawns run in the background; otherwise the call blocks until they finish.\n\n## Source\n- Entry: `packages/coding-agent/src/task/index.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/task.md`\n- Key collaborators:\n - `packages/coding-agent/src/task/types.ts` — dynamic schema, progress/result types, output caps.\n - `packages/coding-agent/src/task/discovery.ts` — discover project/user/plugin/bundled agents.\n - `packages/coding-agent/src/task/agents.ts` — bundled agent definitions and frontmatter parsing.\n - `packages/coding-agent/src/task/executor.ts` — create child sessions, run subagents, collect output, hand finished sessions to the lifecycle manager.\n - `packages/coding-agent/src/registry/agent-lifecycle.ts` — idle-TTL parking and revival of finished subagents.\n - `packages/coding-agent/src/registry/agent-registry.ts` — process-global agent directory (`running | idle | parked | aborted`).\n - `packages/coding-agent/src/async/job-manager.ts` — background job registration, progress, and result delivery.\n - `packages/coding-agent/src/task/parallel.ts` — `Semaphore` used for the session-scoped concurrency bound.\n - `@oh-my-pi/pi-natives` (`crates/pi-iso`) — isolation PAL: `isoResolve` / `isoStart` / `isoStop` backend resolution and fallback.\n - `packages/coding-agent/src/task/worktree.ts` — isolation mode mapping (`parseIsolationMode`) and lifecycle (`ensureIsolation`/`cleanupIsolation`), patch capture, branch merge.\n - `packages/coding-agent/src/task/output-manager.ts` — session-scoped `agent://` id allocation.\n - `packages/coding-agent/src/task/name-generator.ts` — default AdjectiveNoun agent ids.\n - `packages/coding-agent/src/internal-urls/agent-protocol.ts` — resolve `agent://<id>` to saved subagent output.\n - `packages/coding-agent/src/internal-urls/history-protocol.ts` — resolve 
`history://<id>` to a concise transcript.\n - `packages/coding-agent/src/tools/index.ts` — tool registration and recursion-depth gating.\n - `packages/coding-agent/src/sdk.ts` — child-session router/tool wiring and per-subagent `AgentOutputManager`.\n - `docs/task-agent-discovery.md` — deeper discovery and precedence notes.\n\n## Inputs\n\nThe wire schema is shape-swapped by `task.batch` (default on). One unit of work is the task item `{ id?, description?, role?, assignment, isolated? }` (`isolated` only when `task.isolation.mode` is not `none`):\n\n- **Batch shape** (`task.batch` on): `{ agent, context, tasks: item[] }` — one subagent per item, all run under the same fan-out rules. `context` is **required** shared background rendered into every spawned subagent's system prompt (`CONTEXT` section); `isolated` is per item.\n- **Flat shape** (`task.batch` off): `{ agent, ...item }` — exactly one spawn per call. Shared background goes into a `local://` file (e.g. `local://ctx.md`) that each assignment references; subagents share the parent's `local://` root.\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `agent` | `string` | Yes | Agent type to spawn (both shapes). |\n| `context` | `string` | Yes (batch) | Shared background prepended to every spawn of the call via the subagent system prompt. Rejected when `task.batch` is off. |\n| `tasks` | `array` | Yes (batch) | One task item per subagent. Provided ids must be unique within the call (case-insensitive). Rejected when `task.batch` is off. |\n| `id` | `string` | No | Stable agent id, schema max length 48. Defaults to a generated AdjectiveNoun name. Uniquified per session by `AgentOutputManager`. Item field in batch shape, top-level in flat shape. |\n| `description` | `string` | No | UI label only; the subagent never sees it. Item field in batch shape, top-level in flat shape. 
|\n| `role` | `string` | No | Specialist role/expertise the subagent embodies; schema max length 256 (`ROLE_INPUT_MAX`). The full trimmed text feeds the subagent's system-prompt identity (`role` preamble field); a one-line normalized form (`oneLineLabel`, `ROLE_LABEL_MAX = 80`) becomes its registry/roster display name, falling back to the agent type name when omitted. Item field in batch shape, top-level in flat shape. |\n| `assignment` | `string` | Yes | The work — complete, self-contained instructions. Empty-after-trim is rejected. Item field in batch shape, top-level in flat shape. |\n| `isolated` | `boolean` | No | Run in an isolated workspace and return patches. Exists only when `task.isolation.mode` is not `none`; per item in batch shape, top-level in flat shape. Isolated agents are torn down at completion — not revivable. |\n\nRuntime stays permissive: the flat form is accepted even while `task.batch` is on (internal callers such as the commit flow's `analyze_files`, and stale transcripts). The model only ever sees one shape.\n\nThere is no per-call `schema` parameter. Structured output comes from the agent definition's `output` frontmatter, the inherited parent session schema, or — for ad-hoc workflows — the eval bridge's `agent(prompt, schema)`.\n\n## Outputs\n\nThe tool returns one text block plus `details: TaskToolDetails`.\n\nBackground response (`async.enabled=true`):\n- `content`: `` Spawned agent `<id>` (job `<jobId>`). The result will be delivered when it yields. ... `` plus a coordination hint (`irc` DM when enabled, otherwise `job`). A batch call instead returns `` Spawned N background agents using <agent>. ... `` with a per-agent `` - `<id>` (job `<jobId>`) `` listing.\n- `details`: `{ projectAgentsDir: null, results: [], totalDurationMs: 0, progress: [<seeded AgentProgress per spawn>], async: { state: \"running\", jobId, type: \"task\" } }`. 
A batch call keeps one shared `progress[]` snapshot; `async.jobId` is the first started job and `async.state` aggregates (\"running\" until every job settles, \"failed\" if any spawn failed).\n- Live progress keeps streaming into the same tool block via `onUpdate(...)`; each final result arrives later as an async-result injection into the parent conversation. The delivery text appends a follow-up hint: `` <id> is now idle — message it via `irc` to follow up; transcript at history://<id> `` (aborted variant points at the transcript only).\n\nSettled response (`async.enabled=false`, no job manager, blocking agent, or async job body):\n- `content`: summary rendered from `packages/coding-agent/src/prompts/tools/task-summary.md` with a preview capped at 5000 chars; `agent://<id>` holds the full output. A sync batch concatenates the per-spawn summaries.\n- `details.results`: one `SingleResult` per spawn; `usage`, `outputPaths` populated (aggregated across spawns for a sync batch).\n\n`SingleResult` includes:\n- identity: `index`, `id`, `agent`, `agentSource`, `description`, optional `assignment`\n- status: `exitCode`, optional `error`, optional `aborted`, optional `abortReason`, optional `retryFailure`\n- output: `output`, `stderr`, `truncated`, `durationMs`, `tokens`, `requests`, optional `contextTokens`/`contextWindow`\n- artifact metadata: `outputPath?`, `patchPath?`, `branchName?`, `nestedPatches?`, `outputMeta?`\n- extracted tool data: `extractedToolData?` from registered subprocess tool handlers such as `yield` and `report_finding`\n\nArtifacts and side channels:\n- Every subagent with an artifacts dir writes `<id>.md`; `agent://<id>` resolves to that file.\n- If the output file is JSON, `agent://<id>/<path>` and `agent://<id>?q=<query>` perform JSON extraction.\n- Each subagent gets `<id>.jsonl` session history when the parent persists artifacts; `history://<id>` renders it as a concise transcript (works for live and parked agents).\n- Isolated patch mode writes 
`<id>.patch` before merge.\n\n## Flow\n1. `TaskTool.create(...)` discovers agents once per cwd through a process-level memo (`discoverAgentsForCreate`) to render the dynamic prompt description.\n2. `execute(...)` repairs raw params (`repairTaskParams`), then validates: `schema` is always rejected; `tasks`/`context` are rejected unless `task.batch` is on; batch calls need a non-empty `tasks` (per-item assignments, unique provided ids), a non-empty shared `context`, and no top-level `assignment`; flat calls need `assignment`. The call is then normalized into its spawn list (`resolveSpawnItems`).\n3. Sync execution runs when `async.enabled=false`, the session has no `AsyncJobManager` (orphaned host), or the selected agent definition declares `blocking: true`; the call then runs every spawn through `#executeSync(...)` inline under the session-scoped semaphore.\n4. Background execution runs only when `async.enabled=true` and the session has an `AsyncJobManager`:\n - agent ids are allocated up front via `AgentOutputManager.allocate(item.id || generateTaskName())`, one per spawn;\n - one `type: \"task\"` job per spawn is registered with `session.asyncJobManager` (`id` = agent id, `queued: true`, `ownerId` = caller agent id) and the tool returns immediately;\n - each job body acquires the session-scoped `Semaphore` (one per `TaskTool` instance, sized from `task.maxConcurrency` at first use), marks the job running, runs `#executeSync(...)` with that spawn's params, and reports progress through the shared `buildAsyncDetails`/`onUpdate`;\n - a failed or aborted run throws `TaskJobError` so the job lands `failed`, but the agent itself stays registered and interrogable.\n5. `#executeSync(...)` runs the spawn path (`#runSpawn`), which rediscovers agents from disk, so runtime resolution can differ from the create-time description.\n6. 
It resolves the requested agent, rejects unknown or settings-disabled agents, and enforces parent spawn policy plus `PI_BLOCKED_AGENT` self-recursion prevention.\n7. Output schema priority: agent frontmatter `output` → inherited parent session schema (the call itself never carries one).\n8. Plan mode swaps in an `effectiveAgent` with a read-only tool subset and plan-mode prompt; `runSubprocess(...)` receives the effective agent.\n9. If `isolated`, it requires a git repo (`getRepoRoot(...)` / `captureBaseline(...)`), maps `task.isolation.mode` to a backend-kind hint (`parseIsolationMode`), and materializes the workspace via the natives PAL (`ensureIsolation` → `isoResolve`/`isoStart`), walking the candidate list when a backend is unavailable.\n10. Artifacts dir comes from the parent session file when available, otherwise a temp dir. When the session is executing an approved plan, the plan reference is handed to the subagent.\n11. Non-isolated spawns call `runSubprocess(...)` directly with parent cwd; isolated spawns run inside the isolation workspace, then commit to a branch (`mergeMode === \"branch\"`) or capture a patch, and always clean up the workspace.\n12. `runSubprocess(...)` creates a child agent session with an isolated settings snapshot (forcing `async.enabled = false` and `bash.autoBackground.enabled = false` — subagents are internally synchronous), child `agentId` equal to the allocated id, child internal URL router/`AgentOutputManager`, output schema, the shared `context` (batch calls) in the system prompt's `CONTEXT` section, the per-spawn `role` (when given, via `resolveSubagentDisplayName`) as the subagent's system-prompt persona and registry/roster display name, and the IRC peer roster in the system prompt.\n13. 
Child tool availability: explicit `agent.tools` if provided; auto-add `task` when the agent has `spawns` and depth allows; strip `task` at `task.maxRecursionDepth`; ensure `irc` is present in explicit tool lists; expand `exec` to `eval` + `bash`; strip parent-owned `todo`.\n14. The child must finish through the hidden `yield` tool; up to 3 reminder prompts, the last forcing `toolChoice = yield` when supported. `finalizeSubprocessOutput(...)` reconciles raw text, `yield` payloads, structured schemas, `report_finding` data, and abort states.\n15. End-of-run lifecycle (keep-alive, in `runSubprocess`'s finalizer):\n - hard abort (caller signal / wall-clock / budget) → registry status `aborted`, session disposed — terminal;\n - isolated run → status `parked` without a reviver (workspace is merged + cleaned, so the session is not revivable; transcript stays readable via `history://`), then session disposed and detached;\n - everything else (success and failure alike) → status `idle` with the live session attached, and `AgentLifecycleManager.global().adopt(id, { idleTtlMs, revive })` arms the park timer. The reviver reopens the session JSONL (park closed the writer, so the single-writer lock is taken cleanly).\n16. Lifecycle thereafter: `idle` agents are parked after `task.agentIdleTtlMs` (session disposed; `AgentRef` + session file retained); messaging (`irc`) or the Agent Hub revives them back to `idle`. `\"Main\"` is never parked.\n\n## Modes / Variants\n- Execution mode\n - Background job — `async.enabled=true`; spawns go through `AsyncJobManager`.\n - Sync inline — `async.enabled=false`, no job manager, or `blocking: true` agent.\n- Batch mode (`task.batch`, default on)\n - on — `{ agent, context, tasks[] }`: one independent spawn per item, required `context` shared across the call's spawns, `isolated` per item. 
Lifecycle, revival, and concurrency semantics match N parallel single calls.\n - off — single spawn per call; `tasks`/`context` are rejected and removed from the schema.\n- Isolation mode (`task.isolation.mode`): `none`, `auto`, `apfs`, `btrfs`, `zfs`, `reflink`, `overlayfs`, `projfs`, `block-clone`, `rcopy` (legacy `worktree`, `fuse-overlay`, `fuse-projfs` accepted for back-compat); the PAL resolves the actual backend with fallback.\n- Isolation merge strategy: patch mode (capture/apply root patches) or branch mode (commit to `omp/task/<id>`, cherry-pick into parent).\n- Agent source precedence: project custom agents, then user custom agents, then bundled agents (`explore`, `plan`, `designer`, `reviewer`, `task`, `quick_task`, `librarian`, `oracle`).\n\n## Side Effects\n- Filesystem\n - Writes `<id>.jsonl` and `<id>.md` under the session artifacts dir or a temp task dir; isolated patch mode writes `<id>.patch`.\n - Creates/removes worktrees or overlay mount directories; branch mode creates temporary worktrees and task branches.\n- Network\n - Child sessions may use whichever networked tools/models their active tool set permits.\n - MCP proxy tools can call existing parent MCP connections with a 60_000 ms timeout.\n- Subprocesses / native bindings\n - Isolation backends run through the `pi-natives` PAL (`crates/pi-iso`): kernel `overlay` with `fuse-overlayfs`/`fusermount[3]` fallback on Linux, APFS/Btrfs/ZFS/reflink clones, ProjFS on Windows, recursive copy as last resort.\n - Git operations for baseline capture, patch apply, worktrees, branches, stash, cherry-pick, commits.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Creates child `AgentSession` instances with isolated settings snapshots; finished sessions stay registered in the process-global `AgentRegistry` as `idle`/`parked` until process teardown or explicit release.\n - With `async.enabled=true`, registers one async job per spawn in `session.asyncJobManager`; completion is 
injected into the parent as an async-result message.\n - Arms idle-TTL timers in `AgentLifecycleManager` (unref'd; they never hold the process open).\n - Emits `task:subagent:event`, `task:subagent:progress`, and `task:subagent:lifecycle` on the parent event bus.\n - Allocates session-scoped output ids through `AgentOutputManager` so `agent://` stays unique across invocations.\n - Shares the parent `local://` root and `ArtifactManager` with subagents.\n- Background work / cancellation\n - `job cancel` (or parent tool-call abort) cancels background jobs; parent tool-call abort cancels sync runs through the call signal. A hard-aborted run lands `aborted` and is torn down.\n - Missing-`yield` recovery sends up to three internal reminder prompts to the child session.\n\n## Limits & Caps\n- Concurrency: one session-scoped `Semaphore` sized from `task.maxConcurrency` at first use (later setting changes do not resize it) bounds concurrent subagents across parallel `task` calls — both async job bodies and the sync fallback acquire it.\n- Idle TTL: `task.agentIdleTtlMs`, default `420_000` ms (7 min); `<= 0` disables parking and keeps idle sessions live until exit.\n- Per-subagent output truncation: `MAX_OUTPUT_BYTES = 500_000` and `MAX_OUTPUT_LINES = 5000` in `packages/coding-agent/src/task/types.ts` (overridable via `PI_TASK_MAX_OUTPUT_BYTES` / `PI_TASK_MAX_OUTPUT_LINES`). Full raw output is still written to `<id>.md`.\n- Progress coalescing: `PROGRESS_COALESCE_MS = 150`; recent-output tail: `RECENT_OUTPUT_TAIL_BYTES = 8 * 1024` (last 8 non-empty lines).\n- Missing-`yield` reminder retries: `MAX_YIELD_RETRIES = 3`; MCP proxy timeout: `MCP_CALL_TIMEOUT_MS = 60_000` — both in `packages/coding-agent/src/task/executor.ts`.\n- Agent id schema cap: `id` `maxLength: 48` in `packages/coding-agent/src/task/types.ts`. 
Prompt text says ids should be `≤32` chars; this mismatch is real.\n- Soft request budget (`task.softRequestBudget`) and wall clock (`task.maxRuntimeMs`) apply to every spawn.\n- Recursion depth gate: `task.maxRecursionDepth`; `packages/coding-agent/src/tools/index.ts` hides the `task` tool at or beyond the limit, and `runSubprocess(...)` also strips child `task` access at max depth.\n- Final inline summary preview uses `fullOutputThreshold = 5000` chars in `packages/coding-agent/src/task/index.ts`; `agent://<id>` points to the full artifact.\n\n## Errors\n- Parameter validation failures are returned as normal tool text with empty `results`:\n - `schema` (never accepted)\n - `tasks` / `context` while `task.batch` is disabled\n - missing/empty `agent`\n - batch calls: missing/empty `tasks`, an item without `assignment`, duplicate provided ids, missing shared `context`, top-level `assignment` alongside `tasks`\n - flat calls: missing/empty `assignment`\n - unknown or settings-disabled agent, spawn-policy denial, requesting `isolated` while isolation mode is `none`\n- Isolated execution without a git repo returns `Isolated task execution requires a git repository. 
...`; unavailable backends fall back through the PAL candidate list (reported via `fellBack`/`fallbackReason`), other backend errors rethrow, and exhausting every candidate errors with the fallback reason.\n- Job registration failure returns `Failed to start background task job(s): ...`; a batch that schedules only some jobs reports the failed ids in the immediate text and keeps the started ones running.\n- Child failures surface as `SingleResult.exitCode = 1` with `stderr`/`error` populated; the async job is marked failed but the delivery text still carries the output plus a follow-up/transcript hint.\n- If the child omits `yield`, `finalizeSubprocessOutput(...)` injects warnings such as `SYSTEM WARNING: Subagent exited without calling yield tool after 3 reminders.`\n- `agent://<id>` resolution errors are model-visible when another tool reads them: no session, no artifacts dir, missing id, conflicting extraction syntax, or invalid JSON for extraction.\n\n## Notes\n- Parallelism is parallel `task` calls in one assistant message — or, with `task.batch`, a `tasks[]` batch in one call; either way the session-scoped semaphore bounds the fan-out. With `async.enabled=true`, each spawn is an independent background job.\n- Shared background convention without batch mode: write it once to a `local://` file and reference that path in each assignment — subagents share the parent's `local://` root. With `task.batch`, the required `context` parameter carries the shared background directly into each spawn's system prompt.\n- Prefer messaging an existing agent (`irc`) over a fresh spawn for follow-up work: it already holds the relevant context. `irc` op:\"list\" shows idle/parked candidates; messaging a parked agent revives it. 
`history://<id>` shows what an agent has done.\n- `irc` availability is derived, not configured (`isIrcEnabled` in `packages/coding-agent/src/tools/irc.ts`): it exists exactly when there is someone to message — the session can spawn subagents, or it is a subagent itself. Messaging is the only follow-up path to a finished subagent, so task without irc would strand idle agents.\n- Subagents are internally synchronous: the executor forces `async.enabled = false` and `bash.autoBackground.enabled = false` in the child settings snapshot, so there are no fire-and-forget grandchildren.\n- Agent discovery precedence is first-wins by exact name: project `.omp` agents dir before the user `.omp` dir (task agents only load from `.omp` roots; `.claude`/`.codex`/`.gemini` agent dirs are skipped), Claude plugin agent dirs after config dirs, bundled agents last. Create-time discovery is memoized per cwd for the prompt description; execution-time discovery stays fresh.\n- Child sessions do not inherit conversation history. Built-in carry-over is the workspace tree/skills/context files, the shared `local://` root, and the approved-plan reference when one exists.\n- When the parent passes `mcpManager`, child sessions disable standalone MCP discovery and get proxy tools that reuse parent connections.\n- Branch-mode merge temporarily stashes the parent repo before cherry-picking; a stash-pop conflict does not unmerge the cherry-picked commits — they stay on HEAD, the stash entry is preserved, and the conflict is surfaced separately as `stashConflict`. 
Patch mode only applies the combined root patch when `git.patch.canApplyText(...)` succeeds; failures leave the `.patch` artifact for manual handling.\n- Nested git repos are diffed independently inside isolated workspaces and merged separately with `applyNestedPatches(...)`.\n- `agent://` ids are name-based (`Task` first, `Task-2`/`Task-3` only when the name repeats, nested like `Parent.Child`) by `AgentOutputManager`; this is what prevents artifact collisions across repeated or nested invocations.\n",
110
112
  "tools/todo.md": "# todo\n\n> Applies ordered mutations to the session todo list and returns a text summary plus the full phase/task state.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/todo.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/todo.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/index.ts` — registers tool, exposes session hooks, gates availability.\n - `packages/coding-agent/src/modes/controllers/event-controller.ts` — updates the visible todo UI on tool completion.\n - `packages/coding-agent/src/session/agent-session.ts` — stores cached phases, strips done/dropped tasks on session resume, emits failure reminders.\n - `packages/coding-agent/src/modes/controllers/todo-command-controller.ts` — `/todo` command path, custom-entry persistence, transcript reminder injection.\n - `packages/coding-agent/src/tools/render-utils.ts` — collapsed-preview cap for renderer trees.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `ops` | `TodoOpEntry[]` | Yes | Ordered operations to apply. `minItems: 1`.\n\n### `TodoOpEntry`\n\n| Op | Required fields | Optional fields | Effect |\n| --- | --- | --- | --- |\n| `init` | `list` **or** flat `items` | `phase` (names the phase for the flat `items` form; defaults to `Tasks`) | Replaces the entire list — with `list`, uses the given phases; with a flat `items` array, synthesizes one phase. Every new task starts `pending` before normalization. |\n| `start` | `task` | None | Marks one task `in_progress`; any other `in_progress` task is demoted to `pending`. |\n| `done` | `task` or `phase` or neither | None | Marks the target task, phase, or all tasks `completed`. |\n| `drop` | `task` or `phase` or neither | None | Marks the target task, phase, or all tasks `abandoned`. |\n| `rm` | `task` or `phase` or neither | None | Removes the target task, clears the phase's task list, or clears all task lists. 
|\n| `append` | `phase`, `items` | None | Appends new `pending` tasks to a phase; creates the phase if missing. |\n| `view` | None | None | Echoes the current list. A call whose ops are all `view` is read-only: no normalization, no state write. |\n\n### Fields used inside ops\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `op` | `\"init\" \\| \"start\" \\| \"done\" \\| \"rm\" \\| \"drop\" \\| \"append\" \\| \"view\"` | Yes | Operation discriminator. |\n| `list` | `{ phase: string; items: string[] }[]` | For `init` (unless a flat `items` list is given) | Full replacement payload. Each `items` array has `minItems: 1`. |\n| `task` | `string` | For `start`; for task-targeted `done`/`drop`/`rm` | Exact task content match. |\n| `phase` | `string` | For `append`; for phase-targeted `done`/`drop`/`rm`; optional for a flat `init` | Exact phase name match, except `append` lazily creates a missing phase and a flat `init` synthesizes one (default `Tasks`). |\n| `items` | `string[]` | For `append`; or as a flat `init` payload | Tasks to append, or the full task list for a flat `init`. `minItems: 1`. 
|\n\n## Outputs\nThe tool returns a single-shot `AgentToolResult`:\n\n- `content`: one text part containing the summary from `formatSummary(...)`.\n - Empty final state with no errors: `Todo list cleared.` (`Todo list is empty.` for a pure-`view` call).\n - Non-empty final state: remaining-item list, current phase progress, then a per-phase tree.\n - If any op produced validation/runtime errors, the summary starts with `Errors: ...` and the result is marked `isError: true`; the whole batch is discarded — the returned and persisted state stay at the pre-call list.\n- `details`:\n - `phases: TodoPhase[]`\n - `storage: \"session\" | \"memory\"`\n - `completedTasks?: TodoCompletionTransition[]` when a task changed from non-completed to `completed` during the batch\n\n`TodoPhase` / `TodoItem` state model:\n\n- `TodoPhase`: `{ name: string, tasks: TodoItem[] }`\n- `TodoItem`: `{ content: string, status: \"pending\" | \"in_progress\" | \"completed\" | \"abandoned\" }`\n\nThe TUI renderer (`todoToolRenderer`) merges call and result into one transcript block and renders phases as a tree. Collapsed transcript previews cap tree items at `PREVIEW_LIMITS.COLLAPSED_ITEMS` (`8`).\n\n## Flow\n1. `TodoTool.execute(...)` clones the current cached phases from `session.getTodoPhases?.() ?? []` (`packages/coding-agent/src/tools/todo.ts`).\n2. `applyParams(...)` walks `params.ops` in order and applies each entry with `applyEntry(...)`.\n3. 
Each op mutates the working phase array:\n - `initPhases(...)` rebuilds the list from scratch.\n - `start` resolves a task by exact `content`, demotes every other `in_progress` task to `pending`, then marks the target `in_progress`.\n - `done` / `drop` use `getTaskTargets(...)` to target one task, one phase, or every task.\n - `rm` removes one task, clears one phase's `tasks`, or clears all phases' task arrays.\n - `appendItems(...)` resolves or creates the target phase and pushes new `pending` tasks unless the same task content already exists anywhere.\n4. Missing task/phase references are recorded in an `errors` array by `resolveTaskOrError(...)` / `resolvePhaseOrError(...)`; execution continues through the rest of the batch, but any error discards the batch's mutations at the end.\n5. After the full batch, `normalizeInProgressTask(...)` enforces the single-active-task invariant:\n - if multiple tasks are `in_progress`, only the first stays active and the rest become `pending`;\n - if none are `in_progress`, the first `pending` task in phase/task order is auto-promoted to `in_progress`.\n6. `execute(...)` stores the updated phases with `session.setTodoPhases?.(...)` only when the batch produced no errors and was not pure-`view`; a failed batch is discarded wholesale (persisting a half-applied batch would make the natural retry hit \"already exists\"). `storage` is `\"session\"` when `session.getSessionFile()` exists, else `\"memory\"`.\n7. `getCompletionTransitions(...)` compares the previous and updated phases (skipped for failed or pure-`view` calls); newly completed tasks are returned in `details.completedTasks`.\n8. The agent runtime also watches `todo` tool results in `packages/coding-agent/src/session/agent-session.ts`; successful results refresh cached todos, failed results inject a hidden next-turn reminder telling the model that todo progress is not visible until it retries.\n9. 
The event controller updates the visible todo UI from `result.details.phases` on success, or shows a warning on error (`packages/coding-agent/src/modes/controllers/event-controller.ts`).\n\n## Modes / Variants\n### State transitions\n\n| Current status | `start` | `done` | `drop` | `rm` | `append` |\n| --- | --- | --- | --- | --- | --- |\n| `pending` | `in_progress` on target | `completed` | `abandoned` | Removed | New tasks enter as `pending` |\n| `in_progress` | Target stays `in_progress`; non-target active tasks become `pending` | `completed` | `abandoned` | Removed | No status change |\n| `completed` | Can be set back to `in_progress` if targeted | Stays `completed` | Becomes `abandoned` if targeted | Removed | No status change |\n| `abandoned` | Can be set back to `in_progress` if targeted | Becomes `completed` if targeted | Stays `abandoned` | Removed | No status change |\n\nNormalization then re-applies the single-active-task rule after the full op batch.\n\n### Op targeting rules\n- `done`, `drop`, `rm`:\n - `task` set: affect one exact-content task.\n - else `phase` set: affect every task in that exact-name phase.\n - else: affect every task in every phase.\n- `append` is the only op that creates a missing phase.\n- `init` discards previous phases entirely.\n\n### Markdown round-trip helpers\nThe same file also exposes non-tool helpers used by `/todo`:\n- `phasesToMarkdown(...)` serializes phases as headings plus checklist items (`[ ]`, `[/]`, `[x]`, `[-]`).\n- `markdownToPhases(...)` parses that format, defaults orphan tasks into a `Todos` phase, accepts `>` as an `in_progress` marker and `~` as `abandoned`, and runs the same normalization step.\n\n## Side Effects\n- Filesystem\n - None in the tool itself.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Mutates the session todo cache through `setTodoPhases`.\n - `storage` reports whether the session has a backing session file, but the tool does not append a custom session entry 
itself.\n - Successful tool-result messages carry `details.phases`; `getLatestTodoPhasesFromEntries(...)` can reconstruct state later from those transcript entries.\n - Failed `todo` results cause `agent-session` to enqueue a hidden next-turn reminder (`customType: \"todo-error-reminder\"`).\n- User-visible prompts / interactive UI\n - Transcript block is rendered by `todoToolRenderer` and merged with the call line.\n - `event-controller` updates the visible todo panel from successful results.\n - On error, `event-controller` shows `Todo update failed...`; the visible panel may stay stale until a later successful call.\n- Background work / cancellation\n - Session-level auto-clear of `completed`/`abandoned` tasks was removed (the timer mutated canonical phases between tool calls); the TUI todo widget still clears closed entries after `tasks.todoClearDelay` (display-only, `packages/coding-agent/src/modes/interactive-mode.ts`).\n\n## Limits & Caps\n- `ops` array: `minItems: 1` (`todoSchema`).\n- `init.list[*].items`: `minItems: 1`.\n- `append.items`: `minItems: 1`.\n- Renderer collapsed preview: `PREVIEW_LIMITS.COLLAPSED_ITEMS = 8` (`packages/coding-agent/src/tools/render-utils.ts`).\n- Auto-clear delay: `tasks.todoClearDelay` default `60` seconds; `< 0` disables auto-clear, `0` clears immediately. 
Display-only — applied by the TUI widget (`packages/coding-agent/src/modes/interactive-mode.ts`); the setting is inert at the session level.\n- Tool execution mode: `concurrency = \"exclusive\"`, `strict = true`, `loadMode = \"discoverable\"`.\n\n## Errors\n- Ordinary bad op payloads are accumulated as human-readable strings in `errors`; the result is marked `isError: true` and the whole batch is discarded — the returned and persisted state stay at the pre-call list.\n- Error strings come from the helpers in `packages/coding-agent/src/tools/todo.ts`, including:\n - `Missing list for init operation`\n - `Missing task content`\n - `Duplicate phase \"...\" in init list` / `Duplicate task \"...\" in init list`\n - `Task \"...\" not found` with an extra empty-list hint when applicable, or a hint that tasks are referenced by content (not `task-N` IDs) when the missing content looks like an ID\n - `Missing phase name`\n - `Phase \"...\" not found`\n - `Missing phase name for append operation`\n - `Missing items for append operation`\n - `Task \"...\" already exists`\n- Ops are processed in order and an early error does not stop later ops from being attempted, but any error in the batch discards every mutation the batch made.\n- Runtime-level tool failure is handled outside the tool body: `agent-session` injects a hidden reminder and the event controller warns the user that visible progress may be stale.\n- Idempotency is op-specific:\n - `init` is a full replacement; replaying the same payload yields the same state.\n - `start`, `done`, and `drop` are effectively idempotent on an existing target state, but `start` also demotes any other active task.\n - `rm` is not idempotent for targeted removals: the second call errors because the task or phase is gone.\n - `append` is not idempotent: duplicate task content is rejected with `Task \"...\" already exists`; the whole `append` op validates up front, so a batch with any duplicate appends nothing.\n\n## Notes\n- Task lookup 
is exact string equality inside the tool. The model-facing prompt says task content and phase names are identifiers and should stay unique; `append` enforces task uniqueness globally, and `init` rejects duplicate phase names and duplicate task contents in its payload.\n- `findTaskByContent(...)` returns the first matching task across phases. Duplicate task contents make later targeted ops ambiguous.\n- `normalizeInProgressTask(...)` runs after the whole batch, not after each op. A single call can intentionally build an intermediate invalid state and rely on final normalization.\n- `storage: \"session\"` means the session has a session-file backing; it does not mean this tool wrote a durable custom entry.\n- Reload persistence differs by path:\n - plain `todo` calls survive in transcript tool-result details;\n - `/todo` command edits additionally append `customType: \"user_todo_edit\"` entries and inject a visible-to-model `<system-reminder>` developer message describing the manual edit.\n- On session resume, `AgentSession.#syncTodoPhasesFromBranch()` strips `completed` and `abandoned` tasks before restoring the cached list. The `/todo` command works around that by reading the latest transcript/custom-entry state so historical done/dropped tasks still appear to the user.\n- Tool availability is gated by `todo.enabled`, and the registry excludes it when `includeYield` is enabled (`packages/coding-agent/src/tools/index.ts`).\n- Subagents do not inherit `todo`; `packages/coding-agent/src/task/executor.ts` filters it out as a parent-owned tool.\n",
111
- "tools/web_search.md": "# web_search\n\n> Run one web query through the first available search provider and return LLM-formatted answer, source URLs, and optional citations.\n\n## Source\n- Entry: `packages/coding-agent/src/web/search/index.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/web-search.md`\n- Key collaborators:\n - `packages/coding-agent/src/web/search/provider.ts` — lazy provider registry; availability chain.\n - `packages/coding-agent/src/web/search/types.ts` — unified `SearchResponse` / `SearchProviderError` types.\n - `packages/coding-agent/src/web/search/render.ts` — TUI renderer details type.\n - `packages/coding-agent/src/web/search/providers/base.ts` — provider interface and shared params contract.\n - `packages/coding-agent/src/web/search/providers/utils.ts` — credential lookup; source normalization.\n - `packages/coding-agent/src/web/search/providers/anthropic.ts` — Claude web-search provider.\n - `packages/coding-agent/src/web/search/providers/brave.ts` — Brave Search API adapter.\n - `packages/coding-agent/src/web/search/providers/codex.ts` — OpenAI Codex SSE adapter.\n - `packages/coding-agent/src/web/search/providers/exa.ts` — Exa API or MCP adapter.\n - `packages/coding-agent/src/web/search/providers/gemini.ts` — Gemini grounding SSE adapter.\n - `packages/coding-agent/src/web/search/providers/jina.ts` — Jina Reader search adapter.\n - `packages/coding-agent/src/web/search/providers/kagi.ts` — Kagi provider wrapper.\n - `packages/coding-agent/src/web/search/providers/kimi.ts` — Kimi search adapter.\n - `packages/coding-agent/src/web/search/providers/parallel.ts` — Parallel provider wrapper.\n - `packages/coding-agent/src/web/search/providers/perplexity.ts` — Perplexity API / OAuth adapter.\n - `packages/coding-agent/src/web/search/providers/searxng.ts` — self-hosted SearXNG adapter.\n - `packages/coding-agent/src/web/search/providers/synthetic.ts` — Synthetic search adapter.\n - 
`packages/coding-agent/src/web/search/providers/tavily.ts` — Tavily search adapter.\n - `packages/coding-agent/src/web/search/providers/zai.ts` — Z.AI remote MCP adapter.\n - `packages/coding-agent/src/web/parallel.ts` — Parallel search/extract HTTP client.\n - `packages/coding-agent/src/web/kagi.ts` — Kagi HTTP client.\n - `packages/coding-agent/src/tools/index.ts` — built-in tool registration and enable flag.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `query` | `string` | Yes | Search query, passed to providers unchanged. |\n| `recency` | `\"day\" \\| \"week\" \\| \"month\" \\| \"year\"` | No | Time filter. Only providers that implement it use it; code maps it for Brave, Perplexity, Tavily, SearXNG, and Kagi. |\n| `limit` | `number` | No | Max results to return. Usually becomes the provider request's result-count parameter when `num_search_results` is absent. |\n| `max_tokens` | `number` | No | Passed through as `maxOutputTokens` / `max_tokens` only by Anthropic, Gemini, and Perplexity API-key mode. Ignored by the other providers. |\n| `temperature` | `number` | No | Passed through only by Anthropic, Gemini, and Perplexity API-key mode. Ignored by the other providers. |\n| `num_search_results` | `number` | No | Requested upstream search breadth. For most providers this is the same count used for returned sources. Perplexity is the only adapter that keeps it distinct from `limit`. 
|\n\n## Outputs\nThe tool returns a single text content block plus structured `details`.\n\n- `content`: `[{ type: \"text\", text: string }]`\n- `details`: `SearchRenderDetails` from `packages/coding-agent/src/web/search/render.ts`\n - `response: SearchResponse`\n - `error?: string`\n\n`text` is produced by `formatForLLM()` in `packages/coding-agent/src/web/search/index.ts`:\n\n- If `response.answer` exists, it is emitted first.\n- If sources exist, one entry per source follows (the `## Sources` header with a source count is emitted only when an answer was also produced):\n - `[n] <title> (<formatted age or published date>)`\n - ` <url>`\n - optional snippet line truncated to 240 chars.\n- If citations exist, a `## Citations` section follows with URL/title plus optional cited text truncated to 240 chars.\n- If related questions exist, a `## Related` bullet list follows.\n- If search queries exist, a `Search queries: <n>` section follows, capped to the first 3 queries and 120 chars each.\n\nFailure output is not thrown at the tool boundary when providers are unavailable or provider attempts fail. Instead the tool returns:\n\n- `content[0].text = \"Error: ...\"`\n- `details.response.provider = <last attempted provider> | \"none\"`\n- `details.error = ...`\n\nStreaming: none. `WebSearchTool.execute()` forwards its `AbortSignal` into `executeSearch()`, and `executeSearch()` passes it to providers. If the signal is aborted during fallback handling, `throwIfAborted(signal)` rethrows the cancellation instead of returning an `\"Error: ...\"` text result.\n\n## Flow\n1. `WebSearchTool.execute()` in `packages/coding-agent/src/web/search/index.ts` delegates directly to `executeSearch()`.\n2. 
`executeSearch()` chooses a provider list:\n - if `params.provider` is set and not `\"auto\"`, it loads that provider with `getSearchProvider()`; if `isExplicitlyAvailable()` returns true, the list is `[that provider]`, otherwise it falls back to `resolveProviderChain(authStorage, \"auto\")`.\n - otherwise it calls `resolveProviderChain()` with the module-global preferred provider from `packages/coding-agent/src/web/search/provider.ts`.\n3. `resolveProviderChain()` lazily loads each provider module on demand and returns only available providers. If a preferred provider is set, it is tried first (gated by `isExplicitlyAvailable()`), then the static `SEARCH_PROVIDER_ORDER` excluding that provider, each gated by `isAvailable()`. Providers in the excluded set (`setExcludedSearchProviders()`) are skipped entirely, including as the preferred candidate.\n4. If no providers are available, `executeSearch()` returns `Error: No web search provider configured.` with `details.response.provider = \"none\"`.\n5. For each provider in order, `executeSearch()` calls `provider.search()` with:\n - `query`,\n - `limit`, `recency`, `temperature`, `maxOutputTokens`, `numSearchResults`,\n - `systemPrompt` from `packages/coding-agent/src/prompts/system/web-search.md`.\n6. A `SearchResponse` with no renderable content (`hasRenderableSearchContent()` returns false) is rejected as a `SearchProviderError` (status `204`) so the loop advances to the next provider. On the first response that has renderable content, `formatForLLM()` renders answer/sources/citations/related/search-queries into one text block and returns it with `details.response`.\n7. If a provider throws, `executeSearch()` records the error and tries the next provider. There is no provider-level parallel fan-out; fallback is sequential.\n8. 
After all candidates fail, `formatProviderError()` normalizes each error:\n - Anthropic `404` becomes `Anthropic web search returned 404 (model or endpoint not found).`\n - `401`/`403` become `<Provider> authorization failed ...` except Z.AI, which preserves its raw message.\n - other `SearchProviderError`s surface `error.message`.\n9. If more than one provider was attempted, the final message is `All web search providers failed: <provider/error>; ...`; otherwise it is just the normalized last error.\n\n## Modes / Variants\n- **Provider selection**\n - **Forced provider**: internal callers may pass `provider`; unavailable forced providers fall back to the auto chain instead of hard-failing (`packages/coding-agent/src/web/search/index.ts`). This field is not in the model-facing schema.\n - **Preferred provider**: `setPreferredSearchProvider()` sets a module-global default used by `resolveProviderChain()`. `packages/coding-agent/src/sdk.ts` and `packages/coding-agent/src/modes/controllers/selector-controller.ts` wire this from settings.\n - **Excluded providers**: `setExcludedSearchProviders()` records providers `resolveProviderChain()` must never return, including as fallbacks. 
Wired from the `providers.webSearchExclude` setting (`providers.webSearch` drives the preferred provider) in `packages/coding-agent/src/sdk.ts`, `packages/coding-agent/src/modes/interactive-mode.ts`, and `packages/coding-agent/src/modes/controllers/selector-controller.ts`.\n - **Auto chain order**: `tavily`, `perplexity`, `brave`, `jina`, `kimi`, `anthropic`, `gemini`, `codex`, `zai`, `exa`, `parallel`, `kagi`, `synthetic`, `searxng` (`SEARCH_PROVIDER_ORDER` in `packages/coding-agent/src/web/search/types.ts`).\n- **Provider adapters**\n - **Tavily** — `packages/coding-agent/src/web/search/providers/tavily.ts`\n - Availability: API key from env or `agent.db` via `findCredential()`.\n - Querying: POST `https://api.tavily.com/search`.\n - `recency` maps to Tavily `time_range`; code explicitly keeps `topic` at default general scope instead of narrowing to news.\n - `limit` / `num_search_results`: adapter uses `params.numSearchResults ?? params.limit`, clamped to `5..20` with default `5`.\n - Output: `answer`, `sources`, `requestId`, `authMode: \"api_key\"`.\n - **Perplexity** — `packages/coding-agent/src/web/search/providers/perplexity.ts`\n - Availability: auth precedence is `PERPLEXITY_COOKIES` -> OAuth token in `agent.db` -> `PERPLEXITY_API_KEY` / `PPLX_API_KEY` -> anonymous ask-endpoint fallback. `isAvailable()` gates the auto chain on credentials, but `isExplicitlyAvailable()` is always true, so explicit selection works unauthenticated.\n - OAuth/cookie/anonymous mode: POSTs to `https://www.perplexity.ai/rest/sse/perplexity_ask`, consumes SSE, merges partial events, extracts answer and source URLs, sets `authMode: \"oauth\"` (`\"anonymous\"` for the unauthenticated fallback).\n - API-key mode: POSTs to `https://api.perplexity.ai/chat/completions` with `model: \"sonar-pro\"`, `search_mode: \"web\"`, `num_search_results`, optional `search_recency_filter`, `max_tokens`, `temperature`.\n - `num_search_results` controls upstream API breadth only in API-key mode. 
`limit` is preserved separately as `num_results` and slices returned `sources` after parsing in both auth modes.\n - Output may include `answer`, `sources`, `citations`, `usage`, `model`, `requestId`, `authMode`.\n - **Brave** — `packages/coding-agent/src/web/search/providers/brave.ts`\n - Availability: `BRAVE_API_KEY` only.\n - Querying: GET `https://api.search.brave.com/res/v1/web/search` with `count`, `extra_snippets=true`, and `freshness=pd|pw|pm|py` for `recency`.\n - `limit` / `num_search_results`: `params.numSearchResults ?? params.limit`, clamped to `1..20`, default `10`.\n - Output: `sources`, `requestId`.\n - **Jina** — `packages/coding-agent/src/web/search/providers/jina.ts`\n - Availability: `JINA_API_KEY` only.\n - Querying: GET-like fetch to `https://s.jina.ai/<encoded query>` with bearer auth.\n - Ignores `recency`, `max_tokens`, and `temperature`.\n - `limit` / `num_search_results`: adapter slices sources to `params.numSearchResults ?? params.limit` when provided; otherwise returns all payload items.\n - Output: `sources` only.\n - **Kimi** — `packages/coding-agent/src/web/search/providers/kimi.ts`\n - Availability: `MOONSHOT_SEARCH_API_KEY`, `KIMI_SEARCH_API_KEY`, `MOONSHOT_API_KEY`, or `agent.db` credentials for `moonshot` / `kimi-code`.\n - Querying: POST to `MOONSHOT_SEARCH_BASE_URL` / `KIMI_SEARCH_BASE_URL` / default `https://api.kimi.com/coding/v1/search` with `text_query`, `limit`, `enable_page_crawling`, `timeout_seconds: 30`.\n - `limit` / `num_search_results`: `params.numSearchResults ?? 
params.limit`, clamped to `1..20`, default `10`.\n - Output: `sources`, `requestId`.\n - **Anthropic** — `packages/coding-agent/src/web/search/providers/anthropic.ts`\n - Availability: `ANTHROPIC_SEARCH_API_KEY` env var, otherwise `authStorage.hasAuth(\"anthropic\")`; search credentials come from `authStorage.getApiKey(\"anthropic\")` when no search-specific key is set.\n - Env overrides specific to search (do not affect chat completions):\n - `ANTHROPIC_SEARCH_API_KEY` — highest-priority search auth; overrides `ANTHROPIC_API_KEY` / OAuth / `ANTHROPIC_FOUNDRY_API_KEY` for the search call only.\n - `ANTHROPIC_SEARCH_BASE_URL` — search-only base URL for either `ANTHROPIC_SEARCH_API_KEY` or fallback Anthropic credentials; overrides `ANTHROPIC_BASE_URL` (and `FOUNDRY_BASE_URL` in Foundry mode); defaults to `https://api.anthropic.com`.\n - `ANTHROPIC_SEARCH_MODEL` — search model; defaults to `claude-haiku-4-5`.\n - Querying: Claude Messages API with web-search tool enabled.\n - `max_tokens` and `temperature` pass through.\n - `limit` and `num_search_results` are collapsed together before dispatch: `num_results = params.numSearchResults ?? params.limit`.\n - Output may include `answer`, `sources`, `citations`, `searchQueries`, `usage.searchRequests`, `model`, `requestId`.\n - **Gemini** — `packages/coding-agent/src/web/search/providers/gemini.ts`\n - Availability: OAuth credentials in `agent.db` for `google-gemini-cli` or `google-antigravity`.\n - Querying: SSE `streamGenerateContent` call with Google Search grounding enabled. 
Antigravity auth tries two fallback endpoints and retries `401/403/400 invalid auth` once after token refresh; `429/5xx` retry with exponential backoff and server-provided retry delay, capped by a `5 * 60 * 1000` ms rate-limit budget.\n - `max_tokens` and `temperature` pass through as `generationConfig.maxOutputTokens` / `generationConfig.temperature`.\n - `limit` and `num_search_results` are collapsed together before dispatch.\n - Output may include `answer`, `sources`, `citations`, `searchQueries`, `usage`, `model`.\n - **Codex** — `packages/coding-agent/src/web/search/providers/codex.ts`\n - Availability: OAuth credential for `openai-codex` in `agent.db` (`hasOAuth()`; expiry is not checked here — refresh is lazy in `searchCodex`).\n - Querying: SSE POST to `https://chatgpt.com/backend-api/codex/responses` with `tool_choice: { type: \"web_search\" }` and `search_context_size: \"high\"` by default.\n - Ignores `recency`, `max_tokens`, and `temperature` in this tool path.\n - `limit` and `num_search_results` are collapsed together before dispatch.\n - Output may include `answer`, `sources`, `usage`, `model`, `requestId`. 
If the streamed response has no `url_citation` annotations, the adapter falls back to scraping markdown links and bare URLs from the answer text.\n - **Z.AI** — `packages/coding-agent/src/web/search/providers/zai.ts`\n - Availability: env or `agent.db` credential for `zai`.\n - Querying: JSON-RPC `tools/call` against `https://api.z.ai/api/mcp/web_search_prime/mcp` for remote MCP tool `web_search_prime`.\n - Fallback chain inside the provider: tries `{query,count}`, then `{search_query,count}`, then `{search_query, search_engine:\"search-prime\", count}` when earlier attempts fail with argument-shape errors.\n - `limit` and `num_search_results` are collapsed together before dispatch.\n - Output may include parsed free-text `answer`, `sources`, `requestId`.\n - **Exa** — `packages/coding-agent/src/web/search/providers/exa.ts`\n - Availability: env or `agent.db` credential for `exa` admits Exa to the auto chain; settings must not explicitly disable `exa.enabled` or `exa.enableSearch`. Explicit selection (`providers.webSearch: exa`) reaches Exa even without a credential and falls back to public MCP.\n - Querying: POST `https://api.exa.ai/search` with the resolved Exa API key, otherwise JSON-RPC `tools/call` against `https://mcp.exa.ai/mcp` for remote MCP tool `web_search_exa`.\n - `limit` and `num_search_results` are collapsed together before dispatch.\n - Output: synthesized `answer` from up to 3 result summaries, `sources`, `requestId`.\n - **Parallel** — `packages/coding-agent/src/web/search/providers/parallel.ts`, `packages/coding-agent/src/web/parallel.ts`\n - Availability: env or `agent.db` credential for `parallel`.\n - Querying: POST `https://api.parallel.ai/v1beta/search` with `objective=query`, `search_queries=[query]`, `mode:\"fast\"`, `max_chars_per_result: 10000`, beta header `search-extract-2025-10-10`.\n - There is no provider fan-out here despite the name; the current adapter always sends a one-element `search_queries` array.\n - `limit` and 
`num_search_results` are collapsed together before dispatch, clamped to `1..40`, default `10`.\n - Output: `sources`, `requestId`.\n - **Kagi** — `packages/coding-agent/src/web/search/providers/kagi.ts`, `packages/coding-agent/src/web/kagi.ts`\n - Availability: env or `agent.db` credential for `kagi`.\n - Querying: POST `https://kagi.com/api/v1/search` with `Authorization: Bearer <key>` and JSON body `{ query, workflow: \"search\", limit, filters?: { after } }`. `recency` maps to `filters.after` as a UTC `YYYY-MM-DD` string (`day`/`week`/`month`/`year`).\n - `limit` and `num_search_results` are collapsed together before dispatch, clamped to `1..40`, default `10`.\n - Output: `sources` (concatenated `data.search` + `data.video` + `data.news` + `data.infobox`, with video/news/infobox results tagged in the title), `relatedQuestions` (`data.adjacent_question` + `data.related_search` `props.question`), `answer` (`data.direct_answer[0].snippet ?? title`), `requestId` (`meta.trace`).\n - **Synthetic** — `packages/coding-agent/src/web/search/providers/synthetic.ts`\n - Availability: env or `agent.db` credential for `synthetic`.\n - Querying: POST `https://api.synthetic.new/v2/search` with `{ query }`.\n - Ignores `recency`, `max_tokens`, and `temperature`.\n - `limit` and `num_search_results` are collapsed together before dispatch.\n - Output: `sources` only.\n - **SearXNG** — `packages/coding-agent/src/web/search/providers/searxng.ts`\n - Availability: endpoint from `searxng.endpoint` setting or `SEARXNG_ENDPOINT` env.\n - Querying: GET `<endpoint>/search?format=json&q=...`; optional settings add `categories` and `language`.\n - Auth precedence: Basic auth (`searxng.basicUsername` / `searxng.basicPassword` or env equivalents) over bearer token (`searxng.token` / `SEARXNG_TOKEN`). 
Basic credentials are validated for RFC 7617 restrictions.\n - `recency` maps to `time_range`; `week` is downgraded to `month` because SearXNG does not support week.\n - `limit` and `num_search_results` are collapsed together before dispatch, clamped to `1..20`, default `10`.\n - Output: `sources`, `relatedQuestions` from `suggestions`.\n\n## Side Effects\n- Network\n - Calls one or more external search providers over HTTPS until one succeeds or all fail.\n - Provider-specific transports include JSON POST, JSON GET, SSE streaming (Perplexity OAuth/API, Gemini, Codex), and JSON-RPC over HTTP (Z.AI).\n- Subprocesses / native bindings\n - None.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Uses a module-global provider-instance cache in `packages/coding-agent/src/web/search/provider.ts`.\n - Uses a module-global preferred-provider setting in the same file.\n - `packages/coding-agent/src/tools/index.ts` gates tool availability behind `session.settings.get(\"web_search.enabled\")`.\n- Background work / cancellation\n - Many provider adapters accept `AbortSignal`; `WebSearchTool.execute()` passes the tool call signal into `executeSearch()`, which forwards it as `params.signal` to providers and rethrows cancellation during fallback.\n\n## Limits & Caps\n- Provider auto-order length: 14 providers (`SEARCH_PROVIDER_ORDER` in `packages/coding-agent/src/web/search/types.ts`).\n- `formatForLLM()` truncates source snippets and citation text to 240 chars (`packages/coding-agent/src/web/search/index.ts`).\n- `formatForLLM()` emits at most 3 search queries, each truncated to 120 chars (`packages/coding-agent/src/web/search/index.ts`).\n- Brave result count: default `10`, max `20` (`DEFAULT_NUM_RESULTS`, `MAX_NUM_RESULTS` in `packages/coding-agent/src/web/search/providers/brave.ts`).\n- Tavily result count: default `5`, max `20` (`packages/coding-agent/src/web/search/providers/tavily.ts`).\n- Kimi result count: default `10`, max `20`; request timeout 
field fixed to `30` seconds (`packages/coding-agent/src/web/search/providers/kimi.ts`).\n- Parallel result count: default `10`, max `40`; per-result excerpt cap `10_000` chars (`packages/coding-agent/src/web/search/providers/parallel.ts`, `packages/coding-agent/src/web/parallel.ts`).\n- Kagi result count: default `10`, max `40` (`packages/coding-agent/src/web/search/providers/kagi.ts`).\n- SearXNG result count: default `10`, max `20` (`packages/coding-agent/src/web/search/providers/searxng.ts`).\n- Perplexity API-key mode defaults: `max_tokens = 8192`, `temperature = 0.2`, `num_search_results = 20` (`packages/coding-agent/src/web/search/providers/perplexity.ts`).\n- Anthropic defaults: model `claude-haiku-4-5`, `DEFAULT_MAX_TOKENS = 4096` when the provider omits `max_tokens` (`packages/coding-agent/src/web/search/providers/anthropic.ts`).\n- Gemini retries: up to `3` retries per endpoint, base delay `1000` ms, rate-limit delay budget `5 * 60 * 1000` ms (`packages/coding-agent/src/web/search/providers/gemini.ts`).\n\n## Errors\n- Tool-level no-provider case returns a normal tool result with `Error: No web search provider configured.`; it does not throw.\n- Tool-level all-failed case also returns a normal tool result with `Error: ...`; the message is either the single normalized provider error or a semicolon-separated summary of all failed providers.\n- Provider adapters usually throw `SearchProviderError(provider, message, status)` for HTTP or protocol failures.\n- Availability probes intentionally swallow lookup errors and report `false` in many providers via `isApiKeyAvailable()`.\n- Per-provider notable failures:\n - Anthropic: missing credentials throw a plain `Error`; a `404` is remapped to a special final message by `formatProviderError()`.\n - Perplexity: missing auth throws a plain `Error`; OAuth stream `error_code` events become `SearchProviderError(\"perplexity\", ...)`.\n - Gemini: auth refresh, endpoint fallback, and retry logic are internal; final 
exhausted failures surface as `SearchProviderError(\"gemini\", ...)`.\n - Codex and Gemini both fail if the HTTP response has no body after a `200`.\n - Z.AI treats malformed SSE/JSON-RPC payloads as provider errors and retries only argument-shape failures across request variants.\n - SearXNG `findAuth()` can throw configuration errors before any HTTP call if Basic auth fields are incomplete or invalid.\n\n## Notes\n- The model-facing schema does not expose `provider`, but internal callers can force one through `SearchQueryParams`.\n- `resolveProviderChain()` lazily imports provider modules and caches singleton instances. Just asking for labels via `getSearchProviderLabel()` does not trigger those imports.\n- Most providers treat `limit` and `num_search_results` as the same number because adapters pass `params.numSearchResults ?? params.limit`. Perplexity is the only implementation that preserves both concepts.\n- `recency` is implemented by Brave, Perplexity, Tavily, SearXNG, and Kagi; the model-facing prompt does not name specific providers.\n- `packages/coding-agent/src/config/settings-schema.ts` uses the shared `SEARCH_PROVIDER_PREFERENCES` / `SEARCH_PROVIDER_OPTIONS` metadata, so the settings selector and setup wizard expose `auto` plus every provider in the auto chain.\n- Exa uses `authStorage.getApiKey(\"exa\")`, then `EXA_API_KEY`, then unauthenticated `https://mcp.exa.ai/mcp` fallback.\n",
113
+ "tools/web_search.md": "# web_search\n\n> Run one web query through the first available search provider and return LLM-formatted answer, source URLs, and optional citations.\n\n## Source\n- Entry: `packages/coding-agent/src/web/search/index.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/web-search.md`\n- Key collaborators:\n - `packages/coding-agent/src/web/search/provider.ts` — lazy provider registry; availability chain.\n - `packages/coding-agent/src/web/search/types.ts` — unified `SearchResponse` / `SearchProviderError` types.\n - `packages/coding-agent/src/web/search/render.ts` — TUI renderer details type.\n - `packages/coding-agent/src/web/search/providers/base.ts` — provider interface and shared params contract.\n - `packages/coding-agent/src/web/search/providers/utils.ts` — credential lookup; source normalization.\n - `packages/coding-agent/src/web/search/providers/anthropic.ts` — Claude web-search provider.\n - `packages/coding-agent/src/web/search/providers/brave.ts` — Brave Search API adapter.\n - `packages/coding-agent/src/web/search/providers/codex.ts` — OpenAI Codex SSE adapter.\n - `packages/coding-agent/src/web/search/providers/exa.ts` — Exa API or MCP adapter.\n - `packages/coding-agent/src/web/search/providers/gemini.ts` — Gemini grounding SSE adapter.\n - `packages/coding-agent/src/web/search/providers/jina.ts` — Jina Reader search adapter.\n - `packages/coding-agent/src/web/search/providers/kagi.ts` — Kagi provider wrapper.\n - `packages/coding-agent/src/web/search/providers/kimi.ts` — Kimi search adapter.\n - `packages/coding-agent/src/web/search/providers/parallel.ts` — Parallel provider wrapper.\n - `packages/coding-agent/src/web/search/providers/perplexity.ts` — Perplexity API / OAuth adapter.\n - `packages/coding-agent/src/web/search/providers/searxng.ts` — self-hosted SearXNG adapter.\n - `packages/coding-agent/src/web/search/providers/synthetic.ts` — Synthetic search adapter.\n - 
`packages/coding-agent/src/web/search/providers/tavily.ts` — Tavily search adapter.\n - `packages/coding-agent/src/web/search/providers/zai.ts` — Z.AI remote MCP adapter.\n - `packages/coding-agent/src/web/parallel.ts` — Parallel search/extract HTTP client.\n - `packages/coding-agent/src/web/kagi.ts` — Kagi HTTP client.\n - `packages/coding-agent/src/tools/index.ts` — built-in tool registration and enable flag.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `query` | `string` | Yes | Search query, passed to providers unchanged. |\n| `recency` | `\"day\" \\| \"week\" \\| \"month\" \\| \"year\"` | No | Time filter. Only providers that implement it use it; code maps it for Brave, Perplexity, Tavily, SearXNG, and Kagi. |\n| `limit` | `number` | No | Max results to return. Usually becomes the provider request's result-count parameter when `num_search_results` is absent. |\n| `max_tokens` | `number` | No | Passed through as `maxOutputTokens` / `max_tokens` only by Anthropic, Gemini, and Perplexity API-key mode. Ignored by the other providers. |\n| `temperature` | `number` | No | Passed through only by Anthropic, Gemini, and Perplexity API-key mode. Ignored by the other providers. |\n| `num_search_results` | `number` | No | Requested upstream search breadth. For most providers this is the same count used for returned sources. Perplexity is the only adapter that keeps it distinct from `limit`. 
|\n\n## Outputs\nThe tool returns a single text content block plus structured `details`.\n\n- `content`: `[{ type: \"text\", text: string }]`\n- `details`: `SearchRenderDetails` from `packages/coding-agent/src/web/search/render.ts`\n - `response: SearchResponse`\n - `error?: string`\n\n`text` is produced by `formatForLLM()` in `packages/coding-agent/src/web/search/index.ts`:\n\n- If `response.answer` exists, it is emitted first.\n- If sources exist, one entry per source follows (the `## Sources` header with a source count is emitted only when an answer was also produced):\n - `[n] <title> (<formatted age or published date>)`\n - ` <url>`\n - optional snippet line truncated to 240 chars.\n- If citations exist, a `## Citations` section follows with URL/title plus optional cited text truncated to 240 chars.\n- If related questions exist, a `## Related` bullet list follows.\n- If search queries exist, a `Search queries: <n>` section follows, capped to the first 3 queries and 120 chars each.\n\nFailure output is not thrown at the tool boundary when providers are unavailable or provider attempts fail. Instead the tool returns:\n\n- `content[0].text = \"Error: ...\"`\n- `details.response.provider = <last attempted provider> | \"none\"`\n- `details.error = ...`\n\nStreaming: none. `WebSearchTool.execute()` forwards its `AbortSignal` into `executeSearch()`, and `executeSearch()` passes it to providers. If the signal is aborted during fallback handling, `throwIfAborted(signal)` rethrows the cancellation instead of returning an `\"Error: ...\"` text result.\n\n## Flow\n1. `WebSearchTool.execute()` in `packages/coding-agent/src/web/search/index.ts` delegates directly to `executeSearch()`.\n2. 
`executeSearch()` chooses a provider list:\n - if `params.provider` is set and not `\"auto\"`, it loads that provider with `getSearchProvider()`; if `isExplicitlyAvailable()` returns true, the list is `[that provider]`, otherwise it falls back to `resolveProviderChain(authStorage, \"auto\")`.\n - otherwise it calls `resolveProviderChain()` with the module-global preferred provider from `packages/coding-agent/src/web/search/provider.ts`.\n3. `resolveProviderChain()` lazily loads each provider module on demand and returns only available providers. If a preferred provider is set, it is tried first (gated by `isExplicitlyAvailable()`), then the static `SEARCH_PROVIDER_ORDER` excluding that provider, each gated by `isAvailable()`. Providers in the excluded set (`setExcludedSearchProviders()`) are skipped entirely, including as the preferred candidate.\n4. If no providers are available, `executeSearch()` returns `Error: No web search provider configured.` with `details.response.provider = \"none\"`.\n5. For each provider in order, `executeSearch()` calls `provider.search()` with:\n - `query`,\n - `limit`, `recency`, `temperature`, `maxOutputTokens`, `numSearchResults`,\n - `systemPrompt` from `packages/coding-agent/src/prompts/system/web-search.md`.\n6. A `SearchResponse` with no renderable content (`hasRenderableSearchContent()` returns false) is rejected as a `SearchProviderError` (status `204`) so the loop advances to the next provider. On the first response that has renderable content, `formatForLLM()` renders answer/sources/citations/related/search-queries into one text block and returns it with `details.response`.\n7. If a provider throws, `executeSearch()` records the error and tries the next provider. There is no provider-level parallel fan-out; fallback is sequential.\n8. 
After all candidates fail, `formatProviderError()` normalizes each error:\n - Anthropic `404` becomes `Anthropic web search returned 404 (model or endpoint not found).`\n - `401`/`403` become `<Provider> authorization failed ...` except Z.AI, which preserves its raw message.\n - other `SearchProviderError`s surface `error.message`.\n9. If more than one provider was attempted, the final message is `All web search providers failed: <provider/error>; ...`; otherwise it is just the normalized last error.\n\n## Modes / Variants\n- **Provider selection**\n - **Forced provider**: internal callers may pass `provider`; unavailable forced providers fall back to the auto chain instead of hard-failing (`packages/coding-agent/src/web/search/index.ts`). This field is not in the model-facing schema.\n - **Preferred provider**: `setPreferredSearchProvider()` sets a module-global default used by `resolveProviderChain()`. `packages/coding-agent/src/sdk.ts` and `packages/coding-agent/src/modes/controllers/selector-controller.ts` wire this from settings.\n - **Excluded providers**: `setExcludedSearchProviders()` records providers `resolveProviderChain()` must never return, including as fallbacks. 
Wired from the `providers.webSearchExclude` setting (`providers.webSearch` drives the preferred provider) in `packages/coding-agent/src/sdk.ts`, `packages/coding-agent/src/modes/interactive-mode.ts`, and `packages/coding-agent/src/modes/controllers/selector-controller.ts`.\n - **Auto chain order**: `perplexity`, `gemini`, `anthropic`, `codex`, `zai`, `exa`, `jina`, `kagi`, `tavily`, `brave`, `kimi`, `parallel`, `synthetic`, `searxng` (`SEARCH_PROVIDER_ORDER` in `packages/coding-agent/src/web/search/types.ts`).\n- **Provider adapters**\n - **Tavily** — `packages/coding-agent/src/web/search/providers/tavily.ts`\n - Availability: API key from env or `agent.db` via `findCredential()`.\n - Querying: POST `https://api.tavily.com/search`.\n - `recency` maps to Tavily `time_range`; code explicitly keeps `topic` at default general scope instead of narrowing to news.\n - `limit` / `num_search_results`: adapter uses `params.numSearchResults ?? params.limit`, clamped to `5..20` with default `5`.\n - Output: `answer`, `sources`, `requestId`, `authMode: \"api_key\"`.\n - **Perplexity** — `packages/coding-agent/src/web/search/providers/perplexity.ts`\n - Availability: auth precedence is `PERPLEXITY_COOKIES` -> OAuth token in `agent.db` -> `PERPLEXITY_API_KEY` / `PPLX_API_KEY` -> anonymous ask-endpoint fallback. `isAvailable()` gates the auto chain on credentials, but `isExplicitlyAvailable()` is always true, so explicit selection works unauthenticated.\n - OAuth/cookie/anonymous mode: POSTs to `https://www.perplexity.ai/rest/sse/perplexity_ask`, consumes SSE, merges partial events, extracts answer and source URLs, sets `authMode: \"oauth\"` (`\"anonymous\"` for the unauthenticated fallback).\n - API-key mode: POSTs to `https://api.perplexity.ai/chat/completions` with `model: \"sonar-pro\"`, `search_mode: \"web\"`, `num_search_results`, optional `search_recency_filter`, `max_tokens`, `temperature`.\n - `num_search_results` controls upstream API breadth only in API-key mode. 
`limit` is preserved separately as `num_results` and slices returned `sources` after parsing in both auth modes.\n - Output may include `answer`, `sources`, `citations`, `usage`, `model`, `requestId`, `authMode`.\n - **Brave** — `packages/coding-agent/src/web/search/providers/brave.ts`\n - Availability: `BRAVE_API_KEY` only.\n - Querying: GET `https://api.search.brave.com/res/v1/web/search` with `count`, `extra_snippets=true`, and `freshness=pd|pw|pm|py` for `recency`.\n - `limit` / `num_search_results`: `params.numSearchResults ?? params.limit`, clamped to `1..20`, default `10`.\n - Output: `sources`, `requestId`.\n - **Jina** — `packages/coding-agent/src/web/search/providers/jina.ts`\n - Availability: `JINA_API_KEY` only.\n - Querying: GET-like fetch to `https://s.jina.ai/<encoded query>` with bearer auth.\n - Ignores `recency`, `max_tokens`, and `temperature`.\n - `limit` / `num_search_results`: adapter slices sources to `params.numSearchResults ?? params.limit` when provided; otherwise returns all payload items.\n - Output: `sources` only.\n - **Kimi** — `packages/coding-agent/src/web/search/providers/kimi.ts`\n - Availability: `MOONSHOT_SEARCH_API_KEY`, `KIMI_SEARCH_API_KEY`, `MOONSHOT_API_KEY`, or `agent.db` credentials for `moonshot` / `kimi-code`.\n - Querying: POST to `MOONSHOT_SEARCH_BASE_URL` / `KIMI_SEARCH_BASE_URL` / default `https://api.kimi.com/coding/v1/search` with `text_query`, `limit`, `enable_page_crawling`, `timeout_seconds: 30`.\n - `limit` / `num_search_results`: `params.numSearchResults ?? 
params.limit`, clamped to `1..20`, default `10`.\n - Output: `sources`, `requestId`.\n - **Anthropic** — `packages/coding-agent/src/web/search/providers/anthropic.ts`\n - Availability: `ANTHROPIC_SEARCH_API_KEY` env var, otherwise `authStorage.hasAuth(\"anthropic\")`; search credentials come from `authStorage.getApiKey(\"anthropic\")` when no search-specific key is set.\n - Env overrides specific to search (do not affect chat completions):\n - `ANTHROPIC_SEARCH_API_KEY` — highest-priority search auth; overrides `ANTHROPIC_API_KEY` / OAuth / `ANTHROPIC_FOUNDRY_API_KEY` for the search call only.\n - `ANTHROPIC_SEARCH_BASE_URL` — search-only base URL for either `ANTHROPIC_SEARCH_API_KEY` or fallback Anthropic credentials; overrides `ANTHROPIC_BASE_URL` (and `FOUNDRY_BASE_URL` in Foundry mode); defaults to `https://api.anthropic.com`.\n - `ANTHROPIC_SEARCH_MODEL` — search model; defaults to `claude-haiku-4-5`.\n - Querying: Claude Messages API with web-search tool enabled.\n - `max_tokens` and `temperature` pass through.\n - `limit` and `num_search_results` are collapsed together before dispatch: `num_results = params.numSearchResults ?? params.limit`.\n - Output may include `answer`, `sources`, `citations`, `searchQueries`, `usage.searchRequests`, `model`, `requestId`.\n - **Gemini** — `packages/coding-agent/src/web/search/providers/gemini.ts`\n - Availability: OAuth credentials in `agent.db` for `google-gemini-cli` or `google-antigravity`.\n - Querying: SSE `streamGenerateContent` call with Google Search grounding enabled. 
Antigravity auth tries two fallback endpoints and retries `401/403/400 invalid auth` once after token refresh; `429/5xx` retry with exponential backoff and server-provided retry delay, capped by a `5 * 60 * 1000` ms rate-limit budget.\n - `max_tokens` and `temperature` pass through as `generationConfig.maxOutputTokens` / `generationConfig.temperature`.\n - `limit` and `num_search_results` are collapsed together before dispatch.\n - Output may include `answer`, `sources`, `citations`, `searchQueries`, `usage`, `model`.\n - **Codex** — `packages/coding-agent/src/web/search/providers/codex.ts`\n - Availability: OAuth credential for `openai-codex` in `agent.db` (`hasOAuth()`; expiry is not checked here — refresh is lazy in `searchCodex`).\n - Querying: SSE POST to `https://chatgpt.com/backend-api/codex/responses` with `tool_choice: { type: \"web_search\" }` and `search_context_size: \"high\"` by default.\n - Ignores `recency`, `max_tokens`, and `temperature` in this tool path.\n - `limit` and `num_search_results` are collapsed together before dispatch.\n - Output may include `answer`, `sources`, `usage`, `model`, `requestId`. 
If the streamed response has no `url_citation` annotations, the adapter falls back to scraping markdown links and bare URLs from the answer text.\n - **Z.AI** — `packages/coding-agent/src/web/search/providers/zai.ts`\n - Availability: env or `agent.db` credential for `zai`.\n - Querying: JSON-RPC `tools/call` against `https://api.z.ai/api/mcp/web_search_prime/mcp` for remote MCP tool `web_search_prime`.\n - Fallback chain inside the provider: tries `{query,count}`, then `{search_query,count}`, then `{search_query, search_engine:\"search-prime\", count}` when earlier attempts fail with argument-shape errors.\n - `limit` and `num_search_results` are collapsed together before dispatch.\n - Output may include parsed free-text `answer`, `sources`, `requestId`.\n - **Exa** — `packages/coding-agent/src/web/search/providers/exa.ts`\n - Availability: env or `agent.db` credential for `exa` admits Exa to the auto chain; settings must not explicitly disable `exa.enabled` or `exa.enableSearch`. Explicit selection (`providers.webSearch: exa`) reaches Exa even without a credential and falls back to public MCP.\n - Querying: POST `https://api.exa.ai/search` with the resolved Exa API key, otherwise JSON-RPC `tools/call` against `https://mcp.exa.ai/mcp` for remote MCP tool `web_search_exa`.\n - `limit` and `num_search_results` are collapsed together before dispatch.\n - Output: synthesized `answer` from up to 3 result summaries, `sources`, `requestId`.\n - **Parallel** — `packages/coding-agent/src/web/search/providers/parallel.ts`, `packages/coding-agent/src/web/parallel.ts`\n - Availability: env or `agent.db` credential for `parallel`.\n - Querying: POST `https://api.parallel.ai/v1beta/search` with `objective=query`, `search_queries=[query]`, `mode:\"fast\"`, `max_chars_per_result: 10000`, beta header `search-extract-2025-10-10`.\n - There is no provider fan-out here despite the name; the current adapter always sends a one-element `search_queries` array.\n - `limit` and 
`num_search_results` are collapsed together before dispatch, clamped to `1..40`, default `10`.\n - Output: `sources`, `requestId`.\n - **Kagi** — `packages/coding-agent/src/web/search/providers/kagi.ts`, `packages/coding-agent/src/web/kagi.ts`\n - Availability: env or `agent.db` credential for `kagi`.\n - Querying: POST `https://kagi.com/api/v1/search` with `Authorization: Bearer <key>` and JSON body `{ query, workflow: \"search\", limit, filters?: { after } }`. `recency` maps to `filters.after` as a UTC `YYYY-MM-DD` string (`day`/`week`/`month`/`year`).\n - `limit` and `num_search_results` are collapsed together before dispatch, clamped to `1..40`, default `10`.\n - Output: `sources` (concatenated `data.search` + `data.video` + `data.news` + `data.infobox`, with video/news/infobox results tagged in the title), `relatedQuestions` (`data.adjacent_question` + `data.related_search` `props.question`), `answer` (`data.direct_answer[0].snippet ?? title`), `requestId` (`meta.trace`).\n - **Synthetic** — `packages/coding-agent/src/web/search/providers/synthetic.ts`\n - Availability: env or `agent.db` credential for `synthetic`.\n - Querying: POST `https://api.synthetic.new/v2/search` with `{ query }`.\n - Ignores `recency`, `max_tokens`, and `temperature`.\n - `limit` and `num_search_results` are collapsed together before dispatch.\n - Output: `sources` only.\n - **SearXNG** — `packages/coding-agent/src/web/search/providers/searxng.ts`\n - Availability: endpoint from `searxng.endpoint` setting or `SEARXNG_ENDPOINT` env.\n - Querying: GET `<endpoint>/search?format=json&q=...`; optional settings add `categories` and `language`.\n - Auth precedence: Basic auth (`searxng.basicUsername` / `searxng.basicPassword` or env equivalents) over bearer token (`searxng.token` / `SEARXNG_TOKEN`). 
Basic credentials are validated for RFC 7617 restrictions.\n - `recency` maps to `time_range`; `week` is downgraded to `month` because SearXNG does not support week.\n - `limit` and `num_search_results` are collapsed together before dispatch, clamped to `1..20`, default `10`.\n - Output: `sources`, `relatedQuestions` from `suggestions`.\n\n## Side Effects\n- Network\n - Calls one or more external search providers over HTTPS until one succeeds or all fail.\n - Provider-specific transports include JSON POST, JSON GET, SSE streaming (Perplexity OAuth/API, Gemini, Codex), and JSON-RPC over HTTP (Z.AI, and Exa's public MCP fallback).\n- Subprocesses / native bindings\n - None.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Uses a module-global provider-instance cache in `packages/coding-agent/src/web/search/provider.ts`.\n - Uses a module-global preferred-provider setting in the same file.\n - `packages/coding-agent/src/tools/index.ts` gates tool availability behind `session.settings.get(\"web_search.enabled\")`.\n- Background work / cancellation\n - Many provider adapters accept `AbortSignal`; `WebSearchTool.execute()` passes the tool call signal into `executeSearch()`, which forwards it as `params.signal` to providers and rethrows cancellation during fallback.\n\n## Limits & Caps\n- Provider auto-order length: 14 providers (`SEARCH_PROVIDER_ORDER` in `packages/coding-agent/src/web/search/types.ts`).\n- `formatForLLM()` truncates source snippets and citation text to 240 chars (`packages/coding-agent/src/web/search/index.ts`).\n- `formatForLLM()` emits at most 3 search queries, each truncated to 120 chars (`packages/coding-agent/src/web/search/index.ts`).\n- Brave result count: default `10`, max `20` (`DEFAULT_NUM_RESULTS`, `MAX_NUM_RESULTS` in `packages/coding-agent/src/web/search/providers/brave.ts`).\n- Tavily result count: default `5`, max `20` (`packages/coding-agent/src/web/search/providers/tavily.ts`).\n- Kimi result count: default `10`, max `20`; request timeout 
field fixed to `30` seconds (`packages/coding-agent/src/web/search/providers/kimi.ts`).\n- Parallel result count: default `10`, max `40`; per-result excerpt cap `10_000` chars (`packages/coding-agent/src/web/search/providers/parallel.ts`, `packages/coding-agent/src/web/parallel.ts`).\n- Kagi result count: default `10`, max `40` (`packages/coding-agent/src/web/search/providers/kagi.ts`).\n- SearXNG result count: default `10`, max `20` (`packages/coding-agent/src/web/search/providers/searxng.ts`).\n- Perplexity API-key mode defaults: `max_tokens = 8192`, `temperature = 0.2`, `num_search_results = 20` (`packages/coding-agent/src/web/search/providers/perplexity.ts`).\n- Anthropic defaults: model `claude-haiku-4-5`, `DEFAULT_MAX_TOKENS = 4096` when the provider omits `max_tokens` (`packages/coding-agent/src/web/search/providers/anthropic.ts`).\n- Gemini retries: up to `3` retries per endpoint, base delay `1000` ms, rate-limit delay budget `5 * 60 * 1000` ms (`packages/coding-agent/src/web/search/providers/gemini.ts`).\n\n## Errors\n- Tool-level no-provider case returns a normal tool result with `Error: No web search provider configured.`; it does not throw.\n- Tool-level all-failed case also returns a normal tool result with `Error: ...`; the message is either the single normalized provider error or a semicolon-separated summary of all failed providers.\n- Provider adapters usually throw `SearchProviderError(provider, message, status)` for HTTP or protocol failures.\n- Availability probes intentionally swallow lookup errors and report `false` in many providers via `isApiKeyAvailable()`.\n- Per-provider notable failures:\n - Anthropic: missing credentials throw a plain `Error`; a `404` is remapped to a special final message by `formatProviderError()`.\n - Perplexity: missing auth throws a plain `Error`; OAuth stream `error_code` events become `SearchProviderError(\"perplexity\", ...)`.\n - Gemini: auth refresh, endpoint fallback, and retry logic are internal; final 
exhausted failures surface as `SearchProviderError(\"gemini\", ...)`.\n - Codex and Gemini both fail if the HTTP response has no body after a `200`.\n - Z.AI treats malformed SSE/JSON-RPC payloads as provider errors and retries only argument-shape failures across request variants.\n - SearXNG `findAuth()` can throw configuration errors before any HTTP call if Basic auth fields are incomplete or invalid.\n\n## Notes\n- The model-facing schema does not expose `provider`, but internal callers can force one through `SearchQueryParams`.\n- `resolveProviderChain()` lazily imports provider modules and caches singleton instances. Just asking for labels via `getSearchProviderLabel()` does not trigger those imports.\n- Most providers treat `limit` and `num_search_results` as the same number because adapters pass `params.numSearchResults ?? params.limit`. Perplexity is the only implementation that preserves both concepts.\n- `recency` is implemented by Brave, Perplexity, Tavily, SearXNG, and Kagi; the model-facing prompt does not name specific providers.\n- `packages/coding-agent/src/config/settings-schema.ts` uses the shared `SEARCH_PROVIDER_PREFERENCES` / `SEARCH_PROVIDER_OPTIONS` metadata, so the settings selector and setup wizard expose `auto` plus every provider in the auto chain.\n- Exa uses `authStorage.getApiKey(\"exa\")`, then `EXA_API_KEY`, then unauthenticated `https://mcp.exa.ai/mcp` fallback.\n",
112
114
  "tools/write.md": "# write\n\n> Create or overwrite a file, writable internal resource, archive entry, SQLite row, or merge-conflict resolution.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/write.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/write.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/archive-reader.ts` — parse `archive.ext:entry` selectors.\n - `packages/coding-agent/src/tools/sqlite-reader.ts` — detect SQLite paths and perform row insert/update/delete.\n - `packages/coding-agent/src/tools/conflict-detect.ts` — parse `conflict://` URIs and splice recorded merge-conflict regions.\n - `packages/coding-agent/src/lsp/index.ts` — format-on-write and diagnostics writethrough.\n - `packages/coding-agent/src/tools/auto-generated-guard.ts` — block overwriting generated files.\n - `packages/coding-agent/src/tools/fs-cache-invalidation.ts` — invalidate shared FS scan caches after writes.\n - `packages/coding-agent/src/tools/plan-mode-guard.ts` — resolve paths and enforce plan-mode write policy.\n\n## Inputs\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `path` | `string` | Yes | Target path. Plain file path writes a filesystem file. Writable internal URLs are delegated to their handler. `archive.ext:inner/path` writes an archive entry for `.tar`, `.tar.gz`, `.tgz`, or `.zip`. `db.sqlite:table` inserts a row. `db.sqlite:table:key` updates or deletes a row. `conflict://<id>` resolves a recorded merge conflict; `conflict://*` bulk-resolves every registered conflict. |\n| `content` | `string` | Yes | Full replacement file content, archive entry content, internal-resource content, conflict replacement, or SQLite row payload. SQLite non-delete writes must parse as a JSON5 object. Empty or whitespace-only content deletes a SQLite row when `path` includes a row key. 
|\n\nWorked examples:\n\n```text\npath: \"src/generated/config.json\"\ncontent: \"{\\n \\\"enabled\\\": true\\n}\\n\"\n```\n\n```text\npath: \"fixtures/archive.zip:templates/email.txt\"\ncontent: \"hello\\n\"\n```\n\n```text\npath: \"data/app.sqlite:users:42\"\ncontent: \"{name: 'Ada', active: true}\"\n```\n\n## Outputs\nSingle-shot result.\n\n- Success always returns a text block.\n - Plain file write: `Successfully wrote <chars> bytes to <relative-path>` (the count is `cleanContent.length`, not encoded byte length).\n - Internal URL write: `Successfully wrote <chars> bytes to <url>`.\n - Archive write: `Successfully wrote <chars> bytes to <relative-archive-path>:<entry-path>`.\n - SQLite write: one of `Inserted row into <table>`, `Updated row '<key>' in <table>`, `No row updated ...`, `Deleted row ...`, `No row deleted ...`.\n - Conflict resolution: conflict-specific success text, with fresh hashline snapshot headers when applicable.\n- If hashline prefixes were copied from `read` output and stripped first, the first text block gets an extra note.\n- In hashline display mode, plain file writes (including ACP bridge writes) and conflict resolutions prepend a fresh `[<relative-path>#TAG]` header so the next `edit` has a current snapshot tag without an extra `read`. Bulk conflict resolutions append a `Snapshots:` block listing one header per successfully written file.\n- Plain file writes may also return `details.diagnostics` plus `details.meta.diagnostics` when LSP diagnostics-on-write is enabled, and `details.madeExecutable` when a newly written shebang file is chmodded executable.\n- SQLite writes use `toolResult(...).sourcePath(...)`, so `details.meta.sourcePath` points at the database file.\n- Archive writes set `details.resolvedPath` to the archive's absolute path; internal URL writes return empty `details`.\n\n## Flow\n1. 
`WriteTool.execute()` in `packages/coding-agent/src/tools/write.ts` strips pasted `[PATH#HASH]` headers and `LINE:` hashline prefixes from `content` when the session is in hashline display mode.\n2. If `path` is an internal URL whose handler exposes `write`, the tool delegates directly to `handler.write(...)` and returns.\n3. `conflict://...` paths are handled next by the merge-conflict resolver. Scope reads such as `conflict://<id>/ours` are rejected as read-only; writable conflict URIs must omit the scope.\n4. It calls `#resolveArchiveWritePath()` next. That uses `parseArchivePathCandidates()` from `packages/coding-agent/src/tools/archive-reader.ts`, checks candidate archive files on disk (longest match first), and falls back to the shortest candidate archive path even when the archive file does not exist yet.\n5. Archive writes call `enforcePlanModeWrite(..., { op: exists ? \"update\" : \"create\" })`, then `#writeArchiveEntry()`.\n - The parent directory of the archive file is created with `fs.mkdir(..., { recursive: true })`.\n - `.zip` archives are read with `fflate.unzipSync()`, the target entry is replaced in an in-memory map, and the archive is rewritten with `fflate.zipSync()` + `Bun.write()`.\n - `.tar`, `.tar.gz`, and `.tgz` archives are read with `Bun.Archive`, existing entries are copied into an object map, the target entry is replaced, and `Bun.Archive.write()` rewrites the archive.\n - `invalidateFsScanAfterWrite()` runs on the archive file path.\n6. If the path is not treated as an archive, `execute()` calls `#resolveSqliteWritePath()`. That uses `parseSqlitePathCandidates()` and `isSqliteFile()` from `packages/coding-agent/src/tools/sqlite-reader.ts`. Existing non-SQLite files suppress the SQLite path interpretation.\n7. 
SQLite writes call `enforcePlanModeWrite(..., { op: \"update\" })`, then `#writeSqliteRow()`.\n - The database must already exist; missing DBs throw `SQLite database '<path>' not found`.\n - The tool opens `new Database(..., { create: false, strict: true })` and sets `PRAGMA busy_timeout = 3000`.\n - Whitespace-only `content` with a row key deletes a row.\n - Non-empty `content` is parsed with `Bun.JSON5.parse()`, must be a JSON object, and is routed to insert/update helpers from `packages/coding-agent/src/tools/sqlite-reader.ts`.\n - `invalidateFsScanAfterWrite()` runs on the DB path and the connection is closed in `finally`.\n8. Otherwise the tool treats `path` as a plain filesystem file.\n - `enforcePlanModeWrite(..., { op: \"create\" })` runs before path resolution.\n - Existing files are checked by `assertEditableFile()` to block overwriting detected generated files.\n - ACP bridge writeTextFile is tried first when available; otherwise the session’s writethrough callback writes content. With LSP enabled and `lsp.formatOnWrite` / `lsp.diagnosticsOnWrite` settings on, `createLspWritethrough()` may format content, sync it through LSP servers, save it, and collect diagnostics. Otherwise `writethroughNoop()` writes directly with `Bun.write()` or `file.write()`.\n - `maybeMarkExecutableForShebang()` may chmod the file executable when content starts with `#!`.\n - `invalidateFsScanAfterWrite()` runs on the file path.\n9. 
The tool returns a text result and optional diagnostics / executable metadata.\n\n## Modes / Variants\n### Plain file path\n- Target is any path that does not resolve as an archive selector and does not resolve as an existing-or-new SQLite selector.\n- Existing files are overwritten.\n- `write.ts` does not call `fs.mkdir()` on this path; explicit parent-directory creation only exists in the archive branch, but `Bun.write()` itself creates missing parent directories for plain file writes.\n\nExample:\n\n```text\npath: \"tmp/output.txt\"\ncontent: \"hello\\n\"\n```\n\n### Archive entry write\n- Selector syntax: `archive.ext:inner/path`.\n- Supported archive suffixes come from `parseArchivePathCandidates()`: `.tar`, `.tar.gz`, `.tgz`, `.zip`.\n- The inner path is normalized to `/`, strips empty and `.` segments, rejects `..`, and rejects directory targets ending in `/`.\n- Rewrites the whole archive file after replacing one entry.\n- Creates the parent directory for the archive file if needed.\n\nExample:\n\n```text\npath: \"build/assets.tar.gz:css/app.css\"\ncontent: \"body { color: black; }\\n\"\n```\n\n### SQLite table insert\n- Selector syntax: `db.sqlite:table`.\n- `content` must parse as a JSON5 object.\n- Empty object is allowed and becomes `INSERT INTO <table> DEFAULT VALUES`.\n- Query parameters are rejected for SQLite writes.\n\nExample:\n\n```text\npath: \"data/app.db:users\"\ncontent: \"{name: 'Ada', active: true}\"\n```\n\n### SQLite row update / delete\n- Selector syntax: `db.sqlite:table:key`.\n- Non-empty `content` updates the row.\n- Empty or whitespace-only `content` deletes the row.\n- Row lookup uses the single-column primary key if present; otherwise it falls back to `rowid`. 
Composite primary keys and `WITHOUT ROWID` tables are rejected for key-based writes.\n\nExample update:\n\n```text\npath: \"data/app.sqlite:users:42\"\ncontent: \"{email: 'ada@example.com'}\"\n```\n\nExample delete:\n\n```text\npath: \"data/app.sqlite:users:42\"\ncontent: \"\"\n```\n\n## Side Effects\n- Filesystem\n - Creates or overwrites plain files.\n - Rewrites entire archive files when writing an archive entry.\n - Explicitly creates parent directories (via `fs.mkdir`) for archive files only; plain file writes get parent directories from `Bun.write()`.\n - Mutates existing SQLite databases; never creates a new SQLite DB.\n - Resolves conflict markers in files for `conflict://...` writes.\n - May chmod a shebang file executable after a successful plain-file write.\n- Subprocesses / native bindings\n - Uses Bun SQLite bindings via `bun:sqlite`.\n - Uses Bun archive APIs and lazily imports `fflate` for ZIP reads/writes.\n - May talk to configured LSP servers through `packages/coding-agent/src/lsp/index.ts`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Invalidates shared filesystem scan cache entries through `invalidateFsScanAfterWrite()`.\n - Enforces plan-mode write restrictions before mutating the target.\n- Background work / cancellation\n - Marks the tool `concurrency = \"exclusive\"` in `WriteTool`.\n - LSP writethrough can schedule deferred diagnostics fetches after a timeout, but plain `write.ts` only consumes the immediate return value.\n\n## Limits & Caps\n- `WriteTool` itself exposes no byte cap beyond storing `content` in memory and, for archives, rebuilding the archive in memory.\n- Generated-file detection reads at most `CHECK_BYTE_COUNT = 1024` bytes and `HEADER_LINE_LIMIT = 40` header lines from an existing file in `packages/coding-agent/src/tools/auto-generated-guard.ts`.\n- SQLite writes set `PRAGMA busy_timeout = 3000`.\n- LSP writethrough uses a `5_000` ms operation timeout in `runLspWritethrough()` and may schedule 
a deferred diagnostics fetch with `AbortSignal.timeout(25_000)` in `scheduleDeferredDiagnosticsFetch()`.\n- Shebang executable handling depends on host filesystem chmod support.\n\n## Errors\n- Invalid archive subpaths throw `ToolError` with messages such as:\n - `Archive write path must target a file inside the archive`\n - `Archive write path must target a file, not a directory`\n - `Archive path cannot contain '..'`\n- SQLite path parsing throws on unsupported forms:\n - `SQLite write paths do not support query parameters`\n - `SQLite write path must target a table`\n - `SQLite row writes require a non-empty row key`\n- Missing SQLite DBs surface as `SQLite database '<path>' not found`.\n- SQLite content errors are model-visible `ToolError`s, including invalid JSON5, non-object payloads, unknown columns, non-scalar values, empty update objects, composite primary keys, and `WITHOUT ROWID` tables.\n- Existing plain files may be rejected by `assertEditableFile()` when they look generated.\n- Conflict scope writes such as `conflict://<id>/ours` are rejected as read-only; invalid conflict IDs or missing conflict history surface as `ToolError`s from the conflict resolver.\n- Archive read/write failures and unexpected SQLite exceptions are wrapped in `ToolError(error.message)`.\n- If no LSP server matches or LSP formatting/diagnostics times out, file writes still fall back to writing content; diagnostics may be omitted.\n\n## Notes\n- Archive path detection runs before SQLite detection. A path that matches an archive selector is never treated as SQLite.\n- SQLite detection declines when an existing file with a `.sqlite` / `.db` suffix is present but does not have SQLite magic bytes; then the path falls back to a plain file write.\n- ZIP entry content is encoded with `new TextEncoder().encode(content)` in `#writeArchiveEntry()`. 
Non-ZIP archive writes pass the string directly to `Bun.Archive.write()`.\n- The prompt forbids two common anti-patterns: using `write` for routine edits that should use `edit`, and creating `*.md` / `README` files unless explicitly requested. It also forbids emojis unless requested.\n- Plain file and internal URL writes report `cleanContent.length` as “bytes”, which is UTF-16 code units in JS, not an on-disk byte measurement.\n- `stripWriteContent()` only removes hashline prefixes when the session’s file display mode has `hashLines` enabled; otherwise content is written unchanged.\n",
113
115
  "tree.md": "# `/tree` Command Reference\n\n`/tree` opens the interactive **Session Tree** navigator. It lets you jump to any entry in the current session file and continue from that point.\n\nThis is an in-file leaf move, not a new session export.\n\n## What `/tree` does\n\n- Builds a tree from current session entries (`SessionManager.getTree()`)\n- Opens `TreeSelectorComponent` with keyboard navigation, filters, and search\n- On selection, calls `AgentSession.navigateTree(targetId, { summarize, customInstructions })`\n- Rebuilds visible chat from the new leaf path\n- Optionally prefills editor text when selecting a user/custom message\n\nPrimary implementation:\n\n- `src/slash-commands/builtin-registry.ts` (`/tree`, `/branch` command routing)\n- `src/modes/controllers/input-controller.ts` (keybinding wiring, double-escape behavior)\n- `src/modes/controllers/selector-controller.ts` (tree UI launch + summary prompt flow)\n- `src/modes/components/tree-selector.ts` (navigation, filters, search, labels, rendering)\n- `src/session/agent-session.ts` (`navigateTree` leaf switching + optional summary)\n- `src/session/session-manager.ts` (`getTree`, `branch`, `branchWithSummary`, `resetLeaf`, label persistence)\n\n## How to open it\n\nAny of the following opens the same selector:\n\n- `/tree`\n- configured keybinding for the `app.session.tree` action\n- double-escape on empty editor when `doubleEscapeAction = \"tree\"` (default)\n- `/branch` when `doubleEscapeAction = \"tree\"` (routes to tree selector instead of user-only branch picker)\n\n## Tree UI model\n\nThe tree is rendered from session entry parent pointers (`id` / `parentId`).\n\n- Children are sorted by timestamp ascending (older first, newer lower)\n- Active branch (path from root to current leaf) is marked with a bullet\n- Labels (if present) render as `[label]` before node text\n- If multiple roots exist (orphaned/broken parent chains), they are shown under a virtual branching root\n\n```text\nExample tree 
view (active path marked with •):\n\n├─ user: \"Start task\"\n│ └─ assistant: \"Plan\"\n│ ├─ • user: \"Try approach A\"\n│ │ └─ • assistant: \"A result\"\n│ │ └─ • [milestone] user: \"Continue A\"\n│ └─ user: \"Try approach B\"\n│ └─ assistant: \"B result\"\n```\n\nThe selector recenters around current selection and shows up to:\n\n- `max(5, floor(terminalHeight / 2))` rows\n\n## Keybindings inside tree selector\n\n- `Up` / `Down`: move selection (wraps)\n- `Left` / `Right`: page up / page down\n- `Enter`: select node\n- `Esc`: clear search if active; otherwise close selector\n- `Ctrl+C`: close selector\n- `Type`: append to search query\n- `Backspace`: delete search character\n- `Shift+L`: edit/clear label on selected entry\n- `Ctrl+O`: cycle filter forward\n- `Shift+Ctrl+O`: cycle filter backward\n- `Alt+D/T/U/L/A`: jump directly to specific filter mode\n\n## Filters and search semantics\n\nFilter modes (`TreeList`):\n\n1. `default`\n2. `no-tools`\n3. `user-only`\n4. `labeled-only`\n5. `all`\n\n### `default`\n\nShows conversational nodes plus any entry types not explicitly suppressed. 
It hides these setting/bookkeeping entry types:\n\n- `label`\n- `custom`\n- `model_change`\n- `thinking_level_change`\n\nOther internal entry types that are not rendered specially may appear as blank rows in current code.\n\n### `no-tools`\n\nSame as `default`, plus hides `toolResult` messages.\n\n### `user-only`\n\nOnly `message` entries where role is `user`.\n\n### `labeled-only`\n\nOnly entries that currently resolve to a label.\n\n### `all`\n\nEverything in the session tree, including bookkeeping/custom entries.\n\n### Tool-only assistant node behavior\n\nAssistant messages that contain **only tool calls** (no text) are hidden by default in all filtered views unless:\n\n- message is error/aborted (`stopReason` not `stop`/`toolUse`), or\n- it is the current leaf (always kept visible)\n\n### Search behavior\n\n- Query is tokenized by spaces\n- Matching is fuzzy (subsequence) and case-insensitive (`fuzzyMatch`)\n- All tokens must match (AND semantics)\n- Searchable text includes label, role, and type-specific content (message text, branch summary text, custom type, tool command snippets, etc.)\n\n## Selection outcomes (important)\n\n`navigateTree` computes new leaf behavior from selected entry type:\n\n### Selecting `user` message\n\n- New leaf becomes selected entry’s `parentId`\n- If parent is `null` (root user message), leaf resets to root (`resetLeaf()`)\n- Selected message text is copied to editor for editing/resubmit\n\n### Selecting `custom_message`\n\n- Same leaf rule as user messages (`parentId`)\n- Text content is extracted and copied to editor\n\n### Selecting non-user node (assistant/tool/summary/compaction/custom bookkeeping/etc.)\n\n- New leaf becomes selected node id\n- Editor is not prefilled\n\n### Selecting current leaf\n\n- No-op; selector closes with “Already at this point”\n\n```text\nSelection decision (simplified):\n\nselected node\n │\n ├─ is current leaf? ── yes ──> close selector (no-op)\n │\n ├─ is user/custom_message? 
── yes ──> leaf := parentId (or resetLeaf for root)\n │ + prefill editor text\n │\n └─ otherwise ──> leaf := selected node id\n + no editor prefill\n```\n\n## Summary-on-switch flow\n\nSummary prompt is controlled by `branchSummary.enabled` (default: `false`).\n\nWhen enabled, after picking a node the UI asks:\n\n- `No summary`\n- `Summarize`\n- `Summarize with custom prompt`\n\nFlow details:\n\n- Escape in summary prompt reopens tree selector\n- Custom prompt cancellation returns to summary choice loop\n- During summarization, UI shows loader and binds `Esc` to `abortBranchSummary()`\n- If summarization aborts, tree selector reopens and no move is applied\n\n`navigateTree` internals:\n\n- Collects abandoned-branch entries from old leaf to common ancestor\n- Emits `session_before_tree` (extensions can cancel or inject summary)\n- Uses default summarizer only if requested and needed\n- Applies move with:\n - `branchWithSummary(...)` when summary exists\n - `branch(newLeafId)` for non-root move without summary\n - `resetLeaf()` for root move without summary\n- Replaces agent conversation with rebuilt session context\n- Emits `session_tree`\n\nNote: if user requests summary but there is nothing to summarize, navigation proceeds without creating a summary entry.\n\n## Labels\n\nLabel edits in tree UI call `appendLabelChange(targetId, label)`.\n\n- non-empty label sets/updates resolved label\n- empty label clears it\n- labels are stored as append-only `label` entries\n- tree nodes display resolved label state, not raw label-entry history\n\n## `/tree` vs adjacent operations\n\n| Operation | Scope | Result |\n| --------- | ------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| `/tree` | Current session file | Moves leaf to selected point (same file) |\n| `/branch` | Usually current session file -> new session file | By 
default branches from selected **user** message into a new session file; if `doubleEscapeAction = \"tree\"`, `/branch` opens tree navigation UI instead |\n| `/fork` | Whole current session | Duplicates session into a new persisted session file |\n| `/resume` | Session list | Switches to another session file |\n\nKey distinction: `/tree` is a navigation/repositioning tool inside one session file. `/branch`, `/fork`, and `/resume` all change session-file context.\n\n## Operator workflows\n\n### Re-run from an earlier user prompt without losing current branch\n\n1. `/tree`\n2. search/select earlier user message\n3. choose `No summary` (or summarize if needed)\n4. edit prefilled text in editor\n5. submit\n\nEffect: new branch grows from selected point within same session file.\n\n### Leave current branch with context breadcrumb\n\n1. enable `branchSummary.enabled`\n2. `/tree` and select target node\n3. choose `Summarize` (or custom prompt)\n\nEffect: a `branch_summary` entry is appended at the target position before continuing.\n\n### Investigate hidden bookkeeping entries\n\n1. `/tree`\n2. press `Alt+A` (all)\n3. search for `model`, `thinking`, `custom`, or labels\n\nEffect: inspect full internal timeline, not just conversational nodes.\n\n### Bookmark pivot points for later jumps\n\n1. `/tree`\n2. move to entry\n3. `Shift+L` and set label\n4. later use `Alt+L` (`labeled-only`) to jump quickly\n\nEffect: fast navigation among durable branch landmarks.\n",
114
116
  "ttsr-injection-lifecycle.md": "# TTSR Injection Lifecycle\n\nThis document covers the current Time Traveling Stream Rules (TTSR) runtime path from rule discovery to stream interruption, retry injection, extension notifications, and session-state handling.\n\n## Implementation files\n\n- [`../src/sdk.ts`](../packages/coding-agent/src/sdk.ts)\n- [`../src/export/ttsr.ts`](../packages/coding-agent/src/export/ttsr.ts)\n- [`../src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts)\n- [`../src/session/session-manager.ts`](../packages/coding-agent/src/session/session-manager.ts)\n- [`../src/prompts/system/ttsr-interrupt.md`](../packages/coding-agent/src/prompts/system/ttsr-interrupt.md)\n- [`../src/capability/index.ts`](../packages/coding-agent/src/capability/index.ts)\n- [`../src/extensibility/extensions/types.ts`](../packages/coding-agent/src/extensibility/extensions/types.ts)\n- [`../src/extensibility/hooks/types.ts`](../packages/coding-agent/src/extensibility/hooks/types.ts)\n- [`../src/extensibility/custom-tools/types.ts`](../packages/coding-agent/src/extensibility/custom-tools/types.ts)\n- [`../src/modes/controllers/event-controller.ts`](../packages/coding-agent/src/modes/controllers/event-controller.ts)\n\n## 1. 
Discovery feed and rule registration\n\nAt session creation, `createAgentSession()` loads discovered rules, constructs a `TtsrManager`, and buckets rules through `bucketRules(...)`:\n\n```ts\nconst ttsrSettings = settings.getGroup(\"ttsr\");\nconst ttsrManager = new TtsrManager(ttsrSettings);\nconst rulesResult = await loadCapability<Rule>(ruleCapability.id, { cwd });\nconst { rulebookRules, alwaysApplyRules } = bucketRules(\n rulesResult.items,\n ttsrManager,\n {\n builtinRules: ttsrSettings.builtinRules,\n disabledRules: ttsrSettings.disabledRules,\n },\n);\n```\n\n`bucketRules(...)` drops names listed in `ttsr.disabledRules`, drops embedded `builtin-defaults` rules when `ttsr.builtinRules === false`, registers accepted TTSR rules, and then routes the remaining rules to always-apply/rulebook buckets.\n\n### Pre-registration dedupe behavior\n\n`loadCapability(\"rules\")` deduplicates by `rule.name` with first-wins semantics (higher provider priority first). Shadowed duplicates are removed before TTSR registration.\n\n### `TtsrManager.addRule()` behavior\n\nRegistration is skipped when:\n\n- TTSR is disabled (`ttsr.enabled === false`)\n- both `rule.condition` (regex) and `rule.astCondition` (ast-grep patterns) are absent, or every regex condition fails to compile and there are no AST conditions\n- a rule with the same `rule.name` was already registered in this manager\n- the rule scope excludes all monitored streams\n\nInvalid regex conditions and unreachable scopes are logged as warnings and ignored; session startup continues. If a TTSR rule defines `globs`, those globs are compiled as a global file-path gate for matching.\n\n### AST conditions (`astCondition`)\n\nA rule may carry `astCondition`: a list of [ast-grep](https://ast-grep.github.io/) patterns (OR'd, same as regex `condition`), matched structurally instead of textually. 
A repeated metavariable inside one pattern requires both occurrences to be equal (`if ($X) clearTimeout($X)` matches but `if ($X) clearTimeout($Y)` does not).\n\nAST conditions only evaluate on **edit/write tool-argument streams** — they need a language, which is inferred from the file extension on the tool's path argument, and they match against the tool's reconstructed source snapshot (`matcherDigest`), not the raw wire delta. Matching is performed in memory by the native `astMatch` engine (no temp files) with Smart strictness. Streams without a usable file path (prose, thinking, path-less tool calls) skip AST conditions entirely. A rule may mix `condition` and `astCondition`; the regex paths keep working on every scope while AST paths apply only to those tool streams.\n\n### Setting gating\n\n`TtsrSettings.enabled` gates the manager: when `ttsr.enabled === false`, `addRule()` refuses registration and `checkDelta()`/`checkSnapshot()`/`checkAstSnapshot()`/`hasRules()`/`hasAstRules()` all return empty/false, so no matching runs.\n\n## 2. 
Streaming monitor lifecycle\n\nTTSR detection runs inside `AgentSession.#handleAgentEvent`.\n\n### Turn start\n\nOn `turn_start`, the stream buffer is reset:\n\n- `ttsrManager.resetBuffer()`\n\n### During stream (`message_update`)\n\nWhen assistant updates arrive and rules exist:\n\n- monitor `text_delta`, `thinking_delta`, and `toolcall_delta`\n- for tools exposing `matcherDigest` (edit/write), replace the scoped buffer with the reconstructed source snapshot and call `checkSnapshot(snapshot, matchContext)`; otherwise append the delta into a source/tool scoped manager buffer and call `checkDelta(delta, matchContext)` (synchronous regex matching either way)\n- for edit/write tool streams, when `hasAstRules()` is true, `await checkAstSnapshot(snapshot, matchContext)` (asynchronous AST matching)\n\n`checkDelta()`/`checkSnapshot()` iterate registered rules and return all matching rules that pass scope, global path-glob, regex condition, and repeat policy checks. `checkAstSnapshot()` applies the same scope/path/repeat gates, then runs each candidate rule's `astCondition` patterns against the snapshot via the native `astMatch` engine. It is throttled per stream key: an identical consecutive snapshot (common when only non-source arguments change between deltas) is skipped without re-running the matcher. Both paths feed their matches through the same trigger-decision handler.\n\n## 3. Trigger decision and immediate abort path\n\nWhen one or more rules match and at least one matched rule allows interruption:\n\n1. Matched rules are deduplicated into `#pendingTtsrInjections`.\n2. `#ttsrAbortPending = true` and a TTSR resume gate is created.\n3. `agent.abort()` is called immediately.\n4. `ttsr_triggered` event is emitted asynchronously (fire-and-forget).\n5. retry work is scheduled via the post-prompt task scheduler with a 50ms delay.\n\nAbort is not blocked on extension callbacks.\n\n## 4. 
Retry scheduling, context mode, and reminder injection\n\nAfter the 50ms timeout:\n\n1. `#ttsrAbortPending = false`\n2. read `ttsrManager.getSettings().contextMode`\n3. if `contextMode === \"discard\"`, drop the targeted partial assistant output with `agent.replaceMessages(...slice(0, targetAssistantIndex))`\n4. build injection content from pending rules using `ttsr-interrupt.md` template\n5. append and persist a hidden `custom_message`/runtime custom message with `customType: \"ttsr-injection\"` and `details.rules`\n6. mark those rule names injected, persist a `ttsr_injection` entry, and call `agent.continue()` to retry generation\n\nTemplate payload is:\n\n```xml\n<system-interrupt reason=\"rule_violation\" rule=\"{{name}}\" path=\"{{path}}\">\n...\n{{content}}\n</system-interrupt>\n```\n\nPending injections are cleared after content generation.\n\n### `contextMode` behavior on partial output\n\n- `discard`: partial/aborted assistant message is removed before retry.\n- `keep`: partial assistant output remains in conversation state; reminder is appended after it.\n\n### Non-interrupting matches\n\nNon-interrupting matches split by `matchContext.source`:\n\n- **`source === \"tool\"` (tool-source match).** The rule is bucketed into `#perToolTtsrInjections`, keyed by the matched tool call's `id`. There is **no** deferred follow-up turn and the stream is not aborted. When the tool actually produces a result, the `afterToolCall` hook prepends a rendered `ttsr-tool-reminder.md` block to `ctx.result.content` (a single `text` block inserted ahead of the tool's own content), and persists a `ttsr_injection` entry with the consumed rule names. 
The template payload is:\n\n ```xml\n <system-reminder reason=\"rule_violation\" rule=\"{{name}}\" path=\"{{path}}\">\n ...\n {{content}}\n </system-reminder>\n ```\n\n- **`source === \"text\"` / `\"thinking\"` (prose-source match).** Behavior is unchanged: the rule is queued in `#pendingTtsrInjections` and, after a successful non-error, non-aborted assistant message, `AgentSession` injects the hidden `ttsr-injection` custom message as a follow-up and schedules continuation.\n\nWithin a single matching batch, each rule is attached to exactly one sibling tool call — if multiple sibling tool calls would satisfy the same rule, deduplication picks one and the others are left untouched. Multiple distinct rules can still fold onto the same tool call.\n\n#### Implications for tool authors and transcript readers\n\n- The tool's own `toolResult` content is preserved verbatim; the reminder is **prepended** as an additional leading text block. Renderers that assume `content[0]` is the tool's primary output must scan past any block whose text begins with `<system-reminder reason=\"rule_violation\"` (or filter on the wrapper tag) to find the real payload.\n- The reminder is in-band on the tool result, not a separate `custom_message`/`ttsr-injection` entry. Transcript readers looking for non-interrupting TTSR activity on tool-source rules MUST inspect tool results (and the persisted `ttsr_injection` entry list), not just synthetic injection entries.\n- A single tool result may carry reminders for several rules concatenated with a blank line between rendered templates.\n- If the assistant message ends with `stopReason === \"aborted\"` or `\"error\"` before the matched tools run, the pending per-tool buckets are cleared — those rules are **not** persisted as injected and remain eligible to re-trigger on a future turn (subject to repeat policy).\n\n## 5. 
Repeat policy and gap logic\n\n`TtsrManager` tracks `#messageCount` and per-rule `lastInjectedAt`.\n\n### `repeatMode: \"once\"`\n\nA rule can trigger only once: after it has an injection record, it is not eligible to trigger again.\n\n### `repeatMode: \"after-gap\"`\n\nA rule can re-trigger only when:\n\n- `messageCount - lastInjectedAt >= repeatGap`\n\n`messageCount` increments on `turn_end`, so the gap is measured in completed turns, not stream chunks.\n\n## 6. Event emission and extension/hook surfaces\n\n### Session event\n\n`AgentSessionEvent` includes:\n\n```ts\n{ type: \"ttsr_triggered\"; rules: Rule[] }\n```\n\n### Extension runner\n\n`#emitSessionEvent()` routes the event to:\n\n- extension listeners (`ExtensionRunner.emit({ type: \"ttsr_triggered\", rules })`)\n- local session subscribers\n\n### Hook and custom-tool typing\n\n- extension API exposes `on(\"ttsr_triggered\", ...)`\n- hook API exposes `on(\"ttsr_triggered\", ...)`\n- custom tools receive `onSession({ reason: \"ttsr_triggered\", rules })`\n\n### Interactive-mode rendering difference\n\nInteractive mode uses `session.isTtsrAbortPending` to suppress showing the aborted assistant stop reason as a visible failure during TTSR interruption, and renders a `TtsrNotificationComponent` when the event arrives.\n\n## 7. 
Persistence and resume state (current implementation)\n\n`SessionManager` persists injected-rule state:\n\n- entry type: `ttsr_injection`\n- append API: `appendTtsrInjection(ruleNames)`\n- query API: `getInjectedTtsrRules()`\n- context reconstruction includes `SessionContext.injectedTtsrRules`\n\n`TtsrManager` supports restoration via `restoreInjected(ruleNames)`.\n\n### Current wiring status\n\nIn the current runtime path:\n\n- interrupted injections append a hidden `custom_message` with `customType: \"ttsr-injection\"` and append a `ttsr_injection` entry via `appendTtsrInjection(...)`\n- deferred non-interrupting prose-source injections are marked/persisted when their queued custom message reaches `message_end`\n- non-interrupting tool-source injections are marked at match time and persisted via `appendTtsrInjection(...)` from the `afterToolCall` hook when the matched tool's result is produced\n- `createAgentSession()` restores `existingSession.injectedTtsrRules` into `ttsrManager`\n\nNet effect: injected-rule suppression is persisted/restored across session reload/resume for the current branch path.\n\n## 8. Race boundaries and ordering guarantees\n\n### Abort vs retry callback\n\n- abort is synchronous from TTSR handler perspective (`agent.abort()` called immediately)\n- retry is deferred by timer (`50ms`)\n- extension notification is asynchronous and intentionally not awaited before abort/retry scheduling\n\n### Multiple matches in same stream window\n\n`checkDelta()` returns all currently matching eligible rules for that scoped buffer. Pending injections are deduplicated by rule name before injection.\n\n### Between abort and continue\n\nDuring the timer window, state can change (user interruption, mode actions, additional events). The retry call is best-effort: `agent.continue()` is awaited in a try/catch; on failure the error is swallowed and the TTSR resume gate is resolved.\n\n## 9. 
Edge cases summary\n\n- Invalid `condition` regex: skipped with warning; other conditions/rules continue.\n- Duplicate rule names at capability layer: lower-priority duplicates are shadowed before registration.\n- Duplicate names at manager layer: second registration is ignored.\n- `ttsr.disabledRules`: listed names are dropped before TTSR registration and are not surfaced through always-apply/rulebook buckets.\n- `ttsr.builtinRules: false`: embedded `builtin-defaults` rules are dropped before TTSR registration; user/project rules still load.\n- `globs` on a TTSR rule require the stream match context to include at least one matching file path.\n- `contextMode: \"keep\"`: partial violating output can remain in context before reminder retry.\n- `interruptMode: \"never\"`: prose-source matches queue a deferred hidden injection after a successful assistant message; tool-source matches fold an in-band `<system-reminder>` into the matched tool call's `toolResult` content via the `afterToolCall` hook (no mid-stream abort, no separate follow-up turn).\n- Tool-source non-interrupting buckets are cleared when the parent assistant message ends with `stopReason === \"aborted\"` or `\"error\"`, so rules whose target tool never produced a result remain eligible to re-trigger.\n- Repeat-after-gap depends on turn count increments at `turn_end`; mid-turn chunks do not advance gap counters.\n",