@oh-my-pi/pi-coding-agent 15.0.0 → 15.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165) hide show
  1. package/CHANGELOG.md +79 -0
  2. package/examples/extensions/plan-mode.ts +0 -1
  3. package/package.json +10 -10
  4. package/scripts/build-binary.ts +5 -0
  5. package/src/autoresearch/helpers.ts +17 -0
  6. package/src/autoresearch/tools/log-experiment.ts +9 -17
  7. package/src/autoresearch/tools/run-experiment.ts +2 -17
  8. package/src/capability/skill.ts +7 -0
  9. package/src/cli/list-models.ts +1 -1
  10. package/src/cli/shell-cli.ts +3 -13
  11. package/src/cli/update-cli.ts +1 -1
  12. package/src/cli.ts +10 -29
  13. package/src/commands/commit.ts +10 -0
  14. package/src/commit/agentic/tools/propose-changelog.ts +8 -1
  15. package/src/commit/analysis/conventional.ts +8 -66
  16. package/src/commit/map-reduce/reduce-phase.ts +6 -65
  17. package/src/commit/pipeline.ts +2 -2
  18. package/src/commit/shared-llm.ts +89 -0
  19. package/src/config/config-file.ts +210 -0
  20. package/src/config/model-equivalence.ts +8 -11
  21. package/src/config/model-registry.ts +44 -3
  22. package/src/config/model-resolver.ts +1 -4
  23. package/src/config/settings-schema.ts +82 -1
  24. package/src/config/settings.ts +1 -1
  25. package/src/config.ts +3 -219
  26. package/src/discovery/claude-plugins.ts +19 -7
  27. package/src/edit/renderer.ts +7 -1
  28. package/src/eval/js/executor.ts +3 -0
  29. package/src/eval/js/shared/rewrite-imports.ts +2 -2
  30. package/src/eval/py/executor.ts +5 -0
  31. package/src/eval/py/runner.py +42 -11
  32. package/src/eval/py/runtime.ts +1 -0
  33. package/src/exa/factory.ts +2 -2
  34. package/src/exa/mcp-client.ts +74 -1
  35. package/src/exec/bash-executor.ts +5 -1
  36. package/src/export/html/template.generated.ts +1 -1
  37. package/src/export/html/template.js +0 -11
  38. package/src/extensibility/extensions/get-commands-handler.ts +77 -0
  39. package/src/extensibility/extensions/runner.ts +1 -1
  40. package/src/extensibility/extensions/types.ts +89 -223
  41. package/src/extensibility/hooks/types.ts +89 -314
  42. package/src/extensibility/plugins/legacy-pi-compat.ts +48 -31
  43. package/src/extensibility/shared-events.ts +343 -0
  44. package/src/extensibility/skills.ts +9 -0
  45. package/src/goals/index.ts +3 -0
  46. package/src/goals/runtime.ts +500 -0
  47. package/src/goals/state.ts +37 -0
  48. package/src/goals/tools/goal-tool.ts +237 -0
  49. package/src/hashline/anchors.ts +2 -2
  50. package/src/hashline/input.ts +2 -1
  51. package/src/hashline/parser.ts +27 -3
  52. package/src/hindsight/mental-models.ts +1 -1
  53. package/src/internal-urls/agent-protocol.ts +1 -20
  54. package/src/internal-urls/artifact-protocol.ts +1 -19
  55. package/src/internal-urls/docs-index.generated.ts +11 -12
  56. package/src/internal-urls/registry-helpers.ts +25 -0
  57. package/src/internal-urls/router.ts +8 -0
  58. package/src/internal-urls/types.ts +21 -0
  59. package/src/lsp/config.ts +15 -6
  60. package/src/lsp/defaults.json +6 -2
  61. package/src/main.ts +11 -2
  62. package/src/mcp/oauth-flow.ts +20 -0
  63. package/src/modes/acp/acp-agent.ts +327 -95
  64. package/src/modes/components/assistant-message.ts +14 -8
  65. package/src/modes/components/bash-execution.ts +24 -63
  66. package/src/modes/components/custom-message.ts +14 -40
  67. package/src/modes/components/eval-execution.ts +27 -57
  68. package/src/modes/components/execution-shared.ts +102 -0
  69. package/src/modes/components/hook-message.ts +17 -49
  70. package/src/modes/components/mcp-add-wizard.ts +26 -5
  71. package/src/modes/components/message-frame.ts +88 -0
  72. package/src/modes/components/model-selector.ts +1 -1
  73. package/src/modes/components/session-observer-overlay.ts +6 -2
  74. package/src/modes/components/session-selector.ts +1 -1
  75. package/src/modes/components/status-line/segments.ts +93 -8
  76. package/src/modes/components/status-line/types.ts +4 -0
  77. package/src/modes/components/status-line.ts +28 -10
  78. package/src/modes/components/tool-execution.ts +7 -8
  79. package/src/modes/controllers/command-controller-shared.ts +108 -0
  80. package/src/modes/controllers/command-controller.ts +13 -4
  81. package/src/modes/controllers/event-controller.ts +36 -7
  82. package/src/modes/controllers/extension-ui-controller.ts +3 -2
  83. package/src/modes/controllers/input-controller.ts +13 -0
  84. package/src/modes/controllers/mcp-command-controller.ts +56 -61
  85. package/src/modes/controllers/ssh-command-controller.ts +18 -57
  86. package/src/modes/interactive-mode.ts +624 -52
  87. package/src/modes/print-mode.ts +16 -86
  88. package/src/modes/rpc/host-uris.ts +235 -0
  89. package/src/modes/rpc/rpc-mode.ts +41 -88
  90. package/src/modes/rpc/rpc-types.ts +57 -0
  91. package/src/modes/runtime-init.ts +116 -0
  92. package/src/modes/theme/defaults/dark-poimandres.json +3 -0
  93. package/src/modes/theme/defaults/light-poimandres.json +3 -0
  94. package/src/modes/theme/theme.ts +24 -6
  95. package/src/modes/types.ts +14 -3
  96. package/src/modes/utils/context-usage.ts +13 -13
  97. package/src/modes/utils/ui-helpers.ts +10 -3
  98. package/src/plan-mode/approved-plan.ts +35 -1
  99. package/src/prompts/goals/goal-budget-limit.md +16 -0
  100. package/src/prompts/goals/goal-continuation.md +28 -0
  101. package/src/prompts/goals/goal-mode-active.md +23 -0
  102. package/src/prompts/system/plan-mode-active.md +5 -5
  103. package/src/prompts/system/plan-mode-tool-decision-reminder.md +1 -1
  104. package/src/prompts/tools/bash.md +6 -0
  105. package/src/prompts/tools/github.md +4 -4
  106. package/src/prompts/tools/goal.md +13 -0
  107. package/src/prompts/tools/hashline.md +101 -117
  108. package/src/prompts/tools/read.md +55 -36
  109. package/src/prompts/tools/resolve.md +6 -5
  110. package/src/sdk.ts +12 -5
  111. package/src/session/agent-session.ts +428 -106
  112. package/src/session/blob-store.ts +36 -3
  113. package/src/session/messages.ts +67 -2
  114. package/src/session/session-manager.ts +131 -12
  115. package/src/session/session-storage.ts +33 -15
  116. package/src/session/streaming-output.ts +309 -13
  117. package/src/slash-commands/builtin-registry.ts +18 -0
  118. package/src/ssh/ssh-executor.ts +5 -0
  119. package/src/system-prompt.ts +4 -2
  120. package/src/task/discovery.ts +5 -2
  121. package/src/task/executor.ts +19 -8
  122. package/src/task/index.ts +3 -0
  123. package/src/task/render.ts +21 -15
  124. package/src/task/types.ts +4 -0
  125. package/src/tools/ast-edit.ts +21 -120
  126. package/src/tools/ast-grep.ts +21 -119
  127. package/src/tools/bash-command-fixup.ts +47 -0
  128. package/src/tools/bash-interactive.ts +9 -1
  129. package/src/tools/bash.ts +66 -19
  130. package/src/tools/browser/attach.ts +3 -3
  131. package/src/tools/browser/launch.ts +81 -18
  132. package/src/tools/browser/registry.ts +1 -5
  133. package/src/tools/browser/render.ts +2 -2
  134. package/src/tools/browser/tab-supervisor.ts +51 -14
  135. package/src/tools/conflict-detect.ts +15 -4
  136. package/src/tools/eval.ts +12 -2
  137. package/src/tools/find.ts +20 -38
  138. package/src/tools/gh.ts +44 -10
  139. package/src/tools/index.ts +22 -11
  140. package/src/tools/inspect-image.ts +3 -10
  141. package/src/tools/job.ts +16 -7
  142. package/src/tools/output-meta.ts +202 -37
  143. package/src/tools/path-utils.ts +125 -2
  144. package/src/tools/read.ts +548 -237
  145. package/src/tools/render-utils.ts +92 -0
  146. package/src/tools/renderers.ts +2 -0
  147. package/src/tools/resolve.ts +72 -44
  148. package/src/tools/search.ts +120 -186
  149. package/src/tools/ssh.ts +3 -2
  150. package/src/tools/write.ts +64 -9
  151. package/src/utils/file-mentions.ts +1 -1
  152. package/src/utils/image-loading.ts +7 -3
  153. package/src/utils/image-resize.ts +32 -43
  154. package/src/vim/parser.ts +0 -17
  155. package/src/vim/render.ts +1 -1
  156. package/src/vim/types.ts +1 -1
  157. package/src/web/search/providers/anthropic.ts +5 -0
  158. package/src/web/search/providers/exa.ts +3 -0
  159. package/src/web/search/providers/gemini.ts +40 -95
  160. package/src/web/search/providers/jina.ts +5 -2
  161. package/src/web/search/providers/zai.ts +5 -2
  162. package/src/prompts/tools/exit-plan-mode.md +0 -6
  163. package/src/tools/exit-plan-mode.ts +0 -97
  164. package/src/utils/fuzzy.ts +0 -108
  165. package/src/utils/image-convert.ts +0 -27
@@ -1,6 +1,6 @@
1
1
  // Auto-generated by scripts/generate-docs-index.ts - DO NOT EDIT
2
2
 
3
- export const EMBEDDED_DOC_FILENAMES: readonly string[] = ["ERRATA-GPT5-HARMONY.md","bash-tool-runtime.md","blob-artifact-architecture.md","compaction.md","config-usage.md","custom-tools.md","environment-variables.md","extension-loading.md","extensions.md","fs-scan-cache-architecture.md","gemini-manifest-extensions.md","handoff-generation-pipeline.md","hooks.md","marketplace.md","mcp-config.md","mcp-protocol-transports.md","mcp-runtime-lifecycle.md","mcp-server-tool-authoring.md","memory.md","models.md","natives-addon-loader-runtime.md","natives-architecture.md","natives-binding-contract.md","natives-build-release-debugging.md","natives-media-system-utils.md","natives-rust-task-cancellation.md","natives-shell-pty-process.md","natives-text-search-pipeline.md","non-compaction-retry-policy.md","notebook-tool-runtime.md","plugin-manager-installer-plumbing.md","porting-from-pi-mono.md","porting-to-natives.md","provider-streaming-internals.md","python-repl.md","render-mermaid.md","resolve-tool-runtime.md","rpc.md","rulebook-matching-pipeline.md","sdk.md","secrets.md","session-operations-export-share-fork-resume.md","session-switching-and-recent-listing.md","session-tree-plan.md","session.md","skills.md","skills/authoring-extensions.md","skills/authoring-hooks.md","skills/authoring-marketplaces.md","skills/examples/hello-extension/README.md","skills/examples/mini-marketplace/README.md","skills/examples/safety-hook/README.md","slash-command-internals.md","task-agent-discovery.md","theme.md","tools/ask.md","tools/ast-edit.md","tools/ast-grep.md","tools/bash.md","tools/browser.md","tools/calc.md","tools/checkpoint.md","tools/debug.md","tools/edit.md","tools/eval.md","tools/exit_plan_mode.md","tools/find.md","tools/github.md","tools/inspect_image.md","tools/irc.md","tools/job.md","tools/lsp.md","tools/read.md","tools/recall.md","tools/recipe.md","tools/reflect.md","tools/render_mermaid.md","tools/resolve.md","tools/retain.md","tools/rewind.md","tools/search.md","tools/search_
tool_bm25.md","tools/ssh.md","tools/task.md","tools/todo_write.md","tools/web_search.md","tools/write.md","tree.md","ttsr-injection-lifecycle.md","tui-runtime-internals.md","tui.md"];
3
+ export const EMBEDDED_DOC_FILENAMES: readonly string[] = ["ERRATA-GPT5-HARMONY.md","bash-tool-runtime.md","blob-artifact-architecture.md","compaction.md","config-usage.md","custom-tools.md","environment-variables.md","extension-loading.md","extensions.md","fs-scan-cache-architecture.md","gemini-manifest-extensions.md","handoff-generation-pipeline.md","hooks.md","marketplace.md","mcp-config.md","mcp-protocol-transports.md","mcp-runtime-lifecycle.md","mcp-server-tool-authoring.md","memory.md","models.md","natives-addon-loader-runtime.md","natives-architecture.md","natives-binding-contract.md","natives-build-release-debugging.md","natives-media-system-utils.md","natives-rust-task-cancellation.md","natives-shell-pty-process.md","natives-text-search-pipeline.md","non-compaction-retry-policy.md","notebook-tool-runtime.md","plugin-manager-installer-plumbing.md","porting-from-pi-mono.md","porting-to-natives.md","provider-streaming-internals.md","python-repl.md","render-mermaid.md","resolve-tool-runtime.md","rpc.md","rulebook-matching-pipeline.md","sdk.md","secrets.md","session-operations-export-share-fork-resume.md","session-switching-and-recent-listing.md","session-tree-plan.md","session.md","skills.md","skills/authoring-extensions.md","skills/authoring-hooks.md","skills/authoring-marketplaces.md","skills/examples/hello-extension/README.md","skills/examples/mini-marketplace/README.md","skills/examples/safety-hook/README.md","slash-command-internals.md","task-agent-discovery.md","theme.md","tools/ask.md","tools/ast-edit.md","tools/ast-grep.md","tools/bash.md","tools/browser.md","tools/calc.md","tools/checkpoint.md","tools/debug.md","tools/edit.md","tools/eval.md","tools/find.md","tools/github.md","tools/inspect_image.md","tools/irc.md","tools/job.md","tools/lsp.md","tools/read.md","tools/recall.md","tools/recipe.md","tools/reflect.md","tools/render_mermaid.md","tools/resolve.md","tools/retain.md","tools/rewind.md","tools/search.md","tools/search_tool_bm25.md","tools/ssh.m
d","tools/task.md","tools/todo_write.md","tools/web_search.md","tools/write.md","tree.md","ttsr-injection-lifecycle.md","tui-runtime-internals.md","tui.md"];
4
4
 
5
5
  export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
6
6
  "ERRATA-GPT5-HARMONY.md": "# ERRATA — GPT-5 Harmony-Header Leakage\n\n## 1. The problem\n\nOpenAI frames tool calls in the Harmony chat protocol:\n\n```\n<|start|>assistant<|channel|>commentary to=functions.<NAME><|message|>{ARGS}<|call|>\n```\n\n`<|channel|>commentary to=functions.NAME` is the **routing header** —\ncontrol tokens consumed by the runtime to dispatch the call. These\ntokens never appear as content under normal operation; the runtime\nstrips them.\n\nThe defect: gpt-5 models occasionally emit, **as ordinary content\ninside `{ARGS}`**, the **plain-text shadow** of these routing tokens —\nthe same characters without the `<|…|>` brackets — and continue\nproducing more pseudo-routing structure (channel name, body marker,\nmultilingual spam, fake tool-result framing). The contamination lives\ninside the visible tool argument and is dispatched to the tool as if it\nwere intended content.\n\n**Critical detail.** The actual `<|start|>` / `<|channel|>` /\n`<|message|>` / `<|call|>` special tokens almost never appear in tool\nargs. What leaks is the bracket-less spelling — `analysis to=functions.X\ncode …` — because OpenAI applies a logit mask suppressing the\ncontrol-token IDs inside the args region. The mass that would have gone\nto those special tokens redistributes onto the un-bracketed plain-text\nrepresentation the model also learned. This makes the leak structurally\ninvisible to the routing parser and lands it in the tool input verbatim.\n\nManifestation in tool args (real corpus example):\n\n```\n~ add_function(iso, ctx, ns, \"installSystemChangeObserver\",\n os_install_system_change_observer);】【\"】【analysis to=functions.edit\n code above เงินไทยฟรีuser to=functions.edit code …\n```\n\nThe leading code is real and intended. Everything after the first\nnon-Latin token through the next clean structural boundary is corruption.\n\n---\n\n## 2. 
Observed statistics & failure modes\n\nSource: `~/.omp/stats.db` (`ss_tool_calls`, `ss_assistant_msgs`), through\n2026-05-10. 1.05M tool calls scanned.\n\n### 2.1 Rate\n\n| Model | Leaks in tool args | Calls | per million |\n|------------------|-------------------:|--------:|------------:|\n| gpt-5.4 | 37 | 226,957 | 163 |\n| gpt-5.3-codex | 17 | 112,243 | 151 |\n| gpt-5.5 | 2 | 80,750 | 25 |\n| gpt-5.2-codex | 0 | — | — |\n\nPlus 15 hits in assistant visible text / thinking blobs.\n\n### 2.2 Tool distribution\n\n| Tool | Hits |\n|---------------------|-----:|\n| `edit` | 38 |\n| `eval` | 11 |\n| `report_tool_issue` | 3 |\n| `grep`/`read`/`search`/`yield` | 1 each |\n\nConcentrated in tools with free-form (non-JSON-schema) argument formats.\n\n### 2.3 Leak shape (deterministic)\n\n```\nLEAK ::= JUNK_PREFIX MARKER CHANNEL_BODY (LEAK)?\nMARKER ::= \"to=functions.\" TOOL_NAME\nCHANNEL_BODY ::= \" code \" (SPAM | reasoning_prose | fake_tool_output)*\nJUNK_PREFIX ::= (GLITCH_TOKEN | CHANNEL_WORD | NON_LATIN_RUN | \"}\" | \"】【\")+\n```\n\n**Cascading is common.** Of 96 marker occurrences across 71 contaminated\nrecords, 39 contain ≥2 markers and 7 contain ≥3 — the model emits\nmultiple fake `to=functions.X code …` blocks back-to-back, often with\nfake `code_output\\nCell N:\\n…` framing between them. Once the\nplain-text scaffolding is in the residual stream, the prefix now *looks\nlike* a fresh tool envelope start, so the macro prior over continuations\nkeeps voting for more scaffolding. Self-amplifying.\n\n### 2.4 Glitch tokens\n\nSingle-token identifiers in `o200k_base` whose embeddings appear to be\nnear-init from underrepresentation in post-training. 
ASCII residue\nimmediately before the marker in the natural corpus:\n\n| Surface string | Single-token | Token ID | Hits in corpus |\n|-------------------|:-:|---------:|---:|\n| `Japgolly` | ✅ | 199,745 | 1 |\n| `Jsii` | ✅ | 114,318 | (subtoken of `Jsii_commentary`) |\n| `Jsii_commentary` | — (3 toks) | — | 2 |\n| `changedFiles` | — (2 toks) | — | 8 |\n| `RTLU` | — (2 toks) | — | 3 |\n\n`Japgolly` is in the last 0.13% of the vocabulary — the same family of\nGitHub-corpus residue that produced `SolidGoldMagikarp` in the 2023\nGPT-2 vocabulary (Rumbelow & Watkins). `SolidGoldMagikarp` itself\ntokenizes to 5 tokens in `o200k_base` — that specific token was retired,\nbut the class wasn't.\n\nFor the multi-token entries, the corpus-level signature is the surface\nstring; the underlying glitch trigger is a sub-token (e.g. `Jsii` inside\n`Jsii_commentary`). The detector list (`G` signal) keys on the surface\nstrings.\n\nStable across unrelated sessions. Treated as a high-precision detector\nsignal.\n\n### 2.5 Channel-word leakage\n\n`analysis` (5), `assistant` (5), `commentary` (3), `user` (1) appear\ndirectly preceding `to=`. Always bare words; never `<|channel|>analysis`\nor any other bracketed form. Consistent with §1 — the brackets are\nmasked, the words are not.\n\n### 2.6 Non-Latin spam residue\n\n96 marker hits, by script: CJK 40, Cyrillic 12, Telugu/Kannada/Malayalam\n18, Thai 8, Georgian 7, Armenian 7, Arabic 1. 
Recurring fragments are\nChinese gambling SEO (`大发时时彩`, `天天中彩票`), Georgian/Abkhaz junk,\nand Thai casino spam — well-known low-quality crawl residue.\n\nThis is the same script distribution observed in the controlled\nreproduction (§7.3), independent of the prompt's natural language.\n\n### 2.7 Failure-mode breakdown for the `edit` tool\n\nThe `edit` tool exists in two variants in the corpus:\n\n| Variant | Calls | Recovery |\n|--------------------------|------:|----------|\n| Patch-DSL (`@PATH`/anchor/`~payload`) | 27 | **Recoverable** by op-truncation (§3.3) |\n| JSON-schema (`{path,edits:[…]}`) | 11 | **Not recoverable** — contamination is escaped *inside* JSON strings, parser accepts it cleanly, content would be written verbatim into source files |\n\nFor Patch-DSL leaks specifically:\n\n- 20/27 cases: contamination on the last input line; nothing follows.\n- 7/27 cases: contamination mid-input; what follows is one of: a\n duplicate replay of an earlier file/anchor, intended content for a\n *different* tool call (the model started its next call inline), or\n pure hallucination. Post-contamination content is never trustworthy.\n\n### 2.8 Mechanism (confirmed)\n\n**Prior collapse from null-embedding glitch tokens, into a\ncontrol-token-masked basin whose mass redistributes onto the\nplain-text shadow of the Harmony protocol.**\n\nStep by step:\n\n1. The model is mid-`{ARGS}` of a Harmony tool call. The runtime applies\n a logit mask suppressing structural control tokens (`<|channel|>`,\n `<|message|>`, `<|call|>`, `<|start|>`, `<|end|>`) inside the args\n region. Without this mask, normal generation would constantly\n hallucinate envelope-closes; with it, those token IDs have logit\n `-∞` in args.\n2. A glitch token `g` is sampled. By construction `g` was in the BPE\n merge corpus but barely in LM/RL training, so its **input embedding\n `e_g` ≈ near-init noise of small norm**.\n3. 
At position t+1, the residual update `h_{t+1} ≈ LN(h_t + e_g + Attn +\n MLP)` is dominated by the prefix-derived terms; the just-emitted-token\n signal is effectively absent. Generation diversity normally comes\n from `e_x` steering the residual into different sub-regions —\n stripped here.\n4. The next-token distribution therefore collapses onto the **conditional\n prior over continuations of the prefix, with local conditioning\n removed**. In a tool-calling rollout context, that prior is sharply\n peaked on Harmony scaffolding (control tokens + routing tokens) —\n that's what RL trained.\n5. The mask zeros the control-token IDs. Mass redistributes onto the\n **next-best continuation**: the un-bracketed surface-form spelling of\n the same protocol (`analysis`, `commentary`, ` to=functions.X`,\n ` code `). This spelling is unmasked because those characters are\n ordinary tokens.\n6. Once a few tokens of plain-text scaffolding land in the residual\n stream, the prefix now resembles a fresh envelope start. The macro\n prior keeps voting for more scaffolding. Cascading (§2.3) follows.\n7. Multilingual spam after the marker is the same prior-collapse\n continuation, drawn from the training neighborhood of the glitch\n token (often ESL/auto-generated multilingual web junk — exactly the\n crawl residue in §2.6).\n\n**Two corollaries the corpus data demanded but only the experiment\nexplained:**\n\n- **The brackets never appear** (§1, §2.5). The mask is what makes the\n leak land in plain text instead of as a real envelope-close.\n- **Counterintuitive grammar dependency** (§7.4). The leak is *worse* in\n formats closest to OpenAI's training distribution. Off-distribution\n custom grammars dampen the macro-prior basin; the official\n `*** Begin Patch` format is the strongest collapse target.\n\nThe 2023 SolidGoldMagikarp paper documented mechanism (1)+(2)+(4). 
The\nnew piece is (5): when constrained decoding masks the natural collapse\ntarget, the mass laundered through the un-masked plain-text shadow\nbecomes a structurally-invisible exfiltration channel.",
@@ -9,7 +9,7 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
9
9
  "compaction.md": "# Compaction and Branch Summaries\n\nCompaction and branch summaries are the two mechanisms that keep long sessions usable without losing prior work context.\n\n- **Compaction** rewrites old history into a summary on the current branch.\n- **Branch summary** captures abandoned branch context during `/tree` navigation.\n\nBoth are persisted as session entries and converted back into user-context messages when rebuilding LLM input.\n\n## Key implementation files\n\n- `src/session/compaction/compaction.ts`\n- `src/session/compaction/branch-summarization.ts`\n- `src/session/compaction/pruning.ts`\n- `src/session/compaction/utils.ts`\n- `src/session/session-manager.ts`\n- `src/session/agent-session.ts`\n- `src/session/messages.ts`\n- `src/extensibility/hooks/types.ts`\n- `src/config/settings-schema.ts`\n\n## Session entry model\n\nCompaction and branch summaries are first-class session entries, not plain assistant/user messages.\n\n- `CompactionEntry`\n - `type: \"compaction\"`\n - `summary`, optional `shortSummary`\n - `firstKeptEntryId` (compaction boundary)\n - `tokensBefore`\n - optional `details`, `preserveData`, `fromExtension`\n- `BranchSummaryEntry`\n - `type: \"branch_summary\"`\n - `fromId`, `summary`\n - optional `details`, `fromExtension`\n\nWhen context is rebuilt (`buildSessionContext`):\n\n1. Latest compaction on the active path is converted to one `compactionSummary` message.\n2. Kept entries from `firstKeptEntryId` to the compaction point are re-included.\n3. Later entries on the path are appended.\n4. `branch_summary` entries are converted to `branchSummary` messages.\n5. 
`custom_message` entries are converted to `custom` messages.\n\nThose custom roles are then transformed into LLM-facing user messages in `convertToLlm()` using the static templates:\n\n- `prompts/compaction/compaction-summary-context.md`\n- `prompts/compaction/branch-summary-context.md`\n\n## Compaction pipeline\n\n### Triggers\n\nCompaction/context maintenance can run in four ways:\n\n1. **Manual context compaction**: `/compact [instructions]` calls `AgentSession.compact(...)`.\n2. **Automatic overflow recovery**: after a same-model assistant error that matches context overflow.\n3. **Automatic threshold maintenance**: after a successful turn when context exceeds the resolved threshold.\n4. **Idle maintenance**: `runIdleCompaction()` can invoke the same auto-maintenance path with reason `\"idle\"`.\n\n### Compaction shape (visual)\n\n```text\nBefore compaction:\n\n entry: 0 1 2 3 4 5 6 7 8 9\n ┌─────┬─────┬─────┬──────┬─────┬─────┬──────┬──────┬─────┬──────┐\n │ hdr │ usr │ ass │ tool │ usr │ ass │ tool │ tool │ ass │ tool │\n └─────┴─────┴─────┴──────┴─────┴─────┴──────┴──────┴─────┴──────┘\n └────────┬───────┘ └──────────────┬──────────────┘\n messagesToSummarize kept messages\n ↑\n firstKeptEntryId (entry 4)\n\nAfter compaction (new entry appended):\n\n entry: 0 1 2 3 4 5 6 7 8 9 10\n ┌─────┬─────┬─────┬──────┬─────┬─────┬──────┬──────┬─────┬──────┬─────┐\n │ hdr │ usr │ ass │ tool │ usr │ ass │ tool │ tool │ ass │ tool │ cmp │\n └─────┴─────┴─────┴──────┴─────┴─────┴──────┴──────┴─────┴──────┴─────┘\n └──────────┬──────┘ └──────────────────────┬───────────────────┘\n not sent to LLM sent to LLM\n ↑\n starts from firstKeptEntryId\n\nWhat the LLM sees:\n\n ┌────────┬─────────┬─────┬─────┬──────┬──────┬─────┬──────┐\n │ system │ summary │ usr │ ass │ tool │ tool │ ass │ tool │\n └────────┴─────────┴─────┴─────┴──────┴──────┴─────┴──────┘\n ↑ ↑ └─────────────────┬────────────────┘\n prompt from cmp messages from firstKeptEntryId\n```\n\n### Overflow-retry vs 
threshold/idle maintenance\n\nThe automatic paths are intentionally different:\n\n- **Overflow recovery**\n - Trigger: current-model assistant error is detected as context overflow and the error is not older than the latest compaction.\n - The failing assistant error message is removed from active agent state before retry.\n - Context promotion is tried first; if a configured larger model is available, the agent switches model and retries without compacting.\n - If promotion is unavailable and compaction is enabled, context-full compaction runs with `reason: \"overflow\"` and `willRetry: true`; handoff strategy is not used for overflow.\n - On success, agent auto-continues (`agent.continue()`) after compaction.\n\n- **Threshold maintenance**\n - Trigger: successful, non-error assistant message whose adjusted context tokens exceed `resolveThresholdTokens(...)`.\n - Tool-output pruning can reduce the measured token count before threshold comparison.\n - Context promotion is tried before compaction.\n - If promotion is unavailable, auto maintenance runs with `reason: \"threshold\"` and `willRetry: false`.\n - With `compaction.strategy: \"handoff\"`, threshold maintenance starts a new handoff session instead of writing a compaction entry; if handoff returns no document without aborting, it falls back to context-full compaction.\n - On success, if `compaction.autoContinue !== false`, schedules an agent-authored developer auto-continue prompt from `prompts/system/auto-continue.md`.\n\n- **Idle maintenance**\n - Trigger: `runIdleCompaction()` when not streaming or already compacting.\n - Uses `reason: \"idle\"` and does not auto-continue afterward.\n\n### Pre-compaction pruning\n\nBefore compaction checks, tool-result pruning may run (`pruneToolOutputs`).\n\nDefault prune policy:\n\n- Protect newest `40_000` tool-output tokens.\n- Require at least `20_000` total estimated savings.\n- Never prune tool results from `skill` or `read`.\n\nPruned tool results are replaced 
with:\n\n- `[Output truncated - N tokens]`\n\nIf pruning changes entries, session storage is rewritten and agent message state is refreshed before compaction decisions.\n\n### Boundary and cut-point logic\n\n`prepareCompaction()` only considers entries since the last compaction entry (if any).\n\n1. Find previous compaction index.\n2. Compute `boundaryStart = prevCompactionIndex + 1`.\n3. Adapt `keepRecentTokens` using measured usage ratio when available.\n4. Run `findCutPoint()` over the boundary window.\n\nValid cut points include:\n\n- message entries with roles: `user`, `assistant`, `bashExecution`, `hookMessage`, `branchSummary`, `compactionSummary`\n- `custom_message` entries\n- `branch_summary` entries\n\nHard rule: never cut at `toolResult`.\n\nIf there are non-message metadata entries immediately before the cut point (`model_change`, `thinking_level_change`, labels, etc.), they are pulled into the kept region by moving cut index backward until a message or compaction boundary is hit.\n\n### Split-turn handling\n\nIf cut point is not at a user-turn start, compaction treats it as a split turn.\n\nTurn start detection treats these as user-turn boundaries:\n\n- `message.role === \"user\"`\n- `message.role === \"bashExecution\"`\n- `custom_message` entry\n- `branch_summary` entry\n\nSplit-turn compaction generates two summaries:\n\n1. History summary (`messagesToSummarize`)\n2. Turn-prefix summary (`turnPrefixMessages`)\n\nFinal stored summary is merged as:\n\n```markdown\n<history summary>\n\n---\n\n**Turn Context (split turn):**\n\n<turn prefix summary>\n```\n\n### Summary generation\n\n`compact(...)` builds summaries from serialized conversation text:\n\n1. Convert messages via `convertToLlm()`.\n2. Serialize with `serializeConversation()`.\n3. Wrap in `<conversation>...</conversation>`.\n4. Optionally include `<previous-summary>...</previous-summary>`.\n5. Optionally inject hook context as `<additional-context>` list.\n6. 
Execute summarization prompt with `SUMMARIZATION_SYSTEM_PROMPT`.\n\nPrompt selection:\n\n- first compaction: `compaction-summary.md`\n- iterative compaction with prior summary: `compaction-update-summary.md`\n- split-turn second pass: `compaction-turn-prefix.md`\n- short UI summary: `compaction-short-summary.md`\n\nRemote summarization modes:\n\n- If `compaction.remoteEndpoint` is set and remote compaction is enabled, local summary generation POSTs:\n - `{ systemPrompt, prompt }`\n- Expects JSON containing at least `{ summary }`.\n- For OpenAI/OpenAI Codex models, compaction first tries the provider-native `/responses/compact` endpoint when remote compaction is enabled. It preserves provider replacement history in `preserveData.openaiRemoteCompaction` and falls back to local summarization if that native request fails.\n\n### File-operation context in summaries\n\nCompaction tracks cumulative file activity using assistant tool calls:\n\n- `read(path)` → read set\n- `write(path)` → modified set\n- `edit(path)` → modified set\n\nCumulative behavior:\n\n- Includes prior compaction details only when prior entry is pi-generated (`fromExtension !== true`).\n- In split turns, includes turn-prefix file ops too.\n- `readFiles` excludes files also modified.\n\nSummary text gets file tags appended via prompt template:\n\n```xml\n<read-files>\n...\n</read-files>\n<modified-files>\n...\n</modified-files>\n```\n\n### Persist and reload\n\nAfter summary generation (or hook-provided summary), agent session:\n\n1. Appends `CompactionEntry` with `appendCompaction(...)` for context-full maintenance; handoff strategy creates a new session and injects a handoff `custom_message` instead.\n2. Rebuilds display context from the active leaf via `buildDisplaySessionContext()`.\n3. Replaces live agent messages with rebuilt context.\n4. 
Emits `session_compact` hook event.\n\n## Branch summarization pipeline\n\nBranch summarization is tied to tree navigation, not token overflow.\n\n### Trigger\n\nDuring `navigateTree(...)`:\n\n1. Compute abandoned entries from old leaf to common ancestor using `collectEntriesForBranchSummary(...)`.\n2. If caller requested summary (`options.summarize`), generate summary before switching leaf.\n3. If summary exists, attach it at the navigation target using `branchWithSummary(...)`.\n\nOperationally this is commonly driven by `/tree` flow when `branchSummary.enabled` is enabled.\n\n### Branch switch shape (visual)\n\n```text\nTree before navigation:\n\n ┌─ B ─ C ─ D (old leaf, being abandoned)\n A ───┤\n └─ E ─ F (target)\n\nCommon ancestor: A\nEntries to summarize: B, C, D\n\nAfter navigation with summary:\n\n ┌─ B ─ C ─ D ─ [summary of B,C,D]\n A ───┤\n └─ E ─ F (new leaf)\n```\n\n### Preparation and token budget\n\n`generateBranchSummary(...)` computes budget as:\n\n- `tokenBudget = model.contextWindow - branchSummary.reserveTokens`\n\n`prepareBranchEntries(...)` then:\n\n1. First pass: collect cumulative file ops from all summarized entries, including prior pi-generated `branch_summary` details.\n2. Second pass: walk newest → oldest, adding messages until token budget is reached.\n3. Prefer preserving recent context.\n4. May still include large summary entries near budget edge for continuity.\n\nCompaction entries are included as messages (`compactionSummary`) during branch summarization input.\n\n### Summary generation and persistence\n\nBranch summarization:\n\n1. Converts and serializes selected messages.\n2. Wraps in `<conversation>`.\n3. Uses custom instructions if supplied, otherwise `branch-summary.md`.\n4. Calls summarization model with `SUMMARIZATION_SYSTEM_PROMPT`.\n5. Prepends `branch-summary-preamble.md`.\n6. 
Appends file-operation tags.\n\nResult is stored as `BranchSummaryEntry` with optional details (`readFiles`, `modifiedFiles`).\n\n## Extension and hook touchpoints\n\n### `session_before_compact`\n\nPre-compaction hook.\n\nCan:\n\n- cancel compaction (`{ cancel: true }`)\n- provide full custom compaction payload (`{ compaction: CompactionResult }`)\n\n### `session.compacting`\n\nPrompt/context customization hook for default compaction.\n\nCan return:\n\n- `prompt` (override base summary prompt)\n- `context` (extra context lines injected into `<additional-context>`)\n- `preserveData` (stored on compaction entry)\n\n### `session_compact`\n\nPost-compaction notification with saved `compactionEntry` and `fromExtension` flag.\n\n### `session_before_tree`\n\nRuns on tree navigation before default branch summary generation.\n\nCan:\n\n- cancel navigation\n- provide custom `{ summary: { summary, details } }` used when user requested summarization\n\n### `session_tree`\n\nPost-navigation event exposing new/old leaf and optional summary entry.\n\n## Runtime behavior and failure semantics\n\n- Manual compaction aborts current agent operation first.\n- `abortCompaction()` cancels both manual and auto-compaction controllers.\n- Auto compaction emits start/end session events for UI/state updates.\n- Auto compaction can try multiple model candidates and retry transient failures; long retry delays prefer the next candidate when one is available.\n- Overflow errors are excluded from generic retry path because they are handled by context promotion/compaction.\n- If auto-compaction fails:\n - overflow path emits `Context overflow recovery failed: ...`\n - threshold path emits `Auto-compaction failed: ...`\n- Branch summarization can be cancelled via abort signal (e.g., Escape), returning canceled/aborted navigation result.\n\n## Settings and defaults\n\nFrom `settings-schema.ts`:\n\n- `compaction.enabled` = `true`\n- `compaction.strategy` = `\"context-full\"` (`\"handoff\"` and 
`\"off\"` are also supported)\n- `compaction.reserveTokens` = `16384`\n- `compaction.keepRecentTokens` = `20000`\n- `compaction.autoContinue` = `true`\n- `compaction.remoteEnabled` = `true`\n- `compaction.remoteEndpoint` = `undefined`\n- `compaction.thresholdPercent` = `-1` and `compaction.thresholdTokens` = `-1`; when no positive override is set, the threshold is `contextWindow - max(15% of contextWindow, reserveTokens)`\n- `compaction.idleEnabled` = `true`\n- `branchSummary.enabled` = `false`\n- `branchSummary.reserveTokens` = `16384`\n\nThese values are consumed at runtime by `AgentSession` and compaction/branch summarization modules.\n",
10
10
  "config-usage.md": "# Configuration Discovery and Resolution\n\nThis document describes how the coding-agent resolves configuration today: which roots are scanned, how precedence works, and how resolved config is consumed by settings, skills, hooks, tools, and extensions.\n\n## Scope\n\nPrimary implementation:\n\n- `packages/coding-agent/src/config.ts`\n- `packages/coding-agent/src/config/settings.ts`\n- `packages/coding-agent/src/config/settings-schema.ts`\n- `packages/coding-agent/src/discovery/builtin.ts`\n- `packages/coding-agent/src/discovery/helpers.ts`\n\nKey integration points:\n\n- `packages/coding-agent/src/capability/index.ts`\n- `packages/coding-agent/src/discovery/index.ts`\n- `packages/coding-agent/src/extensibility/skills.ts`\n- `packages/coding-agent/src/extensibility/hooks/loader.ts`\n- `packages/coding-agent/src/extensibility/custom-tools/loader.ts`\n- `packages/coding-agent/src/extensibility/extensions/loader.ts`\n\n---\n\n## Resolution flow (visual)\n\n```text\n Generic helper order (`config.ts`)\n┌───────────────────────────────────────┐\n│ 1) ~/.omp/agent, ~/.claude, ... │\n│ 2) <cwd>/.omp, <cwd>/.claude, ... │\n└───────────────────────────────────────┘\n │\n ▼\n capability providers enumerate items\n (native provider scans project .omp before user .omp;\n other providers have their own loading rules)\n │\n ▼\n provider priority sort + capability dedup\n │\n ▼\n subsystem-specific consumption\n (settings, skills, hooks, tools, extensions)\n```\n\n## 1) Config roots and source order\n\n## Canonical roots\n\n`src/config.ts` defines a fixed source priority list:\n\n1. `.omp` (native)\n2. `.claude`\n3. `.codex`\n4. 
`.gemini`\n\nUser-level bases:\n\n- `~/.omp/agent`\n- `~/.claude`\n- `~/.codex`\n- `~/.gemini`\n\nProject-level bases:\n\n- `<cwd>/.omp`\n- `<cwd>/.claude`\n- `<cwd>/.codex`\n- `<cwd>/.gemini`\n\n`CONFIG_DIR_NAME` is `.omp` (`packages/utils/src/dirs.ts`).\n\n## Important constraint\n\nThe generic helpers in `src/config.ts` do **not** include `.pi` in source discovery order.\n\n---\n\n## 2) Core discovery helpers (`src/config.ts`)\n\n## `getConfigDirs(subpath, options)`\n\nReturns ordered entries:\n\n- User-level entries first (by source priority)\n- Then project-level entries (by same source priority)\n\nOptions:\n\n- `user` (default `true`)\n- `project` (default `true`)\n- `cwd` (default `getProjectDir()`)\n- `existingOnly` (default `false`)\n\nThis API is used for directory-based config lookups (commands, hooks, tools, agents, etc.).\n\n## `findConfigFile(subpath, options)` / `findConfigFileWithMeta(...)`\n\nSearches for the first existing file across ordered bases, returns first match (path-only or path+metadata).\n\n## `findAllNearestProjectConfigDirs(subpath, cwd)`\n\nWalks parent directories upward and returns the **nearest existing directory per source base** (`.omp`, `.claude`, `.codex`, `.gemini`), then sorts results by source priority.\n\nUse this when project config should be inherited from ancestor directories (monorepo/nested workspace behavior).\n\n---\n\n## 3) File config wrapper (`ConfigFile<T>` in `src/config.ts`)\n\n`ConfigFile<T>` is the schema-validated loader for single config files.\n\nSupported formats:\n\n- `.yml` / `.yaml`\n- `.json` / `.jsonc`\n\nBehavior:\n\n- Validates parsed data with AJV against a provided TypeBox schema.\n- Caches load result until `invalidate()`.\n- Returns tri-state result via `tryLoad()`:\n - `ok`\n - `not-found`\n - `error` (`ConfigError` with schema/parse context)\n\nLegacy migration still supported:\n\n- If target path is `.yml`/`.yaml`, a sibling `.json` is auto-migrated once (`migrateJsonToYml`).\n\n---\n\n## 
4) Settings resolution model (`src/config/settings.ts`)\n\nThe runtime settings model is layered:\n\n1. Global settings: `~/.omp/agent/config.yml`\n2. Project settings: discovered via settings capability (`settings.json` from providers)\n3. Runtime overrides: in-memory, non-persistent\n4. Schema defaults: from `SETTINGS_SCHEMA`\n\nEffective read path:\n\n`defaults <- global <- project <- overrides`\n\nWrite behavior:\n\n- `settings.set(...)` writes to the **global** layer (`config.yml`) and queues background save.\n- Project settings are read-only from capability discovery.\n\n## Migration behavior still active\n\nOn startup, if `config.yml` is missing:\n\n1. Migrate from `~/.omp/agent/settings.json` (renamed to `.bak` on success)\n2. Merge with legacy DB settings from `agent.db`\n3. Write merged result to `config.yml`\n\nField-level migrations in `#migrateRawSettings`:\n\n- `queueMode` -> `steeringMode`\n- `ask.timeout` milliseconds -> seconds when old value looks like ms (`> 1000`)\n- Legacy flat `theme: \"...\"` -> `theme.dark/theme.light` structure\n\n---\n\n## 5) Capability/discovery integration\n\nMost non-core config loading flows through the capability registry (`src/capability/index.ts` + `src/discovery/index.ts`).\n\n## Provider ordering\n\nProviders are sorted by numeric priority (higher first). Example priorities:\n\n- Native OMP (`builtin.ts`): `100`\n- Claude: `80`\n- Codex / agents / Claude marketplace: `70`\n- Gemini: `60`\n\n```text\nProvider precedence (higher wins)\n\nnative (.omp) priority 100\nclaude priority 80\ncodex / agents / ... 
priority 70\ngemini priority 60\n```\n\n## Dedup semantics\n\nCapabilities define a `key(item)`:\n\n- same key => first item wins (higher-priority/earlier-loaded item)\n- no key (`undefined`) => no dedup, all items retained\n\nRelevant keys:\n\n- skills: `name`\n- tools: `name`\n- hooks: `${type}:${tool}:${name}`\n- extension modules: `name`\n- extensions: `name`\n- settings: no dedup (all items preserved)\n\n---\n\n## 6) Native `.omp` provider behavior (`packages/coding-agent/src/discovery/builtin.ts`)\n\nNative provider (`id: native`) reads native config from:\n\n- project: `<cwd>/.omp/...`\n- user: `~/.omp/agent/...`\n\n### Directory admission rules\n\n- Slash commands, rules, prompts, instructions, hooks, tools, extensions, extension modules, and settings use a project/user root only when the root directory exists and is non-empty.\n- Skills scan `<ancestor>/.omp/skills` for each ancestor from the current working directory up to the repo root/home boundary, plus `~/.omp/agent/skills`, without requiring the root `.omp` directory itself to be non-empty.\n- `SYSTEM.md` and `AGENTS.md` read user-level files directly and use nearest-ancestor project `.omp` lookup for project files, but the project `.omp` directory must be non-empty.\n\n### Scope-specific loading\n\n- Skills: `<ancestor>/.omp/skills/*/SKILL.md` and `~/.omp/agent/skills/*/SKILL.md`\n- Slash commands: `commands/*.md`\n- Rules: `rules/*.{md,mdc}`\n- Prompts: `prompts/*.md`\n- Instructions: `instructions/*.md`\n- Hooks: `hooks/pre/*`, `hooks/post/*`\n- Tools: `tools/*.{json,md,ts,js,sh,bash,py}` and `tools/<name>/index.ts`\n- Extension modules: discovered under `extensions/` (+ legacy `settings.json.extensions` string array)\n- Extensions: `extensions/<name>/gemini-extension.json`\n- Settings capability: `settings.json`\n\n### Nearest-project lookup nuance\n\nFor `SYSTEM.md` and `AGENTS.md`, native provider uses nearest-ancestor project `.omp` directory search (walk-up) and still requires the project 
`.omp` dir to be non-empty.\n\n## 7) How major subsystems consume config\n\n## Settings subsystem\n\n- `Settings.init()` loads global `config.yml` + discovered project `settings.json` capability items.\n- Only capability items with `level === \"project\"` are merged into project layer.\n\n## Skills subsystem\n\n- `extensibility/skills.ts` loads via `loadCapability(skillCapability.id, { cwd })`.\n- Applies source toggles and filters (`ignoredSkills`, `includeSkills`, custom dirs).\n- Legacy-named toggles still exist (`skills.enablePiUser`, `skills.enablePiProject`) but they gate the native provider (`provider === \"native\"`).\n\n## Hooks subsystem\n\n- `discoverAndLoadHooks()` resolves hook paths from hook capability + explicit configured paths.\n- Then loads modules via Bun import.\n\n## Tools subsystem\n\n- `discoverAndLoadCustomTools()` resolves tool paths from tool capability + plugin tool paths + explicit configured paths.\n- Declarative `.md/.json` tool files are metadata only; executable loading expects code modules.\n\n## Extensions subsystem\n\n- `discoverAndLoadExtensions()` resolves extension modules from extension-module capability plus explicit paths.\n- Current implementation intentionally keeps only capability items with `_source.provider === \"native\"` before loading.\n\n---\n\n## 8) Precedence rules to rely on\n\nUse this mental model:\n\n1. Source directory ordering from `config.ts` determines candidate path order.\n2. Capability provider priority determines cross-provider precedence.\n3. Capability key dedup determines collision behavior (first wins for keyed capabilities).\n4. Subsystem-specific merge logic can further change effective precedence (especially settings).\n\n### Settings-specific caveat\n\nSettings capability items are not deduplicated; `Settings.#loadProjectSettings()` deep-merges project items in returned order. 
Because merge applies later item values over earlier values, effective override behavior depends on provider emission order, not just capability key semantics.\n\n---\n\n## 9) Legacy/compatibility behaviors still present\n\n- `ConfigFile` JSON -> YAML migration for YAML-targeted files.\n- Settings migration from `settings.json` and `agent.db` to `config.yml`.\n- Settings key migrations (`queueMode`, `ask.timeout`, flat `theme`, `task.isolation.enabled`, `statusLine.plan_mode`).\n- Legacy setting names `skills.enablePiUser` / `skills.enablePiProject` are still active gates for native skill source.\n\nIf these compatibility paths are removed in code, update this document immediately; several runtime behaviors still depend on them today.\n",
11
11
  "custom-tools.md": "# Custom Tools\n\nCustom tools are model-callable functions that plug into the same tool execution pipeline as built-in tools.\n\nA custom tool is a TypeScript/JavaScript module that exports a factory. The factory receives a host API (`CustomToolAPI`) and returns one tool or an array of tools.\n\n## What this is (and is not)\n\n- **Custom tool**: callable by the model during a turn (`execute` + TypeBox schema).\n- **Extension**: lifecycle/event framework that can register tools and intercept/modify events.\n- **Hook**: external pre/post command scripts.\n- **Skill**: static guidance/context package, not executable tool code.\n\nIf you need the model to call code directly, use a custom tool.\n\n## Integration paths in current code\n\nThere are two active integration styles:\n\n1. **SDK-provided custom tools** (`options.customTools`)\n - Wrapped into agent tools via `CustomToolAdapter` or extension wrappers.\n - Always included in the initial active tool set in SDK bootstrap.\n\n2. **Filesystem-discovered modules via loader API** (`discoverAndLoadCustomTools` / `loadCustomTools`)\n - Exposed as library APIs in `src/extensibility/custom-tools/loader.ts`.\n - Host code can call these to discover and load tool modules from config/provider/plugin paths.\n\n```text\nModel tool call flow\n\nLLM tool call\n │\n ▼\nTool registry (built-ins + custom tool adapters)\n │\n ▼\nCustomTool.execute(toolCallId, params, onUpdate, ctx, signal)\n │\n ├─ onUpdate(...) -> streamed partial result\n └─ return result -> final tool content/details\n```\n\n## Discovery locations (loader API)\n\n`discoverAndLoadCustomTools(configuredPaths, cwd, builtInToolNames)` merges:\n\n1. Capability providers (`toolCapability`), including:\n - Native OMP config (`~/.omp/agent/tools`, `.omp/tools`)\n - Claude config (`~/.claude/tools`, `.claude/tools`)\n - Codex config (`~/.codex/tools`, `.codex/tools`)\n - Claude marketplace plugin cache provider\n2. 
Installed plugin manifests (`~/.omp/plugins/node_modules/*` via plugin loader)\n3. Explicit configured paths passed to the loader\n\n### Important behavior\n\n- Duplicate resolved paths are deduplicated.\n- Tool name conflicts are rejected against built-ins and already-loaded custom tools.\n- `.md` and `.json` files are discovered as tool metadata by some providers, but the executable module loader rejects them as runnable tools.\n- Relative configured paths are resolved from `cwd`; `~` is expanded.\n\n## Module contract\n\nA custom tool module must export a function (default export preferred):\n\n```ts\nimport type { CustomToolFactory } from \"@oh-my-pi/pi-coding-agent\";\n\nconst factory: CustomToolFactory = (pi) => ({\n name: \"repo_stats\",\n label: \"Repo Stats\",\n description: \"Counts tracked TypeScript files\",\n parameters: pi.typebox.Type.Object({\n glob: pi.typebox.Type.Optional(\n pi.typebox.Type.String({ default: \"**/*.ts\" }),\n ),\n }),\n\n async execute(toolCallId, params, onUpdate, ctx, signal) {\n onUpdate?.({\n content: [{ type: \"text\", text: \"Scanning files...\" }],\n details: { phase: \"scan\" },\n });\n\n const result = await pi.exec(\n \"git\",\n [\"ls-files\", params.glob ?? 
\"**/*.ts\"],\n { signal, cwd: pi.cwd },\n );\n if (result.killed) {\n throw new Error(\"Scan was cancelled\");\n }\n if (result.code !== 0) {\n throw new Error(result.stderr || \"git ls-files failed\");\n }\n\n const files = result.stdout.split(\"\\n\").filter(Boolean);\n return {\n content: [{ type: \"text\", text: `Found ${files.length} files` }],\n details: { count: files.length, sample: files.slice(0, 10) },\n };\n },\n\n onSession(event) {\n if (event.reason === \"shutdown\") {\n // cleanup resources if needed\n }\n },\n});\n\nexport default factory;\n```\n\nFactory return type:\n\n- `CustomTool`\n- `CustomTool[]`\n- `Promise<CustomTool | CustomTool[]>`\n\n## API surface passed to factories (`CustomToolAPI`)\n\nFrom `types.ts` and `loader.ts`:\n\n- `cwd`: host working directory\n- `exec(command, args, options?)`: process execution helper\n- `ui`: UI context (can be no-op in headless modes)\n- `hasUI`: `false` in non-interactive flows\n- `logger`: shared file logger\n- `typebox`: injected `@sinclair/typebox`\n- `pi`: injected `@oh-my-pi/pi-coding-agent` exports\n- `pushPendingAction(action)`: register a preview action for hidden `resolve` tool (`docs/resolve-tool-runtime.md`)\n\nLoader starts with a no-op UI context and requires host code to call `setUIContext(...)` when real UI is ready.\n\n## Execution contract and typing\n\n`CustomTool.execute` signature:\n\n```ts\nexecute(toolCallId, params, onUpdate, ctx, signal);\n```\n\n- `params` is statically typed from your TypeBox schema via `Static<TParams>`.\n- Runtime argument validation happens before execution in the agent loop.\n- `onUpdate` emits partial results for UI streaming.\n- `ctx` includes session/model state and an `abort()` helper.\n- `signal` carries cancellation.\n\n`CustomToolAdapter` bridges this to the agent tool interface and forwards calls in the correct argument order.\n\n## How tools are exposed to the model\n\n- Tools are wrapped into `AgentTool` instances (`CustomToolAdapter` or extension 
wrappers).\n- They are inserted into the session tool registry by name.\n- In SDK bootstrap, custom and extension-registered tools are force-included in the initial active set.\n- CLI `--tools` currently validates only built-in tool names; custom tool inclusion is handled through discovery/registration paths and SDK options.\n\n## Rendering hooks\n\nOptional rendering hooks:\n\n- `renderCall(args, options, theme)`\n- `renderResult(result, options, theme, args?)`\n\nRuntime behavior in TUI:\n\n- If hooks exist, tool output is rendered inside a `Box` container.\n- `renderResult` receives `{ expanded, isPartial, spinnerFrame? }`.\n- Renderer errors are caught and logged; UI falls back to default text rendering.\n\n## Session/state handling\n\nOptional `onSession(event, ctx)` receives session lifecycle events, including:\n\n- `start`, `switch`, `branch`, `tree`, `shutdown`\n- `auto_compaction_start`, `auto_compaction_end`\n- `auto_retry_start`, `auto_retry_end`\n- `ttsr_triggered`, `todo_reminder`\n\nUse `ctx.sessionManager` to reconstruct state from history when branch/session context changes.\n\n## Failures and cancellation semantics\n\n### Synchronous/async failures\n\n- Throwing (or rejected promises) in `execute` is treated as tool failure.\n- Agent runtime converts failures into tool result messages with `isError: true` and error text content.\n- With extension wrappers, `tool_result` handlers can further rewrite content/details and even override error status.\n\n### Cancellation\n\n- Agent abort propagates through `AbortSignal` to `execute`.\n- Forward `signal` to subprocess work (`pi.exec(..., { signal })`) for cooperative cancellation.\n- `ctx.abort()` lets a tool request abort of the current agent operation.\n\n### onSession errors\n\n- `onSession` errors are caught and logged as warnings; they do not crash the session.\n\n## Real constraints to design for\n\n- Tool names must be globally unique in the active registry.\n- Prefer deterministic, schema-shaped 
outputs in `details` for renderer/state reconstruction.\n- Guard UI usage with `pi.hasUI`.\n- Treat `.md`/`.json` in tool directories as metadata, not executable modules.\n",
12
- "environment-variables.md": "# Environment Variables (Current Runtime Reference)\n\nThis reference is derived from current code paths in:\n\n- `packages/coding-agent/src/**`\n- `packages/ai/src/**` (provider/auth resolution used by coding-agent)\n- `packages/utils/src/**` and `packages/tui/src/**` where those vars directly affect coding-agent runtime\n\nIt documents only active behavior.\n\n## Resolution model and precedence\n\nMost runtime lookups use `$env` from `@oh-my-pi/pi-utils` (`packages/utils/src/env.ts`).\n\n`$env` loading order:\n\n1. Existing process environment (`Bun.env`)\n2. Project `.env` (`$PWD/.env`) for keys not already set\n3. Agent `.env` (`~/.omp/agent/.env`, respecting `PI_CONFIG_DIR` / `PI_CODING_AGENT_DIR`) for keys not already set\n4. Config-root `.env` (`~/.omp/.env`, respecting `PI_CONFIG_DIR`) for keys not already set\n5. Home `.env` (`~/.env`) for keys not already set\n\nAdditional rule inside each `.env` file: `OMP_*` keys are mirrored to `PI_*` keys in that parsed file.\n\n---\n\n## 1) Model/provider authentication\n\nThese are consumed via `getEnvApiKey()` (`packages/ai/src/stream.ts`) unless noted otherwise.\n\n### Core provider credentials\n\n| Variable | Used for | Required when | Notes / precedence |\n| ------------------------------- | ------------------------------------------------ | -------------------------------------------------------------- | --------------------------------------------------------------------------------------------------- |\n| `ANTHROPIC_OAUTH_TOKEN` | Anthropic API auth | Using Anthropic with OAuth token auth | Takes precedence over `ANTHROPIC_API_KEY` for provider auth resolution |\n| `ANTHROPIC_API_KEY` | Anthropic API auth | Using Anthropic without OAuth token | Fallback after `ANTHROPIC_OAUTH_TOKEN` |\n| `ANTHROPIC_FOUNDRY_API_KEY` | Anthropic via Azure Foundry / enterprise gateway | `CLAUDE_CODE_USE_FOUNDRY` enabled | Takes precedence over `ANTHROPIC_OAUTH_TOKEN` and `ANTHROPIC_API_KEY` when 
Foundry mode is enabled |\n| `OPENAI_API_KEY` | OpenAI auth | Using OpenAI-family providers without explicit apiKey argument | Used by OpenAI Completions/Responses providers |\n| `GEMINI_API_KEY` | Google Gemini auth | Using `google` provider models | Primary key for Gemini provider mapping |\n| `GOOGLE_API_KEY` | Gemini image tool auth fallback | Using `gemini_image` tool without `GEMINI_API_KEY` | Used by coding-agent image tool fallback path |\n| `GROQ_API_KEY` | Groq auth | Using Groq models | |\n| `CEREBRAS_API_KEY` | Cerebras auth | Using Cerebras models | |\n| `FIREWORKS_API_KEY` | Fireworks auth | Using Fireworks models | |\n| `TOGETHER_API_KEY` | Together auth | Using `together` provider | |\n| `HUGGINGFACE_HUB_TOKEN` | Hugging Face auth | Using `huggingface` provider | Primary Hugging Face token env var |\n| `HF_TOKEN` | Hugging Face auth | Using `huggingface` provider | Fallback when `HUGGINGFACE_HUB_TOKEN` is unset |\n| `SYNTHETIC_API_KEY` | Synthetic auth | Using Synthetic models | |\n| `NVIDIA_API_KEY` | NVIDIA auth | Using `nvidia` provider | |\n| `NANO_GPT_API_KEY` | NanoGPT auth | Using `nanogpt` provider | |\n| `VENICE_API_KEY` | Venice auth | Using `venice` provider | |\n| `LITELLM_API_KEY` | LiteLLM auth | Using `litellm` provider | OpenAI-compatible LiteLLM proxy key |\n| `LM_STUDIO_API_KEY` | LM Studio auth (optional) | Using `lm-studio` provider with authenticated hosts | Local LM Studio usually runs without auth; any non-empty token works when a key is required |\n| `OLLAMA_API_KEY` | Ollama auth (optional) | Using `ollama` provider with authenticated hosts | Local Ollama usually runs without auth; any non-empty token works when a key is required |\n| `LLAMA_CPP_API_KEY` | llama.cpp auth (optional) | Using `llama.cpp` provider with authenticated hosts | Local llama.cpp usually runs without auth; any non-empty token works when a key is configured |\n| `XIAOMI_API_KEY` | Xiaomi MiMo auth | Using `xiaomi` provider | |\n| `MOONSHOT_API_KEY` | 
Moonshot auth | Using `moonshot` provider | |\n| `XAI_API_KEY` | xAI auth | Using xAI models | |\n| `OPENROUTER_API_KEY` | OpenRouter auth | Using OpenRouter models | Also used by image tool when preferred/auto provider is OpenRouter |\n| `MISTRAL_API_KEY` | Mistral auth | Using Mistral models | |\n| `ZAI_API_KEY` | z.ai auth | Using z.ai models | Also used by z.ai web search provider |\n| `MINIMAX_API_KEY` | MiniMax auth | Using `minimax` provider | |\n| `MINIMAX_CODE_API_KEY` | MiniMax Code auth | Using `minimax-code` provider | |\n| `MINIMAX_CODE_CN_API_KEY` | MiniMax Code CN auth | Using `minimax-code-cn` provider | |\n| `OPENCODE_API_KEY` | OpenCode auth | Using `opencode-go` / `opencode-zen` models | |\n| `QIANFAN_API_KEY` | Qianfan auth | Using `qianfan` provider | |\n| `QWEN_OAUTH_TOKEN` | Qwen Portal auth | Using `qwen-portal` with OAuth token | Takes precedence over `QWEN_PORTAL_API_KEY` |\n| `QWEN_PORTAL_API_KEY` | Qwen Portal auth | Using `qwen-portal` with API key | Fallback after `QWEN_OAUTH_TOKEN` |\n| `ZENMUX_API_KEY` | ZenMux auth | Using `zenmux` provider | Used for ZenMux OpenAI and Anthropic-compatible routes |\n| `VLLM_API_KEY` | vLLM auth/discovery opt-in | Using `vllm` provider (local OpenAI-compatible servers) | Any non-empty value works for no-auth local servers |\n| `CURSOR_ACCESS_TOKEN` | Cursor provider auth | Using Cursor provider | |\n| `AI_GATEWAY_API_KEY` | Vercel AI Gateway auth | Using `vercel-ai-gateway` provider | |\n| `CLOUDFLARE_AI_GATEWAY_API_KEY` | Cloudflare AI Gateway auth | Using `cloudflare-ai-gateway` provider | Base URL must be configured as `https://gateway.ai.cloudflare.com/v1/<account>/<gateway>/anthropic` |\n| `ALIBABA_CODING_PLAN_API_KEY` | Alibaba Coding Plan auth | Using `alibaba-coding-plan` provider | |\n| `DEEPSEEK_API_KEY` | DeepSeek auth | Using DeepSeek models | |\n| `KILO_API_KEY` | Kilo auth | Using Kilo models | |\n| `OLLAMA_CLOUD_API_KEY` | Ollama Cloud auth | Using `ollama-cloud` provider | |\n| 
`GITLAB_TOKEN` | GitLab Duo auth | Using `gitlab-duo` provider | |\n\n### GitHub/Copilot token chains\n\n| Variable | Used for | Chain |\n| ---------------------- | ------------------------------------------------ | ---------------------------------------------------- |\n| `COPILOT_GITHUB_TOKEN` | GitHub Copilot provider auth | `COPILOT_GITHUB_TOKEN` → `GH_TOKEN` → `GITHUB_TOKEN` |\n| `GH_TOKEN` | Copilot fallback; GitHub API auth in web scraper | In web scraper: `GITHUB_TOKEN` → `GH_TOKEN` |\n| `GITHUB_TOKEN` | Copilot fallback; GitHub API auth in web scraper | In web scraper: checked before `GH_TOKEN` |\n\n---\n\n## 2) Provider-specific runtime configuration\n\n### Anthropic Foundry Gateway (Azure / enterprise proxy)\n\nWhen `CLAUDE_CODE_USE_FOUNDRY` is enabled, Anthropic requests switch to Foundry mode:\n\n- Base URL resolves from `FOUNDRY_BASE_URL` (fallback remains model/default base URL if unset).\n- API key resolution for provider `anthropic` becomes:\n `ANTHROPIC_FOUNDRY_API_KEY` → `ANTHROPIC_OAUTH_TOKEN` → `ANTHROPIC_API_KEY`.\n- `ANTHROPIC_CUSTOM_HEADERS` is parsed as comma/newline-separated `key: value` pairs and merged into request headers.\n- TLS client/server material can be injected from env values:\n `NODE_EXTRA_CA_CERTS`, `CLAUDE_CODE_CLIENT_CERT`, `CLAUDE_CODE_CLIENT_KEY`.\n Each accepts either:\n - a filesystem path to PEM content, or\n - inline PEM (including escaped `\\n` sequences).\n\n| Variable | Value type | Behavior |\n| --------------------------- | ---------------------------------------------- | ----------------------------------------------------------------------------- |\n| `CLAUDE_CODE_USE_FOUNDRY` | Boolean-like string (`1`, `true`, `yes`, `on`) | Enables Foundry mode for Anthropic provider |\n| `FOUNDRY_BASE_URL` | URL string | Anthropic endpoint base URL in Foundry mode |\n| `ANTHROPIC_FOUNDRY_API_KEY` | Token string | Used for `Authorization: Bearer <token>` |\n| `ANTHROPIC_CUSTOM_HEADERS` | Header list string | Extra headers; 
format `header-a: value, header-b: value` or newline-separated |\n| `NODE_EXTRA_CA_CERTS` | PEM path or inline PEM | Extra CA chain for server certificate validation |\n| `CLAUDE_CODE_CLIENT_CERT` | PEM path or inline PEM | mTLS client certificate |\n| `CLAUDE_CODE_CLIENT_KEY` | PEM path or inline PEM | mTLS client private key (must be paired with cert) |\n\n### Amazon Bedrock\n\n| Variable | Default / behavior |\n| ------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------- |\n| `AWS_REGION` | Primary region source |\n| `AWS_DEFAULT_REGION` | Fallback if `AWS_REGION` unset |\n| `AWS_PROFILE` | Enables named profile auth path |\n| `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY` | Enables IAM key auth path |\n| `AWS_BEARER_TOKEN_BEDROCK` | Enables bearer token auth path |\n| `AWS_CONTAINER_CREDENTIALS_RELATIVE_URI` / `AWS_CONTAINER_CREDENTIALS_FULL_URI` | Enables ECS task credential path |\n| `AWS_WEB_IDENTITY_TOKEN_FILE` + `AWS_ROLE_ARN` | Enables web identity auth path |\n| `AWS_BEDROCK_SKIP_AUTH` | If `1`, injects dummy credentials (proxy/non-auth scenarios) |\n| `AWS_BEDROCK_FORCE_HTTP1` | If `1`, forces Node HTTP/1 request handler |\n| `HTTPS_PROXY` / `HTTP_PROXY` / `ALL_PROXY` | Routes Bedrock runtime and AWS SSO credential calls through the configured proxy using HTTP/1 |\n| `NO_PROXY` | Excludes matching hosts from proxy routing when a proxy variable is configured |\n\nRegion fallback in provider code: `options.region` → `AWS_REGION` → `AWS_DEFAULT_REGION` → `us-east-1`.\n\n### Azure OpenAI Responses\n\n| Variable | Default / behavior |\n| ---------------------------------- | --------------------------------------------------------------------------- |\n| `AZURE_OPENAI_API_KEY` | Required unless API key passed as option |\n| `AZURE_OPENAI_API_VERSION` | Default `v1` |\n| `AZURE_OPENAI_BASE_URL` | Direct base URL override |\n| 
`AZURE_OPENAI_RESOURCE_NAME` | Used to construct base URL: `https://<resource>.openai.azure.com/openai/v1` |\n| `AZURE_OPENAI_DEPLOYMENT_NAME_MAP` | Optional mapping string: `modelId=deploymentName,model2=deployment2` |\n\nBase URL resolution: option `azureBaseUrl` → env `AZURE_OPENAI_BASE_URL` → option/env resource name → `model.baseUrl`.\n\n### Google Vertex AI\n\n| Variable | Required? | Notes |\n| -------------------------------- | ------------------------------ | ------------------------------------------------------------------------------------------------------------------------- |\n| `GOOGLE_CLOUD_PROJECT` | Yes (unless passed in options) | Fallback: `GCLOUD_PROJECT` |\n| `GCLOUD_PROJECT` | Fallback | Used as alternate project ID source |\n| `GOOGLE_CLOUD_PROJECT_ID` | OAuth login helper only | Used by Gemini CLI OAuth project discovery |\n| `GOOGLE_CLOUD_LOCATION` | Yes (unless passed in options) | No default in provider |\n| `GOOGLE_CLOUD_API_KEY` | Conditional | Direct Vertex API-key auth; otherwise ADC fallback can authenticate when project and location are set |\n| `GOOGLE_APPLICATION_CREDENTIALS` | Conditional | If set, file must exist; otherwise ADC fallback path is checked (`~/.config/gcloud/application_default_credentials.json`) |\n\n### Kimi\n\n| Variable | Default / behavior |\n| ---------------------- | -------------------------------------------------------- |\n| `KIMI_CODE_OAUTH_HOST` | Primary OAuth host override |\n| `KIMI_OAUTH_HOST` | Fallback OAuth host override |\n| `KIMI_CODE_BASE_URL` | Overrides Kimi usage endpoint base URL (`usage/kimi.ts`) |\n\nOAuth host chain: `KIMI_CODE_OAUTH_HOST` → `KIMI_OAUTH_HOST` → `https://auth.kimi.com`.\n\n### Gemini CLI compatibility\n\n| Variable | Default / behavior |\n| -------------------------- | --------------------------------------------------------------- |\n| `PI_AI_GEMINI_CLI_VERSION` | Overrides Gemini CLI user-agent version tag (`0.35.3` if unset) |\n\n### OpenAI Codex responses 
(feature/debug controls)\n\n| Variable | Behavior |\n| ------------------------------------ | ---------------------------------------------------- |\n| `PI_CODEX_DEBUG` | `1`/`true` enables Codex provider debug logging |\n| `PI_CODEX_WEBSOCKET` | `1`/`true` enables websocket transport preference |\n| `PI_CODEX_WEBSOCKET_V2` | `1`/`true` enables websocket v2 path |\n| `PI_CODEX_WEBSOCKET_IDLE_TIMEOUT_MS` | Positive integer override (default 300000) |\n| `PI_CODEX_WEBSOCKET_RETRY_BUDGET` | Non-negative integer override (default 5) |\n| `PI_CODEX_WEBSOCKET_RETRY_DELAY_MS` | Positive integer base backoff override (default 500) |\n| `PI_OPENAI_STREAM_IDLE_TIMEOUT_MS` | Positive integer OpenAI stream idle timeout override |\n\n### Cursor provider debug\n\n| Variable | Behavior |\n| ------------------ | ------------------------------------------------------------------------ |\n| `DEBUG_CURSOR` | Enables provider debug logs; `2`/`verbose` for detailed payload snippets |\n| `DEBUG_CURSOR_LOG` | Optional file path for JSONL debug log output |\n\n### Prompt cache compatibility switch\n\n| Variable | Behavior |\n| -------------------- | ----------------------------------------------------------------------------------------------------------------- |\n| `PI_CACHE_RETENTION` | If `long`, enables long retention where supported (`anthropic`, `openai-responses`, Bedrock retention resolution) |\n\n---\n\n## 3) Web search subsystem\n\n### Search provider credentials\n\n| Variable | Used by |\n| --------------------------------------------------- | ------------------------------------------------------------- |\n| `EXA_API_KEY` | Exa search provider and Exa MCP tools |\n| `BRAVE_API_KEY` | Brave search provider |\n| `PERPLEXITY_API_KEY` | Perplexity search provider API-key mode |\n| `PERPLEXITY_COOKIES` | Perplexity cookie-auth search mode |\n| `TAVILY_API_KEY` | Tavily search provider |\n| `ZAI_API_KEY` | z.ai search provider (also checks stored OAuth in `agent.db`) |\n| 
`OPENAI_API_KEY` / Codex OAuth in DB | Codex search provider availability/auth |\n| `PI_CODEX_WEB_SEARCH_MODEL` | Codex search provider model override |\n| `MOONSHOT_SEARCH_API_KEY` / `KIMI_SEARCH_API_KEY` | Kimi/Moonshot search provider env auth |\n| `MOONSHOT_SEARCH_BASE_URL` / `KIMI_SEARCH_BASE_URL` | Kimi/Moonshot search endpoint override |\n| `KAGI_API_KEY` | Kagi search provider |\n| `JINA_API_KEY` | Jina search provider |\n| `PARALLEL_API_KEY` | Parallel search provider |\n| `SEARXNG_ENDPOINT`, `SEARXNG_TOKEN` | SearXNG endpoint and optional bearer token |\n| `SEARXNG_BASIC_USERNAME`, `SEARXNG_BASIC_PASSWORD` | SearXNG HTTP Basic Auth credentials |\n\nSearXNG also reads the equivalent `searxng.endpoint`, `searxng.token`, `searxng.basicUsername`, and `searxng.basicPassword` settings from `~/.omp/agent/config.yml`; environment variables are fallbacks.\n\n### Anthropic web search auth chain\n\nAnthropic web search uses `findAnthropicAuth()` from `packages/ai/src/utils/anthropic-auth.ts` in this order:\n\n1. `ANTHROPIC_SEARCH_API_KEY` (+ optional `ANTHROPIC_SEARCH_BASE_URL`)\n2. `ANTHROPIC_FOUNDRY_API_KEY` when `CLAUDE_CODE_USE_FOUNDRY` is enabled\n3. Anthropic OAuth credentials from `agent.db` (must not expire within 5-minute buffer)\n4. Anthropic API-key credentials from `agent.db`\n5. 
Generic Anthropic env fallback: provider key (`ANTHROPIC_FOUNDRY_API_KEY` in Foundry mode, otherwise `ANTHROPIC_OAUTH_TOKEN`/`ANTHROPIC_API_KEY`) + optional `ANTHROPIC_BASE_URL` (`FOUNDRY_BASE_URL` when Foundry mode is enabled)\n\nRelated vars:\n\n| Variable | Default / behavior |\n| --------------------------- | ---------------------------------------------------- |\n| `ANTHROPIC_SEARCH_API_KEY` | Highest-priority explicit search key |\n| `ANTHROPIC_SEARCH_BASE_URL` | Defaults to `https://api.anthropic.com` when omitted |\n| `ANTHROPIC_SEARCH_MODEL` | Defaults to `claude-haiku-4-5` |\n| `ANTHROPIC_BASE_URL` | Generic fallback base URL for tier-5 auth path |\n\n### Perplexity OAuth flow behavior flag\n\n| Variable | Behavior |\n| ------------------- | ------------------------------------------------------------------------------- |\n| `PI_AUTH_NO_BORROW` | If set, disables macOS native-app token borrowing path in Perplexity login flow |\n\n---\n\n## 4) Python tooling and kernel runtime\n\n| Variable | Default / behavior |\n| ------------------------- | ------------------------------------------------------------------------------------------------------------------- |\n| `PI_PY` | Eval backend override: `0`/`bash`=JavaScript only, `1`/`py`=Python only, `mix`/`both`=both; invalid values ignored |\n| `PI_PYTHON_SKIP_CHECK` | If `1`, skips Python interpreter availability checks (subprocess runner still starts on demand) |\n| `PI_PYTHON_INTEGRATION` | If `1`, opts gated integration tests in (e.g. 
`python-runner.integration.test.ts`) into running against real Python |\n| `PI_PYTHON_IPC_TRACE` | If `1`, logs NDJSON frames exchanged with the Python runner subprocess |\n| `VIRTUAL_ENV` | Highest-priority venv path for Python runtime resolution |\n\nExtra conditional behavior:\n\n- If `BUN_ENV=test` or `NODE_ENV=test`, Python availability checks are treated as OK and warming is skipped.\n- Python env filtering denies common API keys and allows safe base vars + `LC_`, `XDG_`, `PI_` prefixes.\n\n---\n\n## 5) Agent/runtime behavior toggles\n\n| Variable | Default / behavior |\n| ---------------------------- | -------------------------------------------------------------------------------------------------- |\n| `PI_SMOL_MODEL` | Ephemeral model-role override for `smol` (CLI `--smol` takes precedence) |\n| `PI_SLOW_MODEL` | Ephemeral model-role override for `slow` (CLI `--slow` takes precedence) |\n| `PI_PLAN_MODEL` | Ephemeral model-role override for `plan` (CLI `--plan` takes precedence) |\n| `PI_NO_TITLE` | If set (any non-empty value), disables auto session title generation on first user message |\n| `NULL_PROMPT` | If `true`, system prompt builder returns empty string |\n| `PI_BLOCKED_AGENT` | Blocks a specific subagent type in task tool |\n| `PI_SUBPROCESS_CMD` | Overrides subagent spawn command (`omp` / `omp.cmd` resolution bypass) |\n| `PI_TASK_MAX_OUTPUT_BYTES` | Max captured output bytes per subagent (default `500000`) |\n| `PI_TASK_MAX_OUTPUT_LINES` | Max captured output lines per subagent (default `5000`) |\n| `PI_TIMING` | If `1`, enables startup/tool timing instrumentation logs |\n| `PI_DEBUG_STARTUP` | Enables startup stage debug prints to stderr in multiple startup paths |\n| `PI_PACKAGE_DIR` | Overrides package asset base dir resolution (docs/examples/changelog path lookup) |\n| `PI_DISABLE_LSPMUX` | If `1`, disables lspmux detection/integration and forces direct LSP server spawning |\n| `PI_RPC_EMIT_TITLE` | Boolean-like flag enabling title events 
in RPC mode |\n| `SMITHERY_URL` | Smithery web URL override (default `https://smithery.ai`) |\n| `SMITHERY_API_URL` | Smithery API base URL override (default `https://api.smithery.ai`) |\n| `PUPPETEER_EXECUTABLE_PATH` | Browser tool Chromium executable override |\n| `LM_STUDIO_BASE_URL` | Default implicit LM Studio discovery base URL override (`http://127.0.0.1:1234/v1` if unset) |\n| `OLLAMA_BASE_URL` | Default implicit Ollama discovery base URL override (`http://127.0.0.1:11434` if unset) |\n| `LLAMA_CPP_BASE_URL` | Default implicit Llama.cpp discovery base URL override (`http://127.0.0.1:8080` if unset) |\n| `PI_EDIT_VARIANT` | Forces edit tool variant when valid (`patch`, `replace`, `hashline`, `atom`, `vim`, `apply_patch`) |\n| `PI_FORCE_IMAGE_PROTOCOL` | Forces supported image protocol (`kitty`, `iterm2`/`iterm`, `sixel`, `none`) where used |\n| `PI_ALLOW_SIXEL_PASSTHROUGH` | Allows SIXEL passthrough when `PI_FORCE_IMAGE_PROTOCOL=sixel` |\n| `PI_NO_PTY` | If `1`, disables interactive PTY path for bash tool |\n\n`PI_NO_PTY` is also set internally when CLI `--no-pty` is used.\n\n---\n\n## 6) Storage and config root paths\n\nThese are consumed via `@oh-my-pi/pi-utils/dirs` and affect where coding-agent stores data.\n\n| Variable | Default / behavior |\n| --------------------- | ----------------------------------------------------------------------------- |\n| `PI_CONFIG_DIR` | Config root dirname under home (default `.omp`) |\n| `PI_CODING_AGENT_DIR` | Full override for agent directory (default `~/<PI_CONFIG_DIR or .omp>/agent`) |\n| `PWD` | Used when matching canonical current working directory in path helpers |\n\n---\n\n## 7) Shell/tool execution environment\n\n(From `packages/utils/src/procmgr.ts` and coding-agent bash tool integration.)\n\n| Variable | Behavior |\n| -------------------------- | ------------------------------------------------------------------------------ |\n| `PI_BASH_NO_CI` | Suppresses automatic `CI=true` injection into spawned shell env 
|\n| `CLAUDE_BASH_NO_CI` | Legacy alias fallback for `PI_BASH_NO_CI` |\n| `PI_BASH_NO_LOGIN` | Disables login-shell mode; shell args become `['-c']` instead of `['-l','-c']` |\n| `CLAUDE_BASH_NO_LOGIN` | Legacy alias fallback for `PI_BASH_NO_LOGIN` |\n| `PI_SHELL_PREFIX` | Optional command prefix wrapper |\n| `CLAUDE_CODE_SHELL_PREFIX` | Legacy alias fallback for `PI_SHELL_PREFIX` |\n| `VISUAL` | Preferred external editor command |\n| `EDITOR` | Fallback external editor command |\n\nCurrent implementation: `PI_BASH_NO_LOGIN`/`CLAUDE_BASH_NO_LOGIN` are active; when either is set, `getShellArgs()` returns `['-c']`.\n\n---\n\n## 8) UI/theme/session detection (auto-detected env)\n\nThese are read as runtime signals; they are usually set by the terminal/OS rather than manually configured.\n\n| Variable | Used for |\n| ------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------- |\n| `COLORTERM`, `TERM`, `WT_SESSION` | Color capability detection (theme color mode) |\n| `COLORFGBG` | Terminal background light/dark auto-detection |\n| `TERM_PROGRAM`, `TERM_PROGRAM_VERSION`, `TERMINAL_EMULATOR` | Terminal identity in system prompt/context |\n| `KDE_FULL_SESSION`, `XDG_CURRENT_DESKTOP`, `DESKTOP_SESSION`, `XDG_SESSION_DESKTOP`, `GDMSESSION`, `WINDOWMANAGER` | Desktop/window-manager detection in system prompt/context |\n| `KITTY_WINDOW_ID`, `TMUX_PANE`, `TERM_SESSION_ID`, `WT_SESSION` | Stable per-terminal session breadcrumb IDs |\n| `SHELL`, `ComSpec`, `TERM_PROGRAM`, `TERM` | System info diagnostics |\n| `APPDATA`, `XDG_CONFIG_HOME` | lspmux config path resolution |\n| `HOME` | Path shortening in MCP command UI |\n\n---\n\n## 9) TUI runtime flags (shared package, affects coding-agent UX)\n\n| Variable | Behavior |\n| ------------------------- | ------------------------------------------------------------------------------------- |\n| `PI_NOTIFICATIONS` | `off` / 
`0` / `false` suppress desktop notifications |\n| `PI_TUI_WRITE_LOG` | If set, logs TUI writes to file |\n| `PI_HARDWARE_CURSOR` | If `1`, enables hardware cursor mode |\n| `PI_CLEAR_ON_SHRINK` | If `1`, clears empty rows when content shrinks |\n| `PI_DEBUG_REDRAW` | If `1`, enables redraw debug logging |\n| `PI_TUI_DEBUG` | If `1`, enables deep TUI debug dump path |\n| `PI_FORCE_IMAGE_PROTOCOL` | Forces terminal image protocol detection (`kitty`, `iterm2`/`iterm`, `sixel`, `none`) |\n\n---\n\n## 10) Commit generation controls\n\n| Variable | Behavior |\n| ------------------------- | ------------------------------------------------------------------- |\n| `PI_COMMIT_TEST_FALLBACK` | If `true` (case-insensitive), force commit fallback generation path |\n| `PI_COMMIT_NO_FALLBACK` | If `true`, disables fallback when agent returns no proposal |\n| `PI_COMMIT_MAP_REDUCE` | If `false`, disables map-reduce commit analysis path |\n| `DEBUG` | If set, commit agent error stack traces are printed |\n\n---\n\n## Security-sensitive variables\n\nTreat these as secrets; do not log or commit them:\n\n- Provider/API keys and OAuth/bearer credentials (all `*_API_KEY`, `*_TOKEN`, OAuth access/refresh tokens)\n- Cloud credentials (`AWS_*`, `GOOGLE_APPLICATION_CREDENTIALS` path may expose service-account material)\n- Search/provider auth vars (`EXA_API_KEY`, `BRAVE_API_KEY`, `PERPLEXITY_API_KEY`, Anthropic search keys)\n- Foundry mTLS material (`CLAUDE_CODE_CLIENT_CERT`, `CLAUDE_CODE_CLIENT_KEY`, `NODE_EXTRA_CA_CERTS` when it points to private CA bundles)\n\nPython runtime also explicitly strips many common key vars before spawning kernel subprocesses (`packages/coding-agent/src/eval/py/runtime.ts`).\n",
12
+ "environment-variables.md": "# Environment Variables (Current Runtime Reference)\n\nThis reference is derived from current code paths in:\n\n- `packages/coding-agent/src/**`\n- `packages/ai/src/**` (provider/auth resolution used by coding-agent)\n- `packages/utils/src/**` and `packages/tui/src/**` where those vars directly affect coding-agent runtime\n\nIt documents only active behavior.\n\n## Resolution model and precedence\n\nMost runtime lookups use `$env` from `@oh-my-pi/pi-utils` (`packages/utils/src/env.ts`).\n\n`$env` loading order:\n\n1. Existing process environment (`Bun.env`)\n2. Project `.env` (`$PWD/.env`) for keys not already set\n3. Agent `.env` (`~/.omp/agent/.env`, respecting `PI_CONFIG_DIR` / `PI_CODING_AGENT_DIR`) for keys not already set\n4. Config-root `.env` (`~/.omp/.env`, respecting `PI_CONFIG_DIR`) for keys not already set\n5. Home `.env` (`~/.env`) for keys not already set\n\nAdditional rule inside each `.env` file: `OMP_*` keys are mirrored to `PI_*` keys in that parsed file.\n\n---\n\n## 1) Model/provider authentication\n\nThese are consumed via `getEnvApiKey()` (`packages/ai/src/stream.ts`) unless noted otherwise.\n\n### Core provider credentials\n\n| Variable | Used for | Required when | Notes / precedence |\n| ------------------------------- | ------------------------------------------------ | -------------------------------------------------------------- | --------------------------------------------------------------------------------------------------- |\n| `ANTHROPIC_OAUTH_TOKEN` | Anthropic API auth | Using Anthropic with OAuth token auth | Takes precedence over `ANTHROPIC_API_KEY` for provider auth resolution |\n| `ANTHROPIC_API_KEY` | Anthropic API auth | Using Anthropic without OAuth token | Fallback after `ANTHROPIC_OAUTH_TOKEN` |\n| `ANTHROPIC_FOUNDRY_API_KEY` | Anthropic via Azure Foundry / enterprise gateway | `CLAUDE_CODE_USE_FOUNDRY` enabled | Takes precedence over `ANTHROPIC_OAUTH_TOKEN` and `ANTHROPIC_API_KEY` when 
Foundry mode is enabled |\n| `OPENAI_API_KEY` | OpenAI auth | Using OpenAI-family providers without explicit apiKey argument | Used by OpenAI Completions/Responses providers |\n| `GEMINI_API_KEY` | Google Gemini auth | Using `google` provider models | Primary key for Gemini provider mapping |\n| `GOOGLE_API_KEY` | Gemini image tool auth fallback | Using `gemini_image` tool without `GEMINI_API_KEY` | Used by coding-agent image tool fallback path |\n| `GROQ_API_KEY` | Groq auth | Using Groq models | |\n| `CEREBRAS_API_KEY` | Cerebras auth | Using Cerebras models | |\n| `FIREWORKS_API_KEY` | Fireworks auth | Using Fireworks models | |\n| `TOGETHER_API_KEY` | Together auth | Using `together` provider | |\n| `HUGGINGFACE_HUB_TOKEN` | Hugging Face auth | Using `huggingface` provider | Primary Hugging Face token env var |\n| `HF_TOKEN` | Hugging Face auth | Using `huggingface` provider | Fallback when `HUGGINGFACE_HUB_TOKEN` is unset |\n| `SYNTHETIC_API_KEY` | Synthetic auth | Using Synthetic models | |\n| `NVIDIA_API_KEY` | NVIDIA auth | Using `nvidia` provider | |\n| `NANO_GPT_API_KEY` | NanoGPT auth | Using `nanogpt` provider | |\n| `VENICE_API_KEY` | Venice auth | Using `venice` provider | |\n| `LITELLM_API_KEY` | LiteLLM auth | Using `litellm` provider | OpenAI-compatible LiteLLM proxy key |\n| `LM_STUDIO_API_KEY` | LM Studio auth (optional) | Using `lm-studio` provider with authenticated hosts | Local LM Studio usually runs without auth; any non-empty token works when a key is required |\n| `OLLAMA_API_KEY` | Ollama auth (optional) | Using `ollama` provider with authenticated hosts | Local Ollama usually runs without auth; any non-empty token works when a key is required |\n| `LLAMA_CPP_API_KEY` | llama.cpp auth (optional) | Using `llama.cpp` provider with authenticated hosts | Local llama.cpp usually runs without auth; any non-empty token works when a key is configured |\n| `XIAOMI_API_KEY` | Xiaomi MiMo auth | Using `xiaomi` provider | |\n| `MOONSHOT_API_KEY` | 
Moonshot auth | Using `moonshot` provider | |\n| `XAI_API_KEY` | xAI auth | Using xAI models | |\n| `OPENROUTER_API_KEY` | OpenRouter auth | Using OpenRouter models | Also used by image tool when preferred/auto provider is OpenRouter |\n| `MISTRAL_API_KEY` | Mistral auth | Using Mistral models | |\n| `ZAI_API_KEY` | z.ai auth | Using z.ai models | Also used by z.ai web search provider |\n| `MINIMAX_API_KEY` | MiniMax auth | Using `minimax` provider | |\n| `MINIMAX_CODE_API_KEY` | MiniMax Code auth | Using `minimax-code` provider | |\n| `MINIMAX_CODE_CN_API_KEY` | MiniMax Code CN auth | Using `minimax-code-cn` provider | |\n| `OPENCODE_API_KEY` | OpenCode auth | Using `opencode-go` / `opencode-zen` models | |\n| `QIANFAN_API_KEY` | Qianfan auth | Using `qianfan` provider | |\n| `QWEN_OAUTH_TOKEN` | Qwen Portal auth | Using `qwen-portal` with OAuth token | Takes precedence over `QWEN_PORTAL_API_KEY` |\n| `QWEN_PORTAL_API_KEY` | Qwen Portal auth | Using `qwen-portal` with API key | Fallback after `QWEN_OAUTH_TOKEN` |\n| `ZENMUX_API_KEY` | ZenMux auth | Using `zenmux` provider | Used for ZenMux OpenAI and Anthropic-compatible routes |\n| `VLLM_API_KEY` | vLLM auth/discovery opt-in | Using `vllm` provider (local OpenAI-compatible servers) | Any non-empty value works for no-auth local servers |\n| `CURSOR_ACCESS_TOKEN` | Cursor provider auth | Using Cursor provider | |\n| `AI_GATEWAY_API_KEY` | Vercel AI Gateway auth | Using `vercel-ai-gateway` provider | |\n| `CLOUDFLARE_AI_GATEWAY_API_KEY` | Cloudflare AI Gateway auth | Using `cloudflare-ai-gateway` provider | Base URL must be configured as `https://gateway.ai.cloudflare.com/v1/<account>/<gateway>/anthropic` |\n| `ALIBABA_CODING_PLAN_API_KEY` | Alibaba Coding Plan auth | Using `alibaba-coding-plan` provider | |\n| `DEEPSEEK_API_KEY` | DeepSeek auth | Using DeepSeek models | |\n| `KILO_API_KEY` | Kilo auth | Using Kilo models | |\n| `OLLAMA_CLOUD_API_KEY` | Ollama Cloud auth | Using `ollama-cloud` provider | |\n| 
`GITLAB_TOKEN` | GitLab Duo auth | Using `gitlab-duo` provider | |\n\n### GitHub/Copilot token chains\n\n| Variable | Used for | Chain |\n| ---------------------- | ------------------------------------------------ | ---------------------------------------------------- |\n| `COPILOT_GITHUB_TOKEN` | GitHub Copilot provider auth | `COPILOT_GITHUB_TOKEN` → `GH_TOKEN` → `GITHUB_TOKEN` |\n| `GH_TOKEN` | Copilot fallback; GitHub API auth in web scraper | In web scraper: `GITHUB_TOKEN` → `GH_TOKEN` |\n| `GITHUB_TOKEN` | Copilot fallback; GitHub API auth in web scraper | In web scraper: checked before `GH_TOKEN` |\n\n---\n\n## 2) Provider-specific runtime configuration\n\n### Anthropic Foundry Gateway (Azure / enterprise proxy)\n\nWhen `CLAUDE_CODE_USE_FOUNDRY` is enabled, Anthropic requests switch to Foundry mode:\n\n- Base URL resolves from `FOUNDRY_BASE_URL` (fallback remains model/default base URL if unset).\n- API key resolution for provider `anthropic` becomes:\n `ANTHROPIC_FOUNDRY_API_KEY` → `ANTHROPIC_OAUTH_TOKEN` → `ANTHROPIC_API_KEY`.\n- `ANTHROPIC_CUSTOM_HEADERS` is parsed as comma/newline-separated `key: value` pairs and merged into request headers.\n- TLS client/server material can be injected from env values:\n `NODE_EXTRA_CA_CERTS`, `CLAUDE_CODE_CLIENT_CERT`, `CLAUDE_CODE_CLIENT_KEY`.\n Each accepts either:\n - a filesystem path to PEM content, or\n - inline PEM (including escaped `\\n` sequences).\n\n| Variable | Value type | Behavior |\n| --------------------------- | ---------------------------------------------- | ----------------------------------------------------------------------------- |\n| `CLAUDE_CODE_USE_FOUNDRY` | Boolean-like string (`1`, `true`, `yes`, `on`) | Enables Foundry mode for Anthropic provider |\n| `FOUNDRY_BASE_URL` | URL string | Anthropic endpoint base URL in Foundry mode |\n| `ANTHROPIC_FOUNDRY_API_KEY` | Token string | Used for `Authorization: Bearer <token>` |\n| `ANTHROPIC_CUSTOM_HEADERS` | Header list string | Extra headers; 
format `header-a: value, header-b: value` or newline-separated |\n| `NODE_EXTRA_CA_CERTS` | PEM path or inline PEM | Extra CA chain for server certificate validation |\n| `CLAUDE_CODE_CLIENT_CERT` | PEM path or inline PEM | mTLS client certificate |\n| `CLAUDE_CODE_CLIENT_KEY` | PEM path or inline PEM | mTLS client private key (must be paired with cert) |\n\n### Amazon Bedrock\n\n| Variable | Default / behavior |\n| ------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------- |\n| `AWS_REGION` | Primary region source |\n| `AWS_DEFAULT_REGION` | Fallback if `AWS_REGION` unset |\n| `AWS_PROFILE` | Enables named profile auth path |\n| `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY` | Enables IAM key auth path |\n| `AWS_BEARER_TOKEN_BEDROCK` | Enables bearer token auth path |\n| `AWS_CONTAINER_CREDENTIALS_RELATIVE_URI` / `AWS_CONTAINER_CREDENTIALS_FULL_URI` | Enables ECS task credential path |\n| `AWS_WEB_IDENTITY_TOKEN_FILE` + `AWS_ROLE_ARN` | Enables web identity auth path |\n| `AWS_BEDROCK_SKIP_AUTH` | If `1`, injects dummy credentials (proxy/non-auth scenarios) |\n| `AWS_BEDROCK_FORCE_HTTP1` | If `1`, forces Node HTTP/1 request handler |\n| `HTTPS_PROXY` / `HTTP_PROXY` / `ALL_PROXY` | Routes Bedrock runtime and AWS SSO credential calls through the configured proxy using HTTP/1 |\n| `NO_PROXY` | Excludes matching hosts from proxy routing when a proxy variable is configured |\n\nRegion fallback in provider code: `options.region` → `AWS_REGION` → `AWS_DEFAULT_REGION` → `us-east-1`.\n\n### Azure OpenAI Responses\n\n| Variable | Default / behavior |\n| ---------------------------------- | --------------------------------------------------------------------------- |\n| `AZURE_OPENAI_API_KEY` | Required unless API key passed as option |\n| `AZURE_OPENAI_API_VERSION` | Default `v1` |\n| `AZURE_OPENAI_BASE_URL` | Direct base URL override |\n| 
`AZURE_OPENAI_RESOURCE_NAME` | Used to construct base URL: `https://<resource>.openai.azure.com/openai/v1` |\n| `AZURE_OPENAI_DEPLOYMENT_NAME_MAP` | Optional mapping string: `modelId=deploymentName,model2=deployment2` |\n\nBase URL resolution: option `azureBaseUrl` → env `AZURE_OPENAI_BASE_URL` → option/env resource name → `model.baseUrl`.\n\n### Google Vertex AI\n\n| Variable | Required? | Notes |\n| -------------------------------- | ------------------------------ | ------------------------------------------------------------------------------------------------------------------------- |\n| `GOOGLE_CLOUD_PROJECT` | Yes (unless passed in options) | Fallback: `GCLOUD_PROJECT` |\n| `GCLOUD_PROJECT` | Fallback | Used as alternate project ID source |\n| `GOOGLE_CLOUD_PROJECT_ID` | OAuth login helper only | Used by Gemini CLI OAuth project discovery |\n| `GOOGLE_CLOUD_LOCATION` | Yes (unless passed in options) | No default in provider |\n| `GOOGLE_CLOUD_API_KEY` | Conditional | Direct Vertex API-key auth; otherwise ADC fallback can authenticate when project and location are set |\n| `GOOGLE_APPLICATION_CREDENTIALS` | Conditional | If set, file must exist; otherwise ADC fallback path is checked (`~/.config/gcloud/application_default_credentials.json`) |\n\n### Kimi\n\n| Variable | Default / behavior |\n| ---------------------- | -------------------------------------------------------- |\n| `KIMI_CODE_OAUTH_HOST` | Primary OAuth host override |\n| `KIMI_OAUTH_HOST` | Fallback OAuth host override |\n| `KIMI_CODE_BASE_URL` | Overrides Kimi usage endpoint base URL (`usage/kimi.ts`) |\n\nOAuth host chain: `KIMI_CODE_OAUTH_HOST` → `KIMI_OAUTH_HOST` → `https://auth.kimi.com`.\n\n### Gemini CLI compatibility\n\n| Variable | Default / behavior |\n| -------------------------- | --------------------------------------------------------------- |\n| `PI_AI_GEMINI_CLI_VERSION` | Overrides Gemini CLI user-agent version tag (`0.35.3` if unset) |\n\n### OpenAI Codex responses 
(feature/debug controls)\n\n| Variable | Behavior |\n| ------------------------------------ | ---------------------------------------------------- |\n| `PI_CODEX_DEBUG` | `1`/`true` enables Codex provider debug logging |\n| `PI_CODEX_WEBSOCKET` | `1`/`true` enables websocket transport preference |\n| `PI_CODEX_WEBSOCKET_V2` | `1`/`true` enables websocket v2 path |\n| `PI_CODEX_WEBSOCKET_IDLE_TIMEOUT_MS` | Positive integer override (default 300000) |\n| `PI_CODEX_WEBSOCKET_RETRY_BUDGET` | Non-negative integer override (default 5) |\n| `PI_CODEX_WEBSOCKET_RETRY_DELAY_MS` | Positive integer base backoff override (default 500) |\n| `PI_OPENAI_STREAM_IDLE_TIMEOUT_MS` | Positive integer OpenAI stream idle timeout override |\n\n### Cursor provider debug\n\n| Variable | Behavior |\n| ------------------ | ------------------------------------------------------------------------ |\n| `DEBUG_CURSOR` | Enables provider debug logs; `2`/`verbose` for detailed payload snippets |\n| `DEBUG_CURSOR_LOG` | Optional file path for JSONL debug log output |\n\n### Prompt cache compatibility switch\n\n| Variable | Behavior |\n| -------------------- | ----------------------------------------------------------------------------------------------------------------- |\n| `PI_CACHE_RETENTION` | If `long`, enables long retention where supported (`anthropic`, `openai-responses`, Bedrock retention resolution) |\n\n---\n\n## 3) Web search subsystem\n\n### Search provider credentials\n\n| Variable | Used by |\n| --------------------------------------------------- | ------------------------------------------------------------- |\n| `EXA_API_KEY` | Exa search provider and Exa MCP tools |\n| `BRAVE_API_KEY` | Brave search provider |\n| `PERPLEXITY_API_KEY` | Perplexity search provider API-key mode |\n| `PERPLEXITY_COOKIES` | Perplexity cookie-auth search mode |\n| `TAVILY_API_KEY` | Tavily search provider |\n| `ZAI_API_KEY` | z.ai search provider (also checks stored OAuth in `agent.db`) |\n| 
`OPENAI_API_KEY` / Codex OAuth in DB | Codex search provider availability/auth |\n| `PI_CODEX_WEB_SEARCH_MODEL` | Codex search provider model override |\n| `MOONSHOT_SEARCH_API_KEY` / `KIMI_SEARCH_API_KEY` | Kimi/Moonshot search provider env auth |\n| `MOONSHOT_SEARCH_BASE_URL` / `KIMI_SEARCH_BASE_URL` | Kimi/Moonshot search endpoint override |\n| `KAGI_API_KEY` | Kagi search provider |\n| `JINA_API_KEY` | Jina search provider |\n| `PARALLEL_API_KEY` | Parallel search provider |\n| `SEARXNG_ENDPOINT`, `SEARXNG_TOKEN` | SearXNG endpoint and optional bearer token |\n| `SEARXNG_BASIC_USERNAME`, `SEARXNG_BASIC_PASSWORD` | SearXNG HTTP Basic Auth credentials |\n\nSearXNG also reads the equivalent `searxng.endpoint`, `searxng.token`, `searxng.basicUsername`, and `searxng.basicPassword` settings from `~/.omp/agent/config.yml`; environment variables are fallbacks.\n\n### Anthropic web search auth chain\n\nAnthropic web search uses `findAnthropicAuth()` from `packages/ai/src/utils/anthropic-auth.ts` in this order:\n\n1. `ANTHROPIC_SEARCH_API_KEY` (+ optional `ANTHROPIC_SEARCH_BASE_URL`)\n2. `ANTHROPIC_FOUNDRY_API_KEY` when `CLAUDE_CODE_USE_FOUNDRY` is enabled\n3. Anthropic OAuth credentials from `agent.db` (must not expire within 5-minute buffer)\n4. Anthropic API-key credentials from `agent.db`\n5. 
Generic Anthropic env fallback: provider key (`ANTHROPIC_FOUNDRY_API_KEY` in Foundry mode, otherwise `ANTHROPIC_OAUTH_TOKEN`/`ANTHROPIC_API_KEY`) + optional `ANTHROPIC_BASE_URL` (`FOUNDRY_BASE_URL` when Foundry mode is enabled)\n\nRelated vars:\n\n| Variable | Default / behavior |\n| --------------------------- | ---------------------------------------------------- |\n| `ANTHROPIC_SEARCH_API_KEY` | Highest-priority explicit search key |\n| `ANTHROPIC_SEARCH_BASE_URL` | Defaults to `https://api.anthropic.com` when omitted |\n| `ANTHROPIC_SEARCH_MODEL` | Defaults to `claude-haiku-4-5` |\n| `ANTHROPIC_BASE_URL` | Generic fallback base URL for tier-5 auth path |\n\n### Perplexity OAuth flow behavior flag\n\n| Variable | Behavior |\n| ------------------- | ------------------------------------------------------------------------------- |\n| `PI_AUTH_NO_BORROW` | If set, disables macOS native-app token borrowing path in Perplexity login flow |\n\n---\n\n## 4) Python tooling and kernel runtime\n\n| Variable | Default / behavior |\n| ------------------------- | ------------------------------------------------------------------------------------------------------------------- |\n| `PI_PY` | Eval backend override: `0`/`bash`=JavaScript only, `1`/`py`=Python only, `mix`/`both`=both; invalid values ignored |\n| `PI_PYTHON_SKIP_CHECK` | If `1`, skips Python interpreter availability checks (subprocess runner still starts on demand) |\n| `PI_PYTHON_INTEGRATION` | If `1`, opts gated integration tests in (e.g. 
`python-runner.integration.test.ts`) into running against real Python |\n| `PI_PYTHON_IPC_TRACE` | If `1`, logs NDJSON frames exchanged with the Python runner subprocess |\n| `VIRTUAL_ENV` | Highest-priority venv path for Python runtime resolution |\n\nExtra conditional behavior:\n\n- If `BUN_ENV=test` or `NODE_ENV=test`, Python availability checks are treated as OK and warming is skipped.\n- Python env filtering denies common API keys and allows safe base vars + `LC_`, `XDG_`, `PI_` prefixes.\n\n---\n\n## 5) Agent/runtime behavior toggles\n\n| Variable | Default / behavior |\n| ---------------------------- | -------------------------------------------------------------------------------------------------- |\n| `PI_SMOL_MODEL` | Ephemeral model-role override for `smol` (CLI `--smol` takes precedence) |\n| `PI_SLOW_MODEL` | Ephemeral model-role override for `slow` (CLI `--slow` takes precedence) |\n| `PI_PLAN_MODEL` | Ephemeral model-role override for `plan` (CLI `--plan` takes precedence) |\n| `PI_NO_TITLE` | If set (any non-empty value), disables auto session title generation on first user message |\n| `NULL_PROMPT` | If `true`, system prompt builder returns empty string |\n| `PI_BLOCKED_AGENT` | Blocks a specific subagent type in task tool |\n| `PI_SUBPROCESS_CMD` | Overrides subagent spawn command (`omp` / `omp.cmd` resolution bypass) |\n| `PI_TASK_MAX_OUTPUT_BYTES` | Max captured output bytes per subagent (default `500000`) |\n| `PI_TASK_MAX_OUTPUT_LINES` | Max captured output lines per subagent (default `5000`) |\n| `PI_TIMING` | If `1`, enables startup/tool timing instrumentation logs |\n| `PI_PACKAGE_DIR` | Overrides package asset base dir resolution (docs/examples/changelog path lookup) |\n| `PI_DISABLE_LSPMUX` | If `1`, disables lspmux detection/integration and forces direct LSP server spawning |\n| `PI_RPC_EMIT_TITLE` | Boolean-like flag enabling title events in RPC mode |\n| `SMITHERY_URL` | Smithery web URL override (default `https://smithery.ai`) |\n| 
`SMITHERY_API_URL` | Smithery API base URL override (default `https://api.smithery.ai`) |\n| `PUPPETEER_EXECUTABLE_PATH` | Browser tool Chromium executable override |\n| `LM_STUDIO_BASE_URL` | Default implicit LM Studio discovery base URL override (`http://127.0.0.1:1234/v1` if unset) |\n| `OLLAMA_BASE_URL` | Default implicit Ollama discovery base URL override (`http://127.0.0.1:11434` if unset) |\n| `LLAMA_CPP_BASE_URL` | Default implicit Llama.cpp discovery base URL override (`http://127.0.0.1:8080` if unset) |\n| `PI_EDIT_VARIANT` | Forces edit tool variant when valid (`patch`, `replace`, `hashline`, `atom`, `vim`, `apply_patch`) |\n| `PI_FORCE_IMAGE_PROTOCOL` | Forces supported image protocol (`kitty`, `iterm2`/`iterm`, `sixel`, `none`) where used |\n| `PI_ALLOW_SIXEL_PASSTHROUGH` | Allows SIXEL passthrough when `PI_FORCE_IMAGE_PROTOCOL=sixel` |\n| `PI_NO_PTY` | If `1`, disables interactive PTY path for bash tool |\n\n`PI_NO_PTY` is also set internally when CLI `--no-pty` is used.\n\n---\n\n## 6) Storage and config root paths\n\nThese are consumed via `@oh-my-pi/pi-utils/dirs` and affect where coding-agent stores data.\n\n| Variable | Default / behavior |\n| --------------------- | ----------------------------------------------------------------------------- |\n| `PI_CONFIG_DIR` | Config root dirname under home (default `.omp`) |\n| `PI_CODING_AGENT_DIR` | Full override for agent directory (default `~/<PI_CONFIG_DIR or .omp>/agent`) |\n| `PWD` | Used when matching canonical current working directory in path helpers |\n\n---\n\n## 7) Shell/tool execution environment\n\n(From `packages/utils/src/procmgr.ts` and coding-agent bash tool integration.)\n\n| Variable | Behavior |\n| -------------------------- | ------------------------------------------------------------------------------ |\n| `PI_BASH_NO_CI` | Suppresses automatic `CI=true` injection into spawned shell env |\n| `CLAUDE_BASH_NO_CI` | Legacy alias fallback for `PI_BASH_NO_CI` |\n| `PI_BASH_NO_LOGIN` | 
Disables login-shell mode; shell args become `['-c']` instead of `['-l','-c']` |\n| `CLAUDE_BASH_NO_LOGIN` | Legacy alias fallback for `PI_BASH_NO_LOGIN` |\n| `PI_SHELL_PREFIX` | Optional command prefix wrapper |\n| `CLAUDE_CODE_SHELL_PREFIX` | Legacy alias fallback for `PI_SHELL_PREFIX` |\n| `VISUAL` | Preferred external editor command |\n| `EDITOR` | Fallback external editor command |\n\nCurrent implementation: `PI_BASH_NO_LOGIN`/`CLAUDE_BASH_NO_LOGIN` are active; when either is set, `getShellArgs()` returns `['-c']`.\n\n---\n\n## 8) UI/theme/session detection (auto-detected env)\n\nThese are read as runtime signals; they are usually set by the terminal/OS rather than manually configured.\n\n| Variable | Used for |\n| ------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------- |\n| `COLORTERM`, `TERM`, `WT_SESSION` | Color capability detection (theme color mode) |\n| `COLORFGBG` | Terminal background light/dark auto-detection |\n| `TERM_PROGRAM`, `TERM_PROGRAM_VERSION`, `TERMINAL_EMULATOR` | Terminal identity in system prompt/context |\n| `KDE_FULL_SESSION`, `XDG_CURRENT_DESKTOP`, `DESKTOP_SESSION`, `XDG_SESSION_DESKTOP`, `GDMSESSION`, `WINDOWMANAGER` | Desktop/window-manager detection in system prompt/context |\n| `KITTY_WINDOW_ID`, `TMUX_PANE`, `TERM_SESSION_ID`, `WT_SESSION` | Stable per-terminal session breadcrumb IDs |\n| `SHELL`, `ComSpec`, `TERM_PROGRAM`, `TERM` | System info diagnostics |\n| `APPDATA`, `XDG_CONFIG_HOME` | lspmux config path resolution |\n| `HOME` | Path shortening in MCP command UI |\n\n---\n\n## 9) TUI runtime flags (shared package, affects coding-agent UX)\n\n| Variable | Behavior |\n| ------------------------- | ------------------------------------------------------------------------------------- |\n| `PI_NOTIFICATIONS` | `off` / `0` / `false` suppress desktop notifications |\n| `PI_TUI_WRITE_LOG` | If set, logs TUI writes 
to file |\n| `PI_HARDWARE_CURSOR` | If `1`, enables hardware cursor mode |\n| `PI_CLEAR_ON_SHRINK` | If `1`, clears empty rows when content shrinks |\n| `PI_DEBUG_REDRAW` | If `1`, enables redraw debug logging |\n| `PI_TUI_DEBUG` | If `1`, enables deep TUI debug dump path |\n| `PI_FORCE_IMAGE_PROTOCOL` | Forces terminal image protocol detection (`kitty`, `iterm2`/`iterm`, `sixel`, `none`) |\n\n---\n\n## 10) Commit generation controls\n\n| Variable | Behavior |\n| ------------------------- | ------------------------------------------------------------------- |\n| `PI_COMMIT_TEST_FALLBACK` | If `true` (case-insensitive), force commit fallback generation path |\n| `PI_COMMIT_NO_FALLBACK` | If `true`, disables fallback when agent returns no proposal |\n| `PI_COMMIT_MAP_REDUCE` | If `false`, disables map-reduce commit analysis path |\n| `DEBUG` | If set, commit agent error stack traces are printed |\n\n---\n\n## Security-sensitive variables\n\nTreat these as secrets; do not log or commit them:\n\n- Provider/API keys and OAuth/bearer credentials (all `*_API_KEY`, `*_TOKEN`, OAuth access/refresh tokens)\n- Cloud credentials (`AWS_*`, `GOOGLE_APPLICATION_CREDENTIALS` path may expose service-account material)\n- Search/provider auth vars (`EXA_API_KEY`, `BRAVE_API_KEY`, `PERPLEXITY_API_KEY`, Anthropic search keys)\n- Foundry mTLS material (`CLAUDE_CODE_CLIENT_CERT`, `CLAUDE_CODE_CLIENT_KEY`, `NODE_EXTRA_CA_CERTS` when it points to private CA bundles)\n\nPython runtime also explicitly strips many common key vars before spawning kernel subprocesses (`packages/coding-agent/src/eval/py/runtime.ts`).\n",
13
13
  "extension-loading.md": "# Extension Loading (TypeScript/JavaScript Modules)\n\nThis document covers how the coding agent discovers and loads **extension modules** (`.ts`/`.js`) at startup.\n\nIt does **not** cover `gemini-extension.json` manifest extensions (documented separately).\n\n## What this subsystem does\n\nExtension loading builds a list of module entry files, imports each module with Bun, executes its factory, and returns:\n\n- loaded extension definitions\n- per-path load errors (without aborting the whole load)\n- a shared extension runtime object used later by `ExtensionRunner`\n\n## Primary implementation files\n\n- `src/extensibility/extensions/loader.ts` — path discovery + import/execution\n- `src/extensibility/extensions/index.ts` — public exports\n- `src/extensibility/extensions/runner.ts` — runtime/event execution after load\n- `src/discovery/builtin.ts` — native auto-discovery provider for extension modules\n- `src/config/settings.ts` — loads merged `extensions` / `disabledExtensions` settings\n\n---\n\n## Inputs to extension loading\n\n### 1) Auto-discovered native extension modules\n\n`discoverAndLoadExtensions()` first asks discovery providers for `extension-module` capability items, then keeps only provider `native` items.\n\nEffective native locations:\n\n- Project: `<cwd>/.omp/extensions`\n- User: `~/.omp/agent/extensions`\n\nPath roots come from the native provider (`SOURCE_PATHS.native`).\n\nNotes:\n\n- Native auto-discovery is currently `.omp` based.\n- Legacy `.pi` is still accepted in `package.json` manifest keys (`pi.extensions`), but not as a native root here.\n\n### 2) Installed plugin extension entries\n\nAfter native auto-discovery, `discoverAndLoadExtensions()` appends extension entry points from enabled installed plugins via `getAllPluginExtensionPaths(cwd)`.\n\nPlugin extension entries come from package `omp.extensions` / `pi.extensions` manifests, including enabled feature entries.\n\n### 3) Explicitly configured 
paths\n\nAfter plugin extension entries, configured paths are appended and resolved.\n\nConfigured path sources in the main session startup path (`sdk.ts`):\n\n1. CLI-provided paths (`--extension/-e`, and `--hook` is also treated as an extension path)\n2. Settings `extensions` array (merged global + project settings)\n\nGlobal settings file:\n\n- `~/.omp/agent/config.yml` (or custom agent dir via `PI_CODING_AGENT_DIR`)\n\nProject settings file:\n\n- `<cwd>/.omp/settings.json`\n\nExamples:\n\n```yaml\n# ~/.omp/agent/config.yml\nextensions:\n - ~/my-exts/safety.ts\n - ./local/ext-pack\n```\n\n```json\n{\n \"extensions\": [\"./.omp/extensions/my-extra\"]\n}\n```\n\n---\n\n## Enable/disable controls\n\n### Disable discovery\n\n- CLI: `--no-extensions`\n- SDK option: `disableExtensionDiscovery`\n\nBehavior split:\n\n- SDK: when `disableExtensionDiscovery=true`, it still loads `additionalExtensionPaths` via `loadExtensions()`.\n- CLI path building (`main.ts`) currently clears CLI extension paths when `--no-extensions` is set, so explicit `-e/--hook` are not forwarded in that mode.\n\n### Disable specific extension modules\n\n`disabledExtensions` setting filters by extension id format:\n\n- `extension-module:<derivedName>`\n\n`derivedName` is based on entry path (`getExtensionNameFromPath`), for example:\n\n- `/x/foo.ts` -> `foo`\n- `/x/bar/index.ts` -> `bar`\n\nExample:\n\n```yaml\ndisabledExtensions:\n - extension-module:foo\n```\n\n---\n\n## Path and entry resolution\n\n### Path normalization\n\nFor configured paths:\n\n1. Normalize unicode spaces\n2. Expand `~`\n3. If relative, resolve against current `cwd`\n\n### If configured path is a file\n\nIt is used directly as a module entry candidate.\n\n### If configured path is a directory\n\nResolution order:\n\n1. `package.json` in that directory with `omp.extensions` (or legacy `pi.extensions`) -> use declared entries\n2. `index.ts`\n3. `index.js`\n4. 
Otherwise scan one level for extension entries:\n - direct `*.ts` / `*.js`\n - subdir `index.ts` / `index.js`\n - subdir `package.json` with `omp.extensions` / `pi.extensions`\n\nRules and constraints:\n\n- no recursive discovery beyond one subdirectory level\n- declared `extensions` manifest entries are resolved relative to that package directory\n- declared entries are included only if file exists/access is allowed\n- in `*/index.{ts,js}` pairs, TypeScript is preferred over JavaScript\n- symlinks are treated as eligible files/directories\n\n### Ignore behavior differs by source\n\n- Native auto-discovery (`discoverExtensionModulePaths` in discovery helpers) uses native glob with `gitignore: true` and `hidden: false`.\n- Explicit configured directory scanning in `loader.ts` uses `readdir` rules and does **not** apply gitignore filtering.\n\n---\n\n## Load order and precedence\n\n`discoverAndLoadExtensions()` builds one ordered list and then calls `loadExtensions()`.\n\nOrder:\n\n1. Native auto-discovered modules\n2. Installed plugin extension entries\n3. Explicit configured paths (in provided order)\n\nIn `sdk.ts`, configured order is:\n\n1. CLI additional paths\n2. Settings `extensions`\n\nDe-duplication:\n\n- absolute path based\n- first seen path wins\n- later duplicates are ignored\n\nImplication: if the same module path is both auto-discovered and explicitly configured, it is loaded once at the first position (auto-discovered stage).\n\n---\n\n## Module import and factory contract\n\nEach candidate path is loaded with dynamic import:\n\n- `await import(resolvedPath)`\n- factory is `module.default ?? 
module`\n- factory must be a function (`ExtensionFactory`)\n\nIf export is not a function, that path fails with a structured error and loading continues.\n\n---\n\n## Failure handling and isolation\n\n### During loading\n\nPer extension path, failures are captured as `{ path, error }` and do not stop other paths from loading.\n\nCommon cases:\n\n- import failure / missing file\n- invalid factory export (non-function)\n- exception thrown while executing factory\n\n### Runtime isolation model\n\n- Extensions are **not sandboxed** (same process/runtime).\n- They share one `EventBus` and one `ExtensionRuntime` instance.\n- During load, runtime action methods intentionally throw `ExtensionRuntimeNotInitializedError`; action wiring happens later in `ExtensionRunner.initialize()`.\n\n### After loading\n\nWhen events run through `ExtensionRunner`, handler exceptions are caught and emitted as extension errors instead of crashing the runner loop.\n\n---\n\n## Minimal user/project layout examples\n\n### User-level\n\n```text\n~/.omp/agent/\n config.yml\n extensions/\n guardrails.ts\n audit/\n index.ts\n```\n\n### Project-level\n\n```text\n<repo>/\n .omp/\n settings.json\n extensions/\n checks/\n package.json\n lint-gates.ts\n```\n\n`checks/package.json`:\n\n```json\n{\n \"omp\": {\n \"extensions\": [\"./src/check-a.ts\", \"./src/check-b.js\"]\n }\n}\n```\n\nLegacy manifest key still accepted:\n\n```json\n{\n \"pi\": {\n \"extensions\": [\"./index.ts\"]\n }\n}\n```\n",
14
14
  "extensions.md": "# Extensions\n\nPrimary guide for authoring runtime extensions in `packages/coding-agent`.\n\nThis document covers the current extension runtime in:\n\n- `src/extensibility/extensions/types.ts`\n- `src/extensibility/extensions/runner.ts`\n- `src/extensibility/extensions/wrapper.ts`\n- `src/extensibility/extensions/index.ts`\n- `src/modes/controllers/extension-ui-controller.ts`\n\nFor discovery paths and filesystem loading rules, see `docs/extension-loading.md`.\n\n## What an extension is\n\nAn extension is a TS/JS module exporting a default factory:\n\n```ts\nimport type { ExtensionAPI } from \"@oh-my-pi/pi-coding-agent\";\n\nexport default function myExtension(pi: ExtensionAPI) {\n // register handlers/tools/commands/renderers\n}\n```\n\nExtensions can combine all of the following in one module:\n\n- event handlers (`pi.on(...)`)\n- LLM-callable tools (`pi.registerTool(...)`)\n- slash commands (`pi.registerCommand(...)`)\n- keyboard shortcuts and flags\n- custom message rendering\n- session/message injection APIs (`sendMessage`, `sendUserMessage`, `appendEntry`)\n\n## Runtime model\n\n1. Extensions are imported and their factory functions run.\n2. During that load phase, registration methods are valid; runtime action methods are not yet initialized.\n3. `ExtensionRunner.initialize(...)` wires live actions/contexts for the active mode.\n4. Session/agent/tool lifecycle events are emitted to handlers.\n5. 
Every tool execution is wrapped with extension interception (`tool_call` / `tool_result`).\n\n```text\nExtension lifecycle (simplified)\n\nload paths\n │\n ▼\nimport module + run factory (registration only)\n │\n ▼\nExtensionRunner.initialize(mode/session/tool registry)\n │\n ├─ emit session/agent events to handlers\n ├─ wrap tool execution (tool_call/tool_result)\n └─ expose runtime actions (sendMessage, setActiveTools, ...)\n```\n\nImportant constraint from `loader.ts`:\n\n- calling action methods like `pi.sendMessage()` during extension load throws `ExtensionRuntimeNotInitializedError`\n- register first; perform runtime behavior from events/commands/tools\n\n## Quick start\n\n```ts\nimport type { ExtensionAPI } from \"@oh-my-pi/pi-coding-agent\";\nimport { Type } from \"@sinclair/typebox\";\n\nexport default function (pi: ExtensionAPI) {\n pi.setLabel(\"Safety + Utilities\");\n\n pi.on(\"session_start\", async (_event, ctx) => {\n ctx.ui.notify(`Extension loaded in ${ctx.cwd}`, \"info\");\n });\n\n pi.on(\"tool_call\", async (event) => {\n if (event.toolName === \"bash\" && event.input.command?.includes(\"rm -rf\")) {\n return { block: true, reason: \"Blocked by extension policy\" };\n }\n });\n\n pi.registerTool({\n name: \"hello_extension\",\n label: \"Hello Extension\",\n description: \"Return a greeting\",\n parameters: Type.Object({ name: Type.String() }),\n async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {\n return {\n content: [{ type: \"text\", text: `Hello, ${params.name}` }],\n details: { greeted: params.name },\n };\n },\n });\n\n pi.registerCommand(\"hello-ext\", {\n description: \"Show queue state\",\n handler: async (_args, ctx) => {\n ctx.ui.notify(`pending=${ctx.hasPendingMessages()}`, \"info\");\n },\n });\n}\n```\n\n## Extension API surfaces\n\n## 1) Registration and actions (`ExtensionAPI`)\n\nCore methods:\n\n- `on(event, handler)`\n- `registerTool`, `registerCommand`, `registerShortcut`, `registerFlag`\n- 
`registerMessageRenderer`\n- `sendMessage`, `sendUserMessage`, `appendEntry`\n- `getActiveTools`, `getAllTools`, `setActiveTools`\n- `getSessionName`, `setSessionName`\n- `setModel`, `getThinkingLevel`, `setThinkingLevel`\n- `registerProvider`\n- `events` (shared event bus)\n\nIn interactive mode, `input` handlers run before the built-in first-message auto-title check. Extensions that call `await pi.setSessionName(...)` from `input` can set the persisted session name and prevent the default auto-generated title from running for that session.\n\nAlso exposed:\n\n- `pi.logger`\n- `pi.typebox`\n- `pi.pi` (package exports)\n\n### Message delivery semantics\n\n`pi.sendMessage(message, options)` supports:\n\n- `deliverAs: \"steer\"` (default) — interrupts current run\n- `deliverAs: \"followUp\"` — queued to run after current run\n- `deliverAs: \"nextTurn\"` — stored and injected on the next user prompt\n- `triggerTurn: true` — starts a turn when idle (`nextTurn` ignores this)\n\n`pi.sendUserMessage(content, { deliverAs })` always goes through prompt flow; while streaming it queues as steer/follow-up.\n\n## 2) Handler context (`ExtensionContext`)\n\nHandlers and tool `execute` receive `ctx` with:\n\n- `ui`\n- `hasUI`\n- `cwd`\n- `sessionManager` (read-only)\n- `modelRegistry`, `model`\n- `getContextUsage()`\n- `compact(...)`\n- `isIdle()`, `hasPendingMessages()`, `abort()`\n- `shutdown()`\n- `getSystemPrompt()`\n\n## 3) Command context (`ExtensionCommandContext`)\n\nCommand handlers additionally get:\n\n- `waitForIdle()`\n- `newSession(...)`\n- `switchSession(...)`\n- `branch(entryId)`\n- `navigateTree(targetId, { summarize })`\n- `reload()`\n\nUse command context for session-control flows; these methods are intentionally separated from general event handlers.\n\n## Event surface (current names and behavior)\n\nCanonical event unions and payload types are in `types.ts`.\n\n### Session lifecycle\n\n- `session_start`\n- `session_before_switch` / `session_switch`\n- 
`session_before_branch` / `session_branch`\n- `session_before_compact` / `session.compacting` / `session_compact`\n- `session_before_tree` / `session_tree`\n- `session_shutdown`\n\nCancelable pre-events:\n\n- `session_before_switch` → `{ cancel?: boolean }`\n- `session_before_branch` → `{ cancel?: boolean; skipConversationRestore?: boolean }`\n- `session_before_compact` → `{ cancel?: boolean; compaction?: CompactionResult }`\n- `session_before_tree` → `{ cancel?: boolean; summary?: { summary: string; details?: unknown } }`\n\n### Prompt and turn lifecycle\n\n- `input`\n- `before_agent_start`\n- `context`\n- `agent_start` / `agent_end`\n- `turn_start` / `turn_end`\n- `message_start` / `message_update` / `message_end`\n\n### Tool lifecycle\n\n- `tool_call` (pre-exec, may block)\n- `tool_result` (post-exec, may patch content/details/isError)\n- `tool_execution_start` / `tool_execution_update` / `tool_execution_end` (observability)\n\n`tool_result` is middleware-style: handlers run in extension order and each sees prior modifications.\n\n### Reliability/runtime signals\n\n- `auto_compaction_start` / `auto_compaction_end`\n- `auto_retry_start` / `auto_retry_end`\n- `ttsr_triggered`\n- `todo_reminder`\n\n### User command interception\n\n- `user_bash` (override with `{ result }`)\n- `user_python` (override with `{ result }`)\n\n### `resources_discover`\n\n`resources_discover` exists in extension types and `ExtensionRunner`.\nCurrent runtime note: `ExtensionRunner.emitResourcesDiscover(...)` is implemented, but there are no `AgentSession` callsites invoking it in the current codebase.\n\n## Tool authoring details\n\n`registerTool` uses `ToolDefinition` from `types.ts`.\n\nCurrent `execute` signature:\n\n```ts\nexecute(\n\ttoolCallId,\n\tparams,\n\tsignal,\n\tonUpdate,\n\tctx,\n): Promise<AgentToolResult>\n```\n\nTemplate:\n\n```ts\npi.registerTool({\n name: \"my_tool\",\n label: \"My Tool\",\n description: \"...\",\n parameters: Type.Object({}),\n async execute(_id, 
_params, signal, onUpdate, ctx) {\n if (signal?.aborted) {\n return { content: [{ type: \"text\", text: \"Cancelled\" }] };\n }\n onUpdate?.({ content: [{ type: \"text\", text: \"Working...\" }] });\n return { content: [{ type: \"text\", text: \"Done\" }], details: {} };\n },\n onSession(event, ctx) {\n // reason: start|switch|branch|tree|shutdown\n },\n renderCall(args, options, theme) {\n // optional TUI render\n },\n renderResult(result, options, theme, args) {\n // optional TUI render\n },\n});\n```\n\n`tool_call`/`tool_result` intercept all tools once the registry is wrapped in `sdk.ts`, including built-ins and extension/custom tools.\n\n## UI integration points\n\n`ctx.ui` implements the `ExtensionUIContext` interface. Support differs by mode.\n\n### Interactive mode (`extension-ui-controller.ts`)\n\nSupported:\n\n- dialogs: `select`, `confirm`, `input`, `editor`\n- notifications/status/editor text/terminal input/custom overlays\n- theme listing/loading by name (`setTheme` supports string names)\n- tools expanded toggle\n\nCurrent no-op methods in this controller:\n\n- `setFooter`\n- `setHeader`\n- `setEditorComponent`\n\nAlso note: `setWidget` currently routes to status-line text via `setHookWidget(...)`.\n\n### RPC mode (`rpc-mode.ts`)\n\n`ctx.ui` is backed by RPC `extension_ui_request` events:\n\n- dialog methods (`select`, `confirm`, `input`, `editor`) round-trip to client responses\n- fire-and-forget methods emit requests (`notify`, `setStatus`, `setWidget` for string arrays, `setTitle`, `setEditorText`)\n\nUnsupported/no-op in RPC implementation:\n\n- `onTerminalInput`\n- `custom`\n- `setFooter`, `setHeader`, `setEditorComponent`\n- `setWorkingMessage`\n- theme switching/loading (`setTheme` returns failure)\n- tool expansion controls are inert\n\n### Print/headless/subagent paths\n\nWhen no UI context is supplied to runner init, `ctx.hasUI` is `false` and methods are no-op/default-returning.\n\n### Background interactive mode\n\nBackground mode installs 
a non-interactive UI context object. In current implementation, `ctx.hasUI` may still be `true` while interactive dialogs return defaults/no-op behavior.\n\n## Session and state patterns\n\nFor durable extension state:\n\n1. Persist with `pi.appendEntry(customType, data)`.\n2. Rebuild state from `ctx.sessionManager.getBranch()` on `session_start`, `session_branch`, `session_tree`.\n3. Keep tool result `details` structured when state should be visible/reconstructible from tool result history.\n\nExample reconstruction pattern:\n\n```ts\npi.on(\"session_start\", async (_event, ctx) => {\n let latest;\n for (const entry of ctx.sessionManager.getBranch()) {\n if (entry.type === \"custom\" && entry.customType === \"my-state\") {\n latest = entry.data;\n }\n }\n // restore from latest\n});\n```\n\n## Rendering extension points\n\n## Custom message renderer\n\n```ts\npi.registerMessageRenderer(\"my-type\", (message, { expanded }, theme) => {\n // return pi-tui Component\n});\n```\n\nUsed by interactive rendering when custom messages are displayed.\n\n## Tool call/result renderer\n\nProvide `renderCall` / `renderResult` on `registerTool` definitions for custom tool visualization in TUI.\n\n## Constraints and pitfalls\n\n- Runtime actions are unavailable during extension load.\n- `tool_call` errors block execution (fail-closed).\n- Command name conflicts with built-ins are skipped with diagnostics.\n- Reserved shortcuts are ignored (`ctrl+c`, `ctrl+d`, `ctrl+z`, `ctrl+k`, `ctrl+p`, `ctrl+l`, `ctrl+o`, `ctrl+t`, `ctrl+g`, `shift+tab`, `shift+ctrl+p`, `alt+enter`, `escape`, `enter`).\n- Treat `ctx.reload()` as terminal for the current command handler frame.\n\n## Extensions vs hooks vs custom-tools\n\nUse the right surface:\n\n- **Extensions** (`src/extensibility/extensions/*`): unified system (events + tools + commands + renderers + provider registration).\n- **Hooks** (`src/extensibility/hooks/*`): separate legacy event API.\n- **Custom-tools** 
(`src/extensibility/custom-tools/*`): tool-focused modules; when loaded alongside extensions they are adapted and still pass through extension interception wrappers.\n\nIf you need one package that owns policy, tools, command UX, and rendering together, use extensions.\n",
15
15
  "fs-scan-cache-architecture.md": "# Filesystem Scan Cache Architecture Contract\n\nThis document defines the current contract for the shared filesystem scan cache implemented in Rust (`crates/pi-natives/src/fs_cache.rs`) and consumed by native discovery/search APIs exposed to `packages/coding-agent`.\n\n## What this cache is\n\nThe cache stores full directory-scan entry lists (`GlobMatch[]`) keyed by scan scope and traversal policy, then lets higher-level operations (glob filtering, fuzzy scoring, grep file selection) run against those cached entries.\n\nPrimary goals:\n\n- avoid repeated filesystem walks for repeated discovery/search calls\n- keep consistency across `glob`, `fuzzyFind`, and `grep` when they share the same scan policy\n- allow explicit staleness recovery for empty results and explicit invalidation after file mutations\n\n## Ownership and public surface\n\n- Cache implementation and policy: `crates/pi-natives/src/fs_cache.rs`\n- Native consumers:\n - `crates/pi-natives/src/glob.rs`\n - `crates/pi-natives/src/fd.rs` (`fuzzyFind`)\n - `crates/pi-natives/src/grep.rs`\n- JS binding/export:\n - `packages/natives/src/glob/index.ts` (`invalidateFsScanCache`)\n - `packages/natives/src/glob/types.ts`\n - `packages/natives/src/grep/types.ts`\n- Coding-agent mutation invalidation helpers:\n - `packages/coding-agent/src/tools/fs-cache-invalidation.ts`\n\n## Cache key partitioning (hard contract)\n\nEach entry is keyed by:\n\n- canonicalized `root` directory path\n- `include_hidden` boolean\n- `use_gitignore` boolean\n- `skip_node_modules` boolean\n\nImplications:\n\n- Hidden and non-hidden scans do **not** share entries.\n- Gitignore-respecting and ignore-disabled scans do **not** share entries.\n- Scans that prune `node_modules` do **not** share entries with scans that include it.\n- Consumers must pass stable semantics for hidden/gitignore/node_modules behavior; changing any flag creates a different cache partition.\n\n## Scan collection behavior\n\nCache 
population uses a deterministic walker (`ignore::WalkBuilder`) configured by `include_hidden`, `use_gitignore`, and `skip_node_modules`:\n\n- `follow_links(false)`\n- sorted by file path\n- `.git` is always skipped\n- `node_modules` is pruned at traversal time when `skip_node_modules=true`\n- entry file type + `mtime` are captured via `symlink_metadata`\n\nSearch roots are resolved by `resolve_search_path`:\n\n- relative paths are resolved against current cwd\n- target must be an existing directory\n- root is canonicalized when possible\n\n## Freshness and eviction policy\n\nGlobal policy (environment-overridable):\n\n- `FS_SCAN_CACHE_TTL_MS` (default `1000`)\n- `FS_SCAN_EMPTY_RECHECK_MS` (default `200`)\n- `FS_SCAN_CACHE_MAX_ENTRIES` (default `16`)\n\nBehavior:\n\n- `get_or_scan(...)`\n - if TTL is `0`: bypass cache entirely, always fresh scan (`cache_age_ms = 0`)\n - on cache hit within TTL: return cached entries + non-zero `cache_age_ms`\n - on expired hit: evict key, rescan, store fresh entry\n- max entry enforcement is oldest-first eviction by `created_at`\n\n## Empty-result fast recheck (separate from normal hits)\n\nNormal cache hit:\n\n- a cache hit inside TTL returns cached entries and does nothing else.\n\nEmpty-result fast recheck:\n\n- this is a **caller-side** policy using `ScanResult.cache_age_ms`\n- if filtered/query result is empty and cached scan age is at least `empty_recheck_ms()`, caller performs one `force_rescan(...)` and retries\n- intended to reduce stale-negative results when files were recently added but cache is still within TTL\n\nCurrent consumers:\n\n- `glob`: rechecks when filtered matches are empty and scan age is at least `empty_recheck_ms()`\n- `fuzzyFind` (`fd.rs`): rechecks only when query is non-empty and scored matches are empty\n- `grep`: rechecks when selected candidate file list is empty\n\n## Consumer defaults and cache usage\n\nCache is opt-in on all exposed APIs (`cache?: boolean`, default `false`).\n\nCurrent defaults in native 
APIs:\n\n- `glob`: `hidden=false`, `gitignore=true`, `cache=false`, and `node_modules` included only when the pattern mentions `node_modules`\n- `fuzzyFind`: `hidden=false`, `gitignore=true`, `cache=false`, and `node_modules` is skipped\n- `grep`: `hidden=true`, `gitignore=true`, `cache=false`, and `node_modules` included only when the glob mentions `node_modules`\n\nCoding-agent callers today:\n\n- High-volume mention candidate discovery enables cache:\n - `packages/coding-agent/src/utils/file-mentions.ts`\n - profile: `hidden=true`, `gitignore=true`, `includeNodeModules=true`, `cache=true`\n- Tool-level `grep` integration currently disables scan cache (`cache: false`):\n - `packages/coding-agent/src/tools/grep.ts`\n\n## Invalidation contract\n\nNative invalidation entrypoint:\n\n- `invalidateFsScanCache(path?: string)`\n - with `path`: remove cache entries whose root is a prefix of target path\n - without path: clear all scan cache entries\n\nPath handling details:\n\n- relative invalidation paths are resolved against cwd\n- invalidation attempts canonicalization\n- if target does not exist (e.g., delete), fallback canonicalizes parent and reattaches filename when possible\n- this preserves invalidation behavior for create/delete/rename where one side may not exist\n\n## Coding-agent mutation flow responsibilities\n\nCoding-agent code must invalidate after successful filesystem mutations.\n\nCentral helpers:\n\n- `invalidateFsScanAfterWrite(path)`\n- `invalidateFsScanAfterDelete(path)`\n- `invalidateFsScanAfterRename(oldPath, newPath)` (invalidates both sides when paths differ)\n\nCurrent mutation tool callsites:\n\n- `packages/coding-agent/src/tools/write.ts`\n- `packages/coding-agent/src/patch/index.ts` (hashline/patch/replace flows)\n\nRule: if a flow mutates filesystem content or location and bypasses these helpers, cache staleness bugs are expected.\n\n## Adding a new cache consumer safely\n\nWhen introducing cache use in a new scanner/search path:\n\n1. 
**Use stable scan policy inputs**\n - decide hidden/gitignore/node_modules semantics first\n - pass them consistently to `get_or_scan`/`force_rescan` so cache partitions are intentional\n\n2. **Treat cache data as pre-filtered only by traversal policy**\n - apply tool-specific filtering (glob patterns, type filters, scoring) after retrieval\n - never assume cached entries already reflect your higher-level filters\n\n3. **Implement empty-result fast recheck only for stale-negative risk**\n - use `scan.cache_age_ms >= empty_recheck_ms()`\n - retry once with `force_rescan(..., store=true, ...)`\n - keep this path separate from normal cache-hit logic\n\n4. **Respect no-cache mode explicitly**\n - when caller disables cache, call `force_rescan(..., store=false, ...)`\n - do not populate shared cache in a no-cache request path\n\n5. **Wire mutation invalidation for any new write path**\n - after successful write/edit/delete/rename, call the coding-agent invalidation helper\n - for rename/move, invalidate both old and new paths\n\n6. **Do not add per-call TTL knobs**\n - current contract is global policy only (env-configured), no per-request TTL override\n\n## Known boundaries\n\n- Cache scope is process-local in-memory (`DashMap`), not persisted across process restarts.\n- Cache stores scan entries, not final tool results.\n- `glob`/`fuzzyFind`/`grep` share scan entries only when key dimensions (`root`, `hidden`, `gitignore`, `skip_node_modules`) match.\n- `.git` is always excluded at scan collection time regardless of caller options.\n",
@@ -31,16 +31,16 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
31
31
  "natives-rust-task-cancellation.md": "# Native Rust task execution and cancellation (`pi-natives`)\n\nThis document describes how `crates/pi-natives` schedules native work and how cancellation flows from JS options (`timeoutMs`, `AbortSignal`) into Rust execution.\n\n## Implementation files\n\n- `crates/pi-natives/src/task.rs`\n- `crates/pi-natives/src/grep.rs`\n- `crates/pi-natives/src/glob.rs`\n- `crates/pi-natives/src/fd.rs`\n- `crates/pi-natives/src/ast.rs`\n- `crates/pi-natives/src/shell.rs`\n- `crates/pi-natives/src/pty.rs`\n- `crates/pi-natives/src/html.rs`\n- `crates/pi-natives/src/image.rs`\n- `crates/pi-natives/src/clipboard.rs`\n- `crates/pi-natives/src/text.rs`\n- `crates/pi-natives/src/ps.rs`\n\n## Core primitives (`task.rs`)\n\n`task.rs` defines:\n\n1. `task::blocking(tag, cancel_token, work)`\n - Wraps `napi::AsyncTask` / `Task`.\n - `compute()` runs on libuv worker threads.\n - Returns a JS `Promise<T>` for exported functions.\n - Records a profiling sample through `profile_region(tag)`.\n\n2. `task::future(env, tag, work)`\n - Wraps `env.spawn_future(...)`.\n - Runs async work on Tokio's runtime.\n - Returns `PromiseRaw<'env, T>`.\n - Records a profiling sample through `profile_region(tag)`.\n\n3. 
`CancelToken` / `AbortToken` / `AbortReason`\n - `CancelToken::new(timeout_ms, signal)` combines an optional deadline and optional JS `AbortSignal` converted from `Unknown`.\n - `CancelToken::heartbeat()` is cooperative cancellation for blocking loops.\n - `CancelToken::wait()` asynchronously waits for signal, timeout, or Ctrl-C.\n - `CancelToken::emplace_abort_token()` creates an abortable flag when a later `Shell.abort()`/internal bridge needs one.\n - `AbortToken::abort(reason)` lets external code request abort.\n\n## `blocking` vs `future`: execution model and selection\n\n### Use `task::blocking`\n\nUse when work is CPU-heavy or fundamentally synchronous/blocking:\n\n- regex/file scanning (`grep`, `glob`, `fuzzyFind`)\n- ast-grep search/edit worker work\n- PTY loop internals through `tokio::task::spawn_blocking`\n- image decode/resize/encode\n- HTML conversion\n- clipboard image read\n\nBehavior:\n\n- Work closure receives a cloned `CancelToken`.\n- Cancellation is only observed where code checks `ct.heartbeat()?`.\n- Closure `Err(...)` rejects the JS promise.\n\n### Use `task::future`\n\nUse when work must `await` async operations:\n\n- shell session orchestration (`Shell.run`, `executeShell`)\n- PTY outer promise (`PtySession.start`) before it enters `spawn_blocking`\n- task racing (`tokio::select!`) between completion and cancellation\n\nBehavior:\n\n- Future code can race normal completion against `ct.wait()`.\n- On cancel path, async implementations typically cancel subordinate machinery and may force-abort after a grace timeout.\n\n## JS API ↔ Rust export mapping (task/cancel relevant)\n\n| JS-facing API | Rust export | Scheduler | Cancellation hookup |\n| --------------------------------------- | ------------------------------------ | -------------------------------------------------------------- | ---------------------------------------------------------------------------------------- |\n| `grep(options, onMatch?)` | `grep` | `task::blocking(\"grep\", 
ct, ...)` | `CancelToken::new(options.timeoutMs, options.signal)` + heartbeat checks |\n| `glob(options, onMatch?)` | `glob` | `task::blocking(\"glob\", ct, ...)` | `CancelToken::new(...)` + heartbeat checks |\n| `fuzzyFind(options)` | `fuzzy_find` | `task::blocking(\"fuzzy_find\", ct, ...)` | `CancelToken::new(...)` + heartbeat checks |\n| `astGrep(options)` / `astEdit(options)` | ast exports | blocking worker path | timeout/signal fields are accepted by options and checked cooperatively in worker loops |\n| `Shell#run(options, onChunk?)` | `Shell::run` | `task::future(env, \"shell.run\", ...)` | `ct.wait()` raced against run task; bridges to Tokio cancellation token and `AbortToken` |\n| `executeShell(options, onChunk?)` | `execute_shell` | `task::future(env, \"shell.execute\", ...)` | same cancel race and 2s graceful window |\n| `PtySession#start(options, onChunk?)` | `PtySession::start` | `task::future(env, \"pty.start\", ...)` + inner `spawn_blocking` | `CancelToken` checked in sync PTY loop via `heartbeat()` |\n| `htmlToMarkdown(html, options?)` | `html_to_markdown` | `task::blocking(\"html_to_markdown\", (), ...)` | none (`()` token) |\n| `PhotonImage.parse/encode/resize` | `PhotonImage::{parse,encode,resize}` | `task::blocking(...)` | none (`()` token) |\n| `readImageFromClipboard()` | `read_image_from_clipboard` | `task::blocking(\"clipboard.read_image\", (), ...)` | none (`()` token) |\n\n`text.rs`, `tokens.rs`, `keys.rs`, most `ps.rs` functions, and synchronous utility exports do not use `task::blocking`/`task::future` and therefore do not participate in this cancellation path.\n\n## Cancellation lifecycle and state transitions\n\n### `CancelToken` lifecycle\n\n```text\nCreated\n ├─ no signal + no timeout -> passive token\n ├─ signal registered -> AbortSignal callback can set AbortReason::Signal\n └─ deadline set -> timeout check becomes active\n\nRunning\n ├─ heartbeat()/wait() sees signal -> AbortReason::Signal\n ├─ heartbeat()/wait() sees deadline -> 
AbortReason::Timeout\n ├─ wait() sees Ctrl-C -> AbortReason::User\n └─ no abort -> continue\n\nAborted\n └─ flag stores first observed cause for waiters; heartbeat formats it as \"Aborted: <reason>\"\n```\n\n### Before-start vs mid-execution cancellation\n\n- **Before start / before first cancellation check**:\n - `task::future` users that race on `ct.wait()` can resolve cancellation once they enter `select!`.\n - `task::blocking` users only observe cancellation when closure code reaches `heartbeat()`.\n\n- **Mid-execution**:\n - `blocking`: next `heartbeat()` returns `Err(\"Aborted: ...\")`.\n - `future`: `ct.wait()` branch wins `select!`, then code cancels subordinate async machinery.\n - shell: cancellation triggers a Tokio cancellation token, waits up to 2 seconds, then aborts the task if needed.\n - PTY: heartbeat failure or `kill()` terminates PTY child/process tree and drains output briefly.\n\n## Heartbeat expectations for long-running loops\n\n`heartbeat()` must run at predictable cadence in loops with unbounded or large work sets.\n\nObserved patterns:\n\n- `glob` filtering checks entries during scan/filter work.\n- `fd` scoring checks scanned candidates.\n- `grep` checks before/during expensive search and passes tokens into shared scan/cache helpers.\n- `run_pty_sync` checks every loop tick with a maximum 16ms wait cadence.\n\nPractical rule: no loop over external-size input should exceed a short bounded interval without a heartbeat.\n\n## Failure behavior and error propagation to JS\n\n### Blocking tasks\n\nError path:\n\n1. Closure returns `Err(napi::Error)` (including `heartbeat()` abort).\n2. `Task::compute()` returns `Err`.\n3. `AsyncTask` rejects JS promise.\n\nTypical error strings:\n\n- `Aborted: Timeout`\n- `Aborted: Signal`\n- domain errors (`Failed to decode image: ...`, `Conversion error: ...`, etc.)\n\n### Future tasks\n\nError path:\n\n1. Async body returns `Err(napi::Error)` or join failure is mapped (`... task failed: {err}`).\n2. 
`task::future`-spawned promise rejects.\n3. Shell and PTY command APIs model cancellation as structured results instead of rejection when the cancellation path wins: `exitCode` omitted, `cancelled` or `timedOut` set.\n\n### Cancellation reporting split\n\n- **Abort as error**: blocking exports using `heartbeat()?`.\n- **Abort as typed result**: shell/PTY command APIs that model cancellation in result structs.\n\nChoose one model per API and document it explicitly.\n\n## Common pitfalls\n\n1. **Missing heartbeat in blocking loops**\n - Symptom: timeout/signal appears ignored until loop ends.\n - Fix: add `ct.heartbeat()?` at loop top and before expensive per-item steps.\n\n2. **Long uncancelable sections**\n - Symptom: cancellation latency spikes during single large call (decode, sort, compression, parser invocation, etc.).\n - Fix: split work into chunks with heartbeat boundaries; if impossible, document latency.\n\n3. **Blocking async executor**\n - Symptom: async API stalls when sync-heavy code runs directly in future.\n - Fix: move CPU/sync blocks to `task::blocking` or `tokio::task::spawn_blocking`.\n\n4. **Inconsistent cancel semantics**\n - Symptom: one API rejects on cancel, another resolves with flags, confusing callers.\n - Fix: standardize per domain and keep docs aligned.\n\n5. **Forgetting cancellation bridge in nested async tasks**\n - Symptom: outer token is cancelled but inner readers/subprocess tasks keep running.\n - Fix: bridge cancellation to inner token/signal and enforce grace timeout + forced abort fallback.\n\n## Checklist for new cancellable exports\n\n1. Classify work correctly:\n - CPU-bound or sync blocking -> `task::blocking`.\n - async I/O / `await` orchestration -> `task::future`.\n\n2. Expose cancel inputs when needed:\n - include `timeoutMs` and `signal` in `#[napi(object)]` options,\n - create `let ct = task::CancelToken::new(timeout_ms, signal);`.\n\n3. 
Wire cancellation through all layers:\n - blocking loops: `ct.heartbeat()?` at stable intervals,\n - async orchestration: race with `ct.wait()` and cancel sub-tasks/tokens.\n\n4. Decide cancellation contract:\n - reject promise with abort error, or\n - resolve typed `{ cancelled, timedOut, ... }`,\n - keep this contract consistent for the API family.\n\n5. Propagate failures with context:\n - map errors via `Error::from_reason(format!(\"...: {err}\"))`,\n - include stage-specific prefixes (`spawn`, `decode`, `wait`, etc.).\n\n6. Handle before-start and mid-flight cancellation:\n - cancellation check/await must happen before expensive body and during long execution.\n\n7. Validate no executor misuse:\n - no long sync work directly inside async futures without `spawn_blocking`/blocking task wrapper.\n",
32
32
  "natives-shell-pty-process.md": "# Natives Shell, PTY, Process, and Key Internals\n\nThis document covers the execution/process/terminal primitives in `@oh-my-pi/pi-natives`: `shell`, `pty`, `ps`, and `keys`, using the architecture terms from `docs/natives-architecture.md`.\n\n## Implementation files\n\n- `crates/pi-natives/src/shell.rs`\n- `crates/pi-natives/src/shell/windows.rs` (Windows-only PATH enrichment)\n- `crates/pi-natives/src/pty.rs`\n- `crates/pi-natives/src/ps.rs`\n- `crates/pi-natives/src/keys.rs`\n- `crates/pi-natives/src/task.rs`\n- `packages/natives/native/index.d.ts`\n\n## Layer ownership\n\n- **Package entrypoint** (`packages/natives/native/index.js`): loads the `.node` addon and exports generated N-API bindings.\n- **Rust N-API module layer** (`crates/pi-natives/src/*`): shell/PTY process execution, process-tree traversal/termination, and key-sequence parsing.\n- **Consumers** (`packages/coding-agent`, `packages/tui`): higher-level session policy, output artifact/minimizer handling, render policy, and UI key handling.\n\n## Shell subsystem (`shell`)\n\n### API model\n\nTwo execution modes are exposed:\n\n1. **One-shot** via `executeShell(options, onChunk?)`.\n2. **Persistent session** via `new Shell(options?)` then `shell.run(...)` repeatedly.\n\nBoth stream output through a threadsafe callback and return `{ exitCode?, cancelled, timedOut, minimized? }`.\n\n`ShellOptions` supports `sessionEnv`, `snapshotPath`, and optional output `minimizer`. `ShellExecuteOptions` supports command-scoped `env`, session-level `sessionEnv`, `snapshotPath`, timeout/signal, and optional minimizer. 
`ShellRunOptions` supports command, cwd, command-scoped env, timeout, and signal.\n\n### Session creation and environment model\n\nRust creates `brush_core::Shell` with:\n\n- non-interactive, non-login mode,\n- `no_profile` and `no_rc`,\n- `do_not_inherit_env: true`,\n- bash-mode builtins, with `exec` and `suspend` disabled,\n- explicit environment reconstruction from host env,\n- skip-list for shell-sensitive vars (`PS1`, `PWD`, `SHLVL`, bash function exports, etc.).\n\nSession env behavior:\n\n- `ShellOptions.sessionEnv` / one-shot `sessionEnv` is applied at session creation.\n- `ShellRunOptions.env` / one-shot `env` is command-scoped (`EnvironmentScope::Command`) and popped after the command.\n- `PATH` is merged specially on Windows with case-insensitive dedupe.\n- Windows-only path enrichment (`shell/windows.rs`) appends discovered Git-for-Windows paths when present and not already included.\n- `snapshotPath`, when present, is sourced during session creation with stdout/stderr/stdin wired to null files.\n\n### Runtime lifecycle and state transitions\n\nPersistent shell (`Shell.run`) uses this state machine:\n\n- **Idle/Uninitialized**: `session: None`.\n- **Running**: first `run()` lazily creates a session, stores an abort token, executes command.\n- **Completed + keepalive**: if execution control flow is normal, abort state is cleared and session is reused.\n- **Completed + teardown**: if control flow is loop/script/shell-exit related, session is dropped.\n- **Cancelled/Timed out**: run task is cancelled, grace wait is 2 seconds, task may be force-aborted, session is dropped if lock can be acquired.\n- **Error**: session is dropped.\n\nOne-shot shell (`executeShell`) always creates and drops a fresh session per call.\n\n### Streaming/output and minimizer behavior\n\n- Stdout/stderr are routed into a shared pipe and read concurrently.\n- Reader decodes UTF-8 incrementally; invalid byte sequences emit `U+FFFD` replacement chunks.\n- The command runs in a new 
process group.\n- Optional minimizer configuration can capture and rewrite output. When minimization occurs, the result includes `minimized` with filter name, replacement text, original text, and byte counts.\n- Consumers are responsible for persisting or displaying minimizer artifacts; the native result only carries the data.\n\n### Cancellation, timeout, and abort\n\n- `CancelToken` is constructed from `timeoutMs` and optional `AbortSignal`.\n- On cancellation/timeout, shell cancellation token is triggered, then task gets a 2-second graceful window before forced abort.\n- Structured result flags are used:\n - timeout -> `exitCode` omitted, `timedOut: true`.\n - abort signal / `Shell.abort()` -> `exitCode` omitted, `cancelled: true`.\n\n`Shell.abort()` behavior:\n\n- aborts the current running command for that `Shell` instance through the stored `AbortToken`,\n- resolves successfully even when nothing is running.\n\n### Failure behavior\n\nCommon surfaced errors include:\n\n- session init failures (`Failed to initialize shell`),\n- cwd errors (`Failed to set cwd`),\n- env set/pop failures,\n- snapshot source failures (`Failed to source snapshot`),\n- pipe creation/clone failures,\n- execution failure (`Shell execution failed: ...`),\n- task wrapper failures (`Shell execution task failed: ...`).\n\n## PTY subsystem (`pty`)\n\n### API model\n\n`new PtySession()` exposes:\n\n- `start(options, onChunk?) 
-> Promise<{ exitCode?, cancelled, timedOut }>`\n- `write(data)`\n- `resize(cols, rows)`\n- `kill()`\n\n`PtyStartOptions` supports `command`, optional `cwd`, optional `env`, `timeoutMs`, `signal`, `cols`, and `rows`.\n\n### Runtime lifecycle and state transitions\n\n`PtySession` state machine:\n\n- **Idle**: `core: None`.\n- **Reserved**: `start()` installs control channel synchronously (`core: Some`) before async work begins, so `write/resize/kill` become immediately valid.\n- **Running**: blocking PTY loop handles child state, reader events, cancellation heartbeat, and control messages.\n- **Terminal closed / drain**: child exit or cancellation starts a short reader drain window.\n- **Finalized**: `core` is always reset to `None` after start task completion (success or error).\n\nConcurrency guard:\n\n- starting while already running returns `PTY session already running`.\n\n### Spawn/attach/write/read/terminate patterns\n\n- PTY opened via `portable_pty::native_pty_system().openpty(...)`.\n- Command currently runs as `sh -lc <command>` with optional `cwd` and env overrides.\n- Default size is `120x40`; dimensions are clamped (`cols 20..400`, `rows 5..200`).\n- `write()` sends raw bytes to PTY stdin.\n- `resize()` sends a control message and clamps dimensions again.\n- `kill()` sends a control message that marks the run cancelled and terminates the child/process tree.\n\nOutput path:\n\n- dedicated reader thread reads master stream,\n- incremental UTF-8 decode emits `U+FFFD` for invalid bytes,\n- chunks forwarded through N-API threadsafe callback.\n\nTermination path:\n\n- Unix: terminate process group when known, terminate child tree, call child kill, then repeat with SIGKILL.\n- Non-Unix: terminate child tree, call child kill, then repeat with SIGKILL-equivalent process-tree helper.\n\n### Cancellation and timeout semantics\n\n- `timeoutMs` and `AbortSignal` feed a `CancelToken`.\n- Loop calls `ct.heartbeat()` periodically with a 16ms maximum wait cadence.\n- 
Timeout classification is based on the heartbeat error string containing `Timeout`.\n- Cancellation/kill starts a 300ms post-cancel drain window; normal child exit starts a 300ms post-exit drain window.\n\n### Failure behavior\n\nError surfaces include:\n\n- PTY allocation/open failure,\n- PTY spawn failure,\n- writer/reader acquisition failure,\n- child status/wait failures,\n- lock poisoning,\n- control-channel disconnection (`PTY session is no longer available`).\n\nControl call failures when not running:\n\n- `write/resize/kill` return `PTY session is not running`.\n\n## Process-tree subsystem (`ps`)\n\n### API model\n\n- `killTree(pid, signal) -> number`\n- `listDescendants(pid) -> number[]`\n\n### Platform-specific implementation\n\n- **Linux**: recursively reads `/proc/<pid>/task/<pid>/children`.\n- **macOS**: uses `libproc` `proc_listchildpids`.\n- **Windows**: snapshots process table with `CreateToolhelp32Snapshot`, builds parent->children map, terminates with `OpenProcess(PROCESS_TERMINATE)` + `TerminateProcess`.\n\n### Kill-tree behavior\n\n- Descendants are collected recursively.\n- Kill order is bottom-up (deepest descendants first).\n- Root pid is killed last.\n- Return value is count of successful terminations.\n\nSignal behavior:\n\n- POSIX: provided `signal` is passed to `kill`.\n- Windows: `signal` is ignored; termination is unconditional process terminate.\n\n### Failure behavior\n\nThis module is intentionally non-throwing at API surface for ordinary process misses:\n\n- missing/inaccessible process tree branches are skipped,\n- per-pid kill failures are counted as unsuccessful,\n- lookup miss typically yields `[]` from `listDescendants` and `0` from `killTree`.\n\n## Key parsing subsystem (`keys`)\n\n### API model\n\nExposed helpers:\n\n- `parseKey(data, kittyProtocolActive)`\n- `matchesKey(data, keyId, kittyProtocolActive)`\n- `parseKittySequence(data)`\n- `matchesKittySequence(data, expectedCodepoint, expectedModifier)`\n- 
`matchesLegacySequence(data, keyName)`\n\n### Parsing model\n\nThe parser combines:\n\n- direct single-byte mappings (`enter`, `tab`, `ctrl+<letter>`, printable ASCII),\n- O(1) legacy escape-sequence lookup (PHF map),\n- xterm `modifyOtherKeys` parsing,\n- Kitty protocol parsing (`CSI u`, `CSI ~`, `CSI 1;...<letter>`),\n- normalization to key IDs (`ctrl+c`, `shift+tab`, `pageUp`, `f5`, etc.).\n\nModifier handling:\n\n- only shift/alt/ctrl bits are compared for key matching,\n- lock bits are masked out before comparisons.\n\nLayout behavior:\n\n- base-layout fallback is intentionally constrained so remapped layouts do not create false matches for ASCII letters/symbols.\n\n### Failure behavior\n\n- Unrecognized or invalid sequences produce `null` from parse functions.\n- Match functions return `false` on parse failure or mismatch.\n- No thrown error surface for malformed key input.\n\n## JS API ↔ Rust export mapping\n\n### Shell + PTY + Process\n\n| JS API | Rust N-API export | Notes |\n| --------------------------------- | -------------------------------------- | ----------------------------------------- |\n| `executeShell(options, onChunk?)` | `executeShell` (`execute_shell`) | One-shot shell execution |\n| `new Shell(options?)` | `Shell` class | Persistent shell session |\n| `shell.run(options, onChunk?)` | `Shell::run` | Reuses session on keepalive control flow |\n| `shell.abort()` | `Shell::abort` | Aborts active run for that shell instance |\n| `new PtySession()` | `PtySession` class | Stateful PTY session |\n| `pty.start(options, onChunk?)` | `PtySession::start` | Interactive PTY run |\n| `pty.write(data)` | `PtySession::write` | Raw stdin passthrough |\n| `pty.resize(cols, rows)` | `PtySession::resize` | Clamped terminal dimensions |\n| `pty.kill()` | `PtySession::kill` | Force-kills active PTY child |\n| `killTree(pid, signal)` | `killTree` (`kill_tree`) | Children-first process tree termination |\n| `listDescendants(pid)` | `listDescendants` 
(`list_descendants`) | Recursive descendants listing |\n\n### Keys\n\n| JS API | Rust N-API export | Notes |\n| ---------------------------------------------- | --------------------------------------------------- | ------------------------------- |\n| `matchesKittySequence(data, cp, mod)` | `matchesKittySequence` (`matches_kitty_sequence`) | Kitty codepoint+modifier match |\n| `parseKey(data, kittyProtocolActive)` | `parseKey` (`parse_key`) | Normalized key-id parser |\n| `matchesLegacySequence(data, keyName)` | `matchesLegacySequence` (`matches_legacy_sequence`) | Exact legacy sequence map check |\n| `parseKittySequence(data)` | `parseKittySequence` (`parse_kitty_sequence`) | Structured Kitty parse result |\n| `matchesKey(data, keyId, kittyProtocolActive)` | `matchesKey` (`matches_key`) | High-level key matcher |\n\n## Abandoned session cleanup and finalization notes\n\n- **Shell persistent session**: if a run is cancelled, times out, errors, or ends with non-keepalive control flow, Rust drops the internal session state. Successful normal runs keep the session for reuse.\n- **PTY session**: `core` is always cleared after `start()` finishes, including failure paths.\n- **No explicit JS finalizer-driven kill contract** is exposed by wrappers; cleanup is primarily tied to run completion/cancellation paths. Callers should use `timeoutMs`, `AbortSignal`, `shell.abort()`, or `pty.kill()` for deterministic teardown.\n",
33
33
  "natives-text-search-pipeline.md": "# Natives Text/Search Pipeline\n\nThis document maps the `@oh-my-pi/pi-natives` text/search/code surface from generated JS/TS exports to Rust N-API modules and back to JS result objects.\n\nTerminology follows `docs/natives-architecture.md`:\n\n- **Generated binding**: public API in `packages/natives/native/index.d.ts`.\n- **Rust module layer**: N-API exports in `crates/pi-natives/src/*`.\n- **Shared scan cache**: `fs_cache`-backed directory-entry cache used by discovery/search flows.\n\n## Implementation files\n\n- `packages/natives/native/index.d.ts`\n- `crates/pi-natives/src/grep.rs`\n- `crates/pi-natives/src/glob.rs`\n- `crates/pi-natives/src/glob_util.rs`\n- `crates/pi-natives/src/fs_cache.rs`\n- `crates/pi-natives/src/fd.rs`\n- `crates/pi-natives/src/ast.rs`\n- `crates/pi-natives/src/text.rs`\n- `crates/pi-natives/src/highlight.rs`\n- `crates/pi-natives/src/tokens.rs`\n\n## JS API ↔ Rust export mapping\n\n| JS API | Rust export (`#[napi]`, snake_case -> camelCase) | Rust module |\n| ------------------------------------------------------------------------------- | ------------------------------------------------ | -------------- |\n| `grep(options, onMatch?)` | `grep` | `grep.rs` |\n| `search(content, options)` | `search` | `grep.rs` |\n| `hasMatch(content, pattern, ignoreCase?, multiline?)` | `hasMatch` | `grep.rs` |\n| `fuzzyFind(options)` | `fuzzyFind` | `fd.rs` |\n| `glob(options, onMatch?)` | `glob` | `glob.rs` |\n| `invalidateFsScanCache(path?)` | `invalidateFsScanCache` | `fs_cache.rs` |\n| `astGrep(options)` | `astGrep` | `ast.rs` |\n| `astEdit(options)` | `astEdit` | `ast.rs` |\n| `wrapTextWithAnsi(text, width, tabWidth)` | `wrapTextWithAnsi` | `text.rs` |\n| `truncateToWidth(text, maxWidth, ellipsis, pad, tabWidth)` | `truncateToWidth` | `text.rs` |\n| `sliceWithWidth(line, startCol, length, strict, tabWidth)` | `sliceWithWidth` | `text.rs` |\n| `extractSegments(line, beforeEnd, afterStart, afterLen, strictAfter, 
tabWidth)` | `extractSegments` | `text.rs` |\n| `sanitizeText(text)` | `sanitizeText` | `text.rs` |\n| `visibleWidth(text, tabWidth)` | `visibleWidth` | `text.rs` |\n| `highlightCode(code, lang, colors)` | `highlightCode` | `highlight.rs` |\n| `supportsLanguage(lang)` | `supportsLanguage` | `highlight.rs` |\n| `getSupportedLanguages()` | `getSupportedLanguages` | `highlight.rs` |\n| `countTokens(input, encoding?)` | `countTokens` | `tokens.rs` |\n\n## Pipeline overview by subsystem\n\n## 1) Regex search (`grep`, `search`, `hasMatch`)\n\n### Input/options flow\n\n1. Callers invoke generated native exports directly; there is no package-local TS wrapper that renames `search` to `searchContent`.\n2. Rust option structs in `grep.rs` deserialize camelCase fields (`ignoreCase`, `maxCount`, `contextBefore`, `contextAfter`, `maxColumns`, `timeoutMs`).\n3. `grep` creates `CancelToken` from `timeoutMs` + `AbortSignal` and runs inside `task::blocking(\"grep\", ...)`.\n4. `search` and `hasMatch` operate on provided string/`Uint8Array` content and do not scan the filesystem.\n\n### Execution branches\n\n- **In-memory branch**\n - `search` -> `search_sync` / search helpers over provided content bytes.\n - `hasMatch` compiles/checks pattern against provided content and returns a boolean.\n - No filesystem scan, no `fs_cache`.\n- **Single-file branch**\n - `grep` resolves path, checks metadata is file, and searches that file.\n- **Directory branch**\n - Optional cache lookup via `fs_cache::get_or_scan` when `cache: true`.\n - Fresh scan via `fs_cache::force_rescan` when `cache: false`.\n - Optional empty-result recheck when cached results are older than the empty-result recheck threshold.\n - Entry filtering: file-only + optional glob filter (`glob_util`) + optional type filter mapping (`js`, `ts`, `rust`, etc.).\n\n### Search/collection semantics\n\n- Regex engine: `grep_regex::RegexMatcherBuilder` with `ignoreCase` and `multiline`.\n- Context resolution:\n - 
`contextBefore/contextAfter` override legacy `context`.\n - Non-content modes do not collect context.\n- Output modes:\n - `content` -> one `GrepMatch` per hit.\n - `count` and `filesWithMatches` map to count-style entries (`lineNumber=0`, `line=\"\"`, `matchCount` set).\n- Limits:\n - Global `offset` and `maxCount` apply across files.\n - Parallel path is used only when `maxCount` is unset and `offset == 0`; otherwise sequential path preserves deterministic global offset/limit semantics.\n\n### Result shaping back to JS\n\n- Rust `SearchResult`/`GrepResult` fields map to TS interfaces via N-API object conversion.\n- Counters are clamped before crossing N-API where needed.\n- `GrepResult.limitReached` is optional and emitted when true.\n- Streaming callback receives each shaped `GrepMatch` for content or count-style entries.\n\n### Failure behavior\n\n- `search` returns `SearchResult.error` for regex/search failures instead of throwing.\n- `grep` rejects on hard errors such as invalid path, invalid glob/regex, or cancellation timeout/abort.\n- `hasMatch` returns a boolean on success and throws on invalid pattern/UTF-8 conversion errors.\n- File open/search errors in multi-file scans are skipped per-file; scan continues.\n\n### Malformed regex handling\n\n`grep.rs` sanitizes braces before regex compile:\n\n- Invalid repetition-like braces are escaped (`{`/`}` -> `\\{`/`\\}`) when they cannot form `{N}`, `{N,}`, `{N,M}`.\n- This prevents common literal-template fragments (for example `${platform}`) from failing as malformed repetition.\n- Remaining invalid regex syntax still returns a regex error.\n\n## 2) File discovery (`glob`) and fuzzy path search (`fuzzyFind`)\n\n`glob` and `fuzzyFind` share `fs_cache` scans; matching logic differs.\n\n### `glob` flow\n\n1. Caller passes `GlobOptions` directly. `pattern` and `path` are required in the generated type.\n2. Rust resolves the search path and compiles pattern via `glob_util::compile_glob`.\n3. 
Entry source:\n - `cache=true` -> `get_or_scan` + optional stale-empty `force_rescan`.\n - `cache=false` -> `force_rescan(..., store=false)` (fresh only).\n4. Filtering:\n - skip `.git` always;\n - skip `node_modules` unless requested (`includeNodeModules`) or pattern mentions `node_modules`;\n - apply glob match;\n - apply file-type filter; symlink `file`/`dir` filters resolve target metadata.\n5. Optional sort by mtime descending (`sortByMtime`) before truncating to `maxResults`.\n\n### `fuzzyFind` flow\n\n1. Rust implementation lives in `fd.rs`; generated export is `fuzzyFind`.\n2. Shared scan source from `fs_cache` with the same cache/no-cache split and stale-empty recheck policy.\n3. Scoring:\n - exact / starts-with / contains / subsequence-based fuzzy score;\n - separator/punctuation-normalized scoring path;\n - directory bonus and deterministic tie-break (`score desc`, then `path asc`).\n4. Symlink entries are excluded from fuzzy results.\n\n### Failure behavior\n\n- Invalid glob pattern returns an error from `glob_util::compile_glob`.\n- Search root must resolve to an existing directory for directory discovery flows.\n- Cancellation/timeouts propagate as abort errors via `CancelToken::heartbeat()` checks in loops.\n\n### Malformed glob handling\n\n`glob_util::build_glob_pattern` is tolerant:\n\n- normalizes `\\` to `/`,\n- auto-prefixes simple recursive patterns with `**/` when `recursive=true`,\n- auto-closes unbalanced `{...` alternation groups before compile.\n\n## 3) AST search/edit (`astGrep`, `astEdit`)\n\n`ast.rs` exposes syntax-aware code search and rewrite operations.\n\n- `astGrep(options)` returns matches with byte/line/column coordinates and optional metavariable bindings.\n- `astEdit(options)` returns replacement changes, per-file counts, searched/touched file counts, parse errors, and whether edits were applied.\n- `dryRun` defaults to true for edit options in the generated documentation.\n- Options include language override, 
path/glob/selector, strictness, limits, parse-error policy, `signal`, and `timeoutMs`.\n\nThese exports are direct native APIs used by tooling; they are not mediated by a TS wrapper in `packages/natives`.\n\n## 4) Shared scan/cache lifecycle (`fs_cache`)\n\n`fs_cache` stores scan results as normalized relative entries (`path`, `fileType`, optional `mtime`) keyed by:\n\n- canonical search root,\n- `include_hidden`,\n- `use_gitignore`.\n\n### Cache state transitions\n\n1. **Miss / disabled**\n - TTL is `0` or key absent/expired -> fresh collection.\n2. **Hit**\n - Entry age is within TTL -> return cached entries + `cache_age_ms`.\n3. **Stale-empty recheck**\n - If query yields zero matches and cache age exceeds the empty-result threshold, force one rescan.\n4. **Invalidation**\n - `invalidateFsScanCache(path?)`:\n - no arg: clear all keys;\n - path arg: remove keys for roots affected by that path.\n\n### Stale-result tradeoff\n\n- Cache favors low-latency repeated scans over immediate consistency.\n- TTL window can return stale positives/negatives.\n- Empty-result recheck reduces stale negatives for older cached scans at the cost of one extra scan.\n- Explicit invalidation is the intended correctness hook after file mutations.\n\n## 5) ANSI text utilities (`text`)\n\nThese are pure, in-memory utilities.\n\n### Boundaries and responsibilities\n\n- `text.rs` owns terminal-cell semantics:\n - ANSI sequence parsing,\n - grapheme-aware width and slicing,\n - wrap/truncate/sanitize behavior,\n - explicit tab-width parameter on width-sensitive APIs.\n- `grep.rs` line truncation (`maxColumns`) is separate:\n - simple character-boundary truncation of matched lines with `...`,\n - not ANSI-state-preserving and not terminal-cell width aware.\n\n### Key behaviors\n\n- `wrapTextWithAnsi`: wraps by visible width, carries active SGR codes across wrapped lines.\n- `truncateToWidth`: visible-cell truncation with ellipsis policy (`Unicode`, `Ascii`, `Omit`), optional right padding.\n- 
`sliceWithWidth`: column slicing with optional strict width enforcement.\n- `extractSegments`: extracts before/after segments around an overlay while restoring ANSI state for the `after` segment.\n- `sanitizeText`: strips ANSI escapes + control chars, drops lone surrogates, normalizes line endings.\n- `visibleWidth`: counts visible terminal cells using caller-supplied tab width.\n\n### Failure behavior\n\nText functions generally return deterministic transformed output; errors are limited to N-API argument/string conversion boundaries.\n\n## 6) Syntax highlighting (`highlight`)\n\n`highlight.rs` is pure transformation; it does not use the filesystem scan cache.\n\n### Flow\n\n1. Caller passes `code`, optional `lang`, and ANSI color palette.\n2. Rust resolves syntax by token/name lookup, extension lookup, alias table fallback, then plain-text fallback.\n3. Each line is parsed with syntect `ParseState` and scope stack.\n4. Scopes map to semantic color categories and ANSI color codes are injected/reset.\n\n### Failure behavior\n\n- Per-line parse failure does not fail the call: that line is appended unhighlighted and processing continues.\n- Unknown/unsupported language falls back to plain text syntax.\n\n## 7) Token counting (`tokens`)\n\n`countTokens(input, encoding?)` is an in-memory utility.\n\n- `input` may be a single string or an array of strings.\n- Arrays return one aggregate count and are encoded in parallel in Rust.\n- Default encoding is `O200kBase`; `Cl100kBase` is also available.\n- The implementation uses ordinary tokenization, not special-token handling.\n\n## Pure utility vs filesystem-dependent flows\n\n| Flow | Filesystem access | Shared cache | Notes |\n| ---------------------------- | ----------------- | -------------------- | --------------------------------------------- |\n| `search` / `hasMatch` | No | No | regex on provided bytes/string only |\n| `text` module functions | No | No | ANSI/width/sanitization only |\n| `highlight` module functions 
| No | No | syntax + ANSI coloring only |\n| `countTokens` | No | No | tokenization only |\n| `astGrep` / `astEdit` | Yes | No | syntax-aware file search/edit |\n| `glob` | Yes | Optional | directory scans + glob filtering |\n| `fuzzyFind` | Yes | Optional | directory scans + fuzzy scoring |\n| `grep` (file/dir path) | Yes | Optional in dir mode | ripgrep over files, optional filters/callback |\n\n## End-to-end lifecycle summary\n\n1. Caller invokes generated native export with typed options.\n2. Rust validates/normalizes options and builds matcher/search config.\n3. For filesystem flows, entries are scanned (cache hit/miss/rescan where applicable) then filtered/scored/searched.\n4. Worker loops periodically call cancel heartbeat; timeout/abort can terminate execution.\n5. Rust shapes outputs into N-API objects (`lineNumber`, `matchCount`, `limitReached`, etc.).\n6. Generated bindings return typed JS objects and invoke optional per-match callbacks for `grep`/`glob`.\n",
34
- "non-compaction-retry-policy.md": "# Non-compaction auto-retry policy\n\nThis document describes the standard API-error retry path in `AgentSession`.\n\nIt explicitly excludes context-overflow recovery via auto-compaction. Overflow is handled by compaction logic and is documented separately in [`compaction.md`](../docs/compaction.md).\n\n## Implementation files\n\n- [`../src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts)\n- [`../src/config/settings-schema.ts`](../packages/coding-agent/src/config/settings-schema.ts)\n- [`../src/modes/controllers/event-controller.ts`](../packages/coding-agent/src/modes/controllers/event-controller.ts)\n- [`../src/modes/rpc/rpc-mode.ts`](../packages/coding-agent/src/modes/rpc/rpc-mode.ts)\n- [`../src/modes/rpc/rpc-client.ts`](../packages/coding-agent/src/modes/rpc/rpc-client.ts)\n- [`../src/modes/rpc/rpc-types.ts`](../packages/coding-agent/src/modes/rpc/rpc-types.ts)\n\n## Scope boundary vs compaction\n\nRetry and compaction are checked from the same `agent_end` path, but they are intentionally separated:\n\n1. `agent_end` inspects the last assistant message.\n2. `#isRetryableError(...)` runs first.\n3. If retry is initiated, compaction checks are skipped for that turn.\n4. Context-overflow errors are hard-excluded from retry classification (`isContextOverflow(...)` short-circuits retry).\n5. 
Overflow therefore falls through to `#checkCompaction(...)` instead of standard retry.\n\nSo: overload/rate/server/network-style failures use this retry policy; context-window overflow uses compaction recovery.\n\n## Retry classification\n\n`#isRetryableError(...)` requires all of the following:\n\n- assistant `stopReason === \"error\"`\n- `errorMessage` exists\n- message is **not** context overflow\n- `errorMessage` matches transient transport/envelope patterns or `isUsageLimitError(...)`\n\nCurrent retryable inputs are regex/string-classified:\n\n- transient transport/envelope failures, including Anthropic stream-envelope failures before `message_start`\n- overloaded/provider-returned-error wording\n- rate limit / usage limit / too many requests\n- HTTP-like server classes: 429, 500, 502, 503, 504\n- service unavailable / server/internal error\n- network/connection/socket failures, refused/closed connections, upstream connect/reset-before-headers, socket hang up, timeout/timed out, fetch failed, terminated, retry delay wording, and unexpected socket close messages\n\nThis is string-pattern classification, not typed provider error codes.\n\n## Retry lifecycle and state transitions\n\nSession state used by retry:\n\n- `#retryAttempt: number` (`0` means idle)\n- `#retryPromise: Promise<void> | undefined` (tracks in-progress retry lifecycle)\n- `#retryResolve: (() => void) | undefined` (resolves `#retryPromise`)\n- `#retryAbortController: AbortController | undefined` (cancels backoff sleep)\n\nFlow (`#handleRetryableError`):\n\n1. Read `retry` settings group.\n2. If `retry.enabled === false`, stop immediately (`false`, no retry started).\n3. Increment `#retryAttempt`.\n4. Create `#retryPromise` once (first attempt in a chain).\n5. If attempt exceeded `retry.maxRetries`, emit final failure event and stop.\n6. Compute base delay: `retry.baseDelayMs * 2^(attempt-1)`.\n7. 
For usage-limit errors, parse retry hints and call auth storage (`markUsageLimitReached(...)`); if credential switching succeeds, force delay to `0`, otherwise use a larger retry-after/backoff hint when present.\n8. If no credential switch occurred, suppress the current model selector for cooldown, try configured retry model fallback chains, and force delay to `0` on model switch.\n9. Emit `auto_retry_start`.\n10. Remove the trailing assistant error message from agent runtime state (kept in persisted session history).\n11. Sleep with abort support.\n12. Schedule `agent.continue()` through the post-prompt task scheduler (`delayMs: 1`) for the same prompt generation.\n\n### What resets retry counters\n\n`#retryAttempt` resets to `0` in these cases:\n\n- first successful non-error, non-aborted assistant message after retries started (emits `auto_retry_end { success: true }`)\n- retry cancellation during backoff sleep\n- max retries exceeded path\n\n`#retryPromise` resolves/clears when retry chain ends (success, cancellation, or max-exceeded), via `#resolveRetry()`.\n\n## Backoff and max-attempt semantics\n\nSettings:\n\n- `retry.enabled` (default `true`)\n- `retry.maxRetries` (default `3`)\n- `retry.baseDelayMs` (default `2000`)\n\nAttempt numbering:\n\n- attempt counter is incremented before max-check\n- start events use current attempt (1-based)\n- max-exceeded end event reports `attempt: this.#retryAttempt - 1` (last attempted retry count)\n\nBackoff sequence with default settings:\n\n- attempt 1: 2000 ms\n- attempt 2: 4000 ms\n- attempt 3: 8000 ms\n\nDelay override inputs can come from parsed retry headers (`retry-after-ms`, `retry-after`, `x-ratelimit-reset-ms`, `x-ratelimit-reset`) or usage-limit backoff. 
Credential/model fallback switches set delay to `0`; otherwise parsed hints can extend the exponential local delay.\n\n## Abort mechanics\n\n### Explicit retry abort\n\n`abortRetry()`:\n\n- aborts `#retryAbortController` (if present)\n- resolves retry promise (`#resolveRetry()`) so awaiters are unblocked\n\nIf abort hits while sleeping, catch path emits:\n\n- `auto_retry_end { success: false, finalError: \"Retry cancelled\" }`\n- resets attempt/controller\n\n### Global operation abort interaction\n\n`abort()` calls `abortRetry()` before aborting the active agent stream. This guarantees retry backoff is cancelled when user issues a general abort.\n\n### TUI interaction\n\nOn `auto_retry_start`, EventController:\n\n- swaps `Esc` handler to `session.abortRetry()`\n- renders loader text: `Retrying (attempt/maxAttempts) in Ns… (esc to cancel)`\n\nOn `auto_retry_end`, it restores prior `Esc` handler and clears loader state.\n\n## Streaming and prompt completion behavior\n\n`prompt()` ultimately waits on `#waitForRetry()` after `agent.prompt(...)` returns.\n\nEffect:\n\n- a prompt call does not fully resolve until any started retry chain finishes (success/failure/cancel)\n- retry lifecycle is part of one logical prompt execution boundary\n\nThis prevents callers from treating a retrying turn as complete too early.\n\n## Controls: settings and RPC\n\n### Configuration knobs\n\nDefined in settings schema under retry group:\n\n- `retry.enabled`\n- `retry.maxRetries`\n- `retry.baseDelayMs`\n- `retry.fallbackChains`\n- `retry.fallbackRevertPolicy` (`\"cooldown-expiry\"` by default; `\"never\"` disables automatic restoration)\n\nProgrammatic toggles in session:\n\n- `setAutoRetryEnabled(enabled)` writes `retry.enabled`\n- `autoRetryEnabled` reads `retry.enabled`\n- `isRetrying` reports whether retry lifecycle promise is active\n\n### RPC controls\n\nRPC command surface:\n\n- `set_auto_retry` → `session.setAutoRetryEnabled(command.enabled)`\n- `abort_retry` → 
`session.abortRetry()`\n\nClient helpers:\n\n- `RpcClient.setAutoRetry(enabled)`\n- `RpcClient.abortRetry()`\n\nBoth commands return success responses; retry progress/failure details come from streamed session events, not command response payloads.\n\n## Event emission and failure surfacing\n\nSession-level retry events:\n\n- `auto_retry_start { attempt, maxAttempts, delayMs, errorMessage }`\n- `auto_retry_end { success, attempt, finalError? }`\n- `retry_fallback_applied { from, to, role }`\n- `retry_fallback_succeeded { model, role }`\n\nPropagation:\n\n- emitted through `AgentSession.subscribe(...)`\n- forwarded to extension runner as extension events\n- in RPC mode, forwarded directly as JSON event objects (`session.subscribe(event => output(event))`)\n- in TUI, consumed by `EventController` for loader/error UI\n\nFinal failure surfacing:\n\n- On max-exceeded or cancellation, `auto_retry_end.success === false`\n- TUI shows: `Retry failed after N attempts: <finalError>`\n- Extensions/hooks receive `auto_retry_end` with same fields\n- RPC consumers receive same event object on stdout stream\n\n## Permanent stop conditions\n\nRetry stops and will not auto-continue when any of these occur:\n\n- `retry.enabled` is false\n- error is not retry-classified\n- error is context overflow (delegated to compaction path)\n- max retries exceeded\n- user cancels retry (`abort_retry` or `Esc` during retry loader)\n- global abort (`abort`) cancels retry first\n\nA new retry chain can still start later on a future retryable error after counters reset.\n\n## Operational caveats\n\n- Classification is regex text matching; provider-specific structured errors are not used here.\n- Retry strips the failing assistant error from **runtime context** before re-continue, but session history still keeps that error entry.\n- `RpcSessionState` currently exposes `autoCompactionEnabled` but not an `autoRetryEnabled` field; RPC callers must track their own toggle state or query settings through 
other APIs.\n- Model fallback changes append temporary `model_change` entries and may later restore the primary model when its cooldown expires, depending on `retry.fallbackRevertPolicy`.\n",
34
+ "non-compaction-retry-policy.md": "# Non-compaction auto-retry policy\n\nThis document describes the standard API-error retry path in `AgentSession`.\n\nIt explicitly excludes context-overflow recovery via auto-compaction. Overflow is handled by compaction logic and is documented separately in [`compaction.md`](../docs/compaction.md).\n\n## Implementation files\n\n- [`../src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts)\n- [`../src/config/settings-schema.ts`](../packages/coding-agent/src/config/settings-schema.ts)\n- [`../src/modes/controllers/event-controller.ts`](../packages/coding-agent/src/modes/controllers/event-controller.ts)\n- [`../src/modes/rpc/rpc-mode.ts`](../packages/coding-agent/src/modes/rpc/rpc-mode.ts)\n- [`../src/modes/rpc/rpc-client.ts`](../packages/coding-agent/src/modes/rpc/rpc-client.ts)\n- [`../src/modes/rpc/rpc-types.ts`](../packages/coding-agent/src/modes/rpc/rpc-types.ts)\n\n## Scope boundary vs compaction\n\nRetry and compaction are checked from the same `agent_end` path, but they are intentionally separated:\n\n1. `agent_end` inspects the last assistant message.\n2. `#isRetryableError(...)` runs first.\n3. If retry is initiated, compaction checks are skipped for that turn.\n4. Context-overflow errors are hard-excluded from retry classification (`isContextOverflow(...)` short-circuits retry).\n5. 
Overflow therefore falls through to `#checkCompaction(...)` instead of standard retry.\n\nSo: overload/rate/server/network-style failures use this retry policy; context-window overflow uses compaction recovery.\n\n## Retry classification\n\n`#isRetryableError(...)` requires all of the following:\n\n- assistant `stopReason === \"error\"`\n- `errorMessage` exists\n- message is **not** context overflow\n- `errorMessage` matches transient transport/envelope patterns or `isUsageLimitError(...)`\n\nCurrent retryable inputs are regex/string-classified:\n\n- transient transport/envelope failures, including Anthropic stream-envelope failures before `message_start`\n- overloaded/provider-returned-error wording\n- rate limit / usage limit / too many requests\n- HTTP-like status codes: 429 (rate limiting) and server errors 500, 502, 503, 504\n- service unavailable / server/internal error\n- provider-suggested retry wording, including OpenAI `retry your request` failures\n- network/connection/socket failures, refused/closed connections, upstream connect/reset-before-headers, socket hang up, timeout/timed out, fetch failed, terminated, retry delay wording, and unexpected socket close messages\n\nThis is string-pattern classification, not typed provider error codes.\n\n## Retry lifecycle and state transitions\n\nSession state used by retry:\n\n- `#retryAttempt: number` (`0` means idle)\n- `#retryPromise: Promise<void> | undefined` (tracks in-progress retry lifecycle)\n- `#retryResolve: (() => void) | undefined` (resolves `#retryPromise`)\n- `#retryAbortController: AbortController | undefined` (cancels backoff sleep)\n\nFlow (`#handleRetryableError`):\n\n1. Read `retry` settings group.\n2. If `retry.enabled === false`, stop immediately (`false`, no retry started).\n3. Increment `#retryAttempt`.\n4. Create `#retryPromise` once (first attempt in a chain).\n5. If attempt exceeded `retry.maxRetries`, emit final failure event and stop.\n6. Compute base delay: `retry.baseDelayMs * 2^(attempt-1)`.\n7. 
For usage-limit errors, parse retry hints and call auth storage (`markUsageLimitReached(...)`); if credential switching succeeds, force delay to `0`, otherwise use a larger retry-after/backoff hint when present.\n8. If no credential switch occurred, suppress the current model selector for cooldown, try configured retry model fallback chains, and force delay to `0` on model switch.\n9. Emit `auto_retry_start`.\n10. Remove the trailing assistant error message from agent runtime state (kept in persisted session history).\n11. Sleep with abort support.\n12. Schedule `agent.continue()` through the post-prompt task scheduler (`delayMs: 1`) for the same prompt generation.\n\n### What resets retry counters\n\n`#retryAttempt` resets to `0` in these cases:\n\n- first successful non-error, non-aborted assistant message after retries started (emits `auto_retry_end { success: true }`)\n- retry cancellation during backoff sleep\n- max retries exceeded path\n\n`#retryPromise` resolves/clears when retry chain ends (success, cancellation, or max-exceeded), via `#resolveRetry()`.\n\n## Backoff and max-attempt semantics\n\nSettings:\n\n- `retry.enabled` (default `true`)\n- `retry.maxRetries` (default `3`)\n- `retry.baseDelayMs` (default `2000`)\n\nAttempt numbering:\n\n- attempt counter is incremented before max-check\n- start events use current attempt (1-based)\n- max-exceeded end event reports `attempt: this.#retryAttempt - 1` (last attempted retry count)\n\nBackoff sequence with default settings:\n\n- attempt 1: 2000 ms\n- attempt 2: 4000 ms\n- attempt 3: 8000 ms\n\nDelay override inputs can come from parsed retry headers (`retry-after-ms`, `retry-after`, `x-ratelimit-reset-ms`, `x-ratelimit-reset`) or usage-limit backoff. 
Credential/model fallback switches set delay to `0`; otherwise parsed hints can extend the exponential local delay.\n\n## Abort mechanics\n\n### Explicit retry abort\n\n`abortRetry()`:\n\n- aborts `#retryAbortController` (if present)\n- resolves retry promise (`#resolveRetry()`) so awaiters are unblocked\n\nIf abort hits while sleeping, catch path emits:\n\n- `auto_retry_end { success: false, finalError: \"Retry cancelled\" }`\n- resets attempt/controller\n\n### Global operation abort interaction\n\n`abort()` calls `abortRetry()` before aborting the active agent stream. This guarantees retry backoff is cancelled when user issues a general abort.\n\n### TUI interaction\n\nOn `auto_retry_start`, EventController:\n\n- swaps `Esc` handler to `session.abortRetry()`\n- renders loader text: `Retrying (attempt/maxAttempts) in Ns… (esc to cancel)`\n\nOn `auto_retry_end`, it restores prior `Esc` handler and clears loader state.\n\n## Streaming and prompt completion behavior\n\n`prompt()` ultimately waits on `#waitForRetry()` after `agent.prompt(...)` returns.\n\nEffect:\n\n- a prompt call does not fully resolve until any started retry chain finishes (success/failure/cancel)\n- retry lifecycle is part of one logical prompt execution boundary\n\nThis prevents callers from treating a retrying turn as complete too early.\n\n## Controls: settings and RPC\n\n### Configuration knobs\n\nDefined in settings schema under retry group:\n\n- `retry.enabled`\n- `retry.maxRetries`\n- `retry.baseDelayMs`\n- `retry.fallbackChains`\n- `retry.fallbackRevertPolicy` (`\"cooldown-expiry\"` by default; `\"never\"` disables automatic restoration)\n\nProgrammatic toggles in session:\n\n- `setAutoRetryEnabled(enabled)` writes `retry.enabled`\n- `autoRetryEnabled` reads `retry.enabled`\n- `isRetrying` reports whether retry lifecycle promise is active\n\n### RPC controls\n\nRPC command surface:\n\n- `set_auto_retry` → `session.setAutoRetryEnabled(command.enabled)`\n- `abort_retry` → 
`session.abortRetry()`\n\nClient helpers:\n\n- `RpcClient.setAutoRetry(enabled)`\n- `RpcClient.abortRetry()`\n\nBoth commands return success responses; retry progress/failure details come from streamed session events, not command response payloads.\n\n## Event emission and failure surfacing\n\nSession-level retry events:\n\n- `auto_retry_start { attempt, maxAttempts, delayMs, errorMessage }`\n- `auto_retry_end { success, attempt, finalError? }`\n- `retry_fallback_applied { from, to, role }`\n- `retry_fallback_succeeded { model, role }`\n\nPropagation:\n\n- emitted through `AgentSession.subscribe(...)`\n- forwarded to extension runner as extension events\n- in RPC mode, forwarded directly as JSON event objects (`session.subscribe(event => output(event))`)\n- in TUI, consumed by `EventController` for loader/error UI\n\nFinal failure surfacing:\n\n- On max-exceeded or cancellation, `auto_retry_end.success === false`\n- TUI shows: `Retry failed after N attempts: <finalError>`\n- Extensions/hooks receive `auto_retry_end` with same fields\n- RPC consumers receive same event object on stdout stream\n\n## Permanent stop conditions\n\nRetry stops and will not auto-continue when any of these occur:\n\n- `retry.enabled` is false\n- error is not retry-classified\n- error is context overflow (delegated to compaction path)\n- max retries exceeded\n- user cancels retry (`abort_retry` or `Esc` during retry loader)\n- global abort (`abort`) cancels retry first\n\nA new retry chain can still start later on a future retryable error after counters reset.\n\n## Operational caveats\n\n- Classification is regex text matching; provider-specific structured errors are not used here.\n- Retry strips the failing assistant error from **runtime context** before re-continue, but session history still keeps that error entry.\n- `RpcSessionState` currently exposes `autoCompactionEnabled` but not an `autoRetryEnabled` field; RPC callers must track their own toggle state or query settings through 
other APIs.\n- Model fallback changes append temporary `model_change` entries and may later restore the primary model when its cooldown expires, depending on `retry.fallbackRevertPolicy`.\n",
35
35
  "notebook-tool-runtime.md": "# Notebook tool runtime internals\n\nThis document describes the current `notebook` tool implementation and its relationship to the kernel-backed Python runtime.\n\nThe critical distinction: **`notebook` is a JSON notebook editor, not a notebook executor**. It edits `.ipynb` cell sources directly; it does not start or talk to a Python kernel.\n\n## Implementation files\n\n- [`src/tools/notebook.ts`](../packages/coding-agent/src/tools/notebook.ts)\n- [`src/eval/py/executor.ts`](../packages/coding-agent/src/eval/py/executor.ts)\n- [`src/eval/py/kernel.ts`](../packages/coding-agent/src/eval/py/kernel.ts)\n- [`src/session/streaming-output.ts`](../packages/coding-agent/src/session/streaming-output.ts)\n- [`src/tools/eval.ts`](../packages/coding-agent/src/tools/eval.ts)\n\n## 1) Runtime boundary: editing vs executing\n\n## `notebook` tool (`src/tools/notebook.ts`)\n\n- Supports `action: edit | insert | delete` on a `.ipynb` file.\n- Resolves path relative to session CWD (`resolveToCwd`).\n- Loads notebook JSON, validates `cells` array, validates `cell_index` bounds.\n- Applies source edits in-memory and writes full notebook JSON back with `JSON.stringify(notebook, null, 1)`.\n- Returns textual summary + structured `details` (`action`, `cellIndex`, `cellType`, `totalCells`, `cellSource`).\n\nNo kernel lifecycle exists in this tool:\n\n- no gateway acquisition\n- no kernel session ID\n- no `execute_request`\n- no stream chunks from kernel channels\n- no rich display capture (`image/png`, JSON display, status MIME)\n\n## Notebook-like execution path (`src/tools/eval.ts` + `src/eval/py/*`)\n\nWhen the agent needs to run cell-style Python code (sequential cells, persistent state, rich displays), that goes through the **`eval` tool** with `language: \"python\"`, not `notebook`.\n\nThat path is where kernel modes, restart/cancel behavior, chunk streaming, and output artifact truncation live.\n\n## 2) Notebook cell handling semantics (`notebook` 
tool)\n\n## Source normalization\n\n`content` is split into `source: string[]` with newline preservation:\n\n- each non-final line keeps trailing `\\n`\n- final line has no forced trailing newline\n\nThis mirrors notebook JSON conventions and avoids accidental line concatenation on later edits.\n\n## Action behavior\n\n- `edit`\n - replaces `cells[cell_index].source`\n - preserves existing `cell_type`\n- `insert`\n - inserts at `[0..cellCount]`\n - `cell_type` defaults to `code`\n - code cells initialize `execution_count: null` and `outputs: []`\n - markdown cells initialize only `metadata` + `source`\n- `delete`\n - removes `cells[cell_index]`\n - returns removed `source` in details for renderer preview\n\n## Error surfaces\n\nHard failures are thrown for:\n\n- missing notebook file\n- invalid JSON\n- missing/non-array `cells`\n- out-of-range index (insert and non-insert have different valid ranges)\n- missing `content` for `edit`/`insert`\n\nThese become `Error:` tool responses upstream; renderer uses notebook path + formatted error text.\n\n## 3) Kernel session semantics (where they actually exist)\n\nKernel semantics are implemented in `executePython` / `PythonKernel` and apply to the Python backend of the `eval` tool.\n\n## Modes\n\n`PythonKernelMode`:\n\n- `session` (default)\n - kernels cached in `kernelSessions` map\n - max 4 sessions; oldest evicted on overflow\n - idle/dead cleanup every 30s, timeout after 5 minutes\n - per-session queue serializes execution (`session.queue`)\n- `per-call`\n - creates kernel for request\n - executes\n - always shuts down kernel in `finally`\n\n## Reset behavior\n\n`eval` passes `reset` only for the first cell in a multi-cell Python call; later cells always run with `reset: false`.\n\n## Kernel death / restart / retry\n\nIn session mode (`withKernelSession`):\n\n- dead kernel detected by heartbeat (`kernel.isAlive()` check every 5s) or execute failure.\n- pre-run dead state triggers `restartKernelSession`.\n- execute-time 
crash path retries once: restart kernel, rerun handler.\n- `restartCount > 1` in same session throws `Python kernel restarted too many times in this session`.\n\nStartup retry behavior:\n\n- shared gateway kernel creation retries once on `SharedGatewayCreateError` with HTTP 5xx.\n\nResource exhaustion recovery:\n\n- detects `EMFILE`/`ENFILE`/\"Too many open files\" style failures\n- clears tracked sessions\n- calls `shutdownSharedGateway()`\n- retries kernel session creation once\n\n## 4) Environment/session variable injection\n\nKernel startup receives the optional session file path from executor:\n\n- `PI_SESSION_FILE` (session state file path)\n\n`PythonKernel.#initializeKernelEnvironment(...)` then runs init script inside kernel to:\n\n- `os.chdir(cwd)`\n- inject env entries into `os.environ`\n- prepend cwd to `sys.path` if missing\n\nImplication:\n\n- prelude helpers that read session context rely on this env var in Python process state.\n\n## 5) Streaming/chunk and display handling (kernel-backed path)\n\nThe kernel client processes Jupyter protocol messages per execution:\n\n- `stream` -> text chunk to `onChunk`\n- `execute_result` / `display_data` ->\n - display text chosen by MIME precedence: `text/markdown` > `text/plain` > converted `text/html`\n - structured outputs captured separately:\n - `application/json` -> `{ type: \"json\" }`\n - `image/png` -> `{ type: \"image\" }`\n - `application/x-omp-status` -> `{ type: \"status\" }` (no text emission)\n- `error` -> traceback text pushed to chunk stream + structured error metadata\n- `input_request` -> emits stdin warning text, sends empty `input_reply`, marks stdin requested\n- completion waits for both `execute_reply` and kernel `status=idle`\n\nCancellation/timeout:\n\n- abort signal triggers `interrupt()` (REST `/interrupt` + control-channel `interrupt_request`)\n- result marks `cancelled=true`\n- timeout path annotates output with `Command timed out after <n> seconds`\n\n## 6) Truncation and artifact 
behavior\n\n`OutputSink` in `src/session/streaming-output.ts` is used by kernel execution paths (`executeWithKernel`):\n\n- sanitizes every chunk (`sanitizeText`)\n- tracks total/output lines and bytes\n- optional artifact spill file (`artifactPath`, `artifactId`)\n- when in-memory buffer exceeds threshold (`DEFAULT_MAX_BYTES` unless overridden):\n - marks truncated\n - keeps tail bytes in memory (UTF-8 safe boundary)\n - can spill full stream to artifact sink\n\n`dump()` returns:\n\n- visible output text (possibly tail-truncated)\n- truncation flag + counts\n- artifact ID (for `artifact://<id>` references)\n\n`eval` converts this metadata into result truncation notices and TUI warnings.\n\n`notebook` tool does **not** use `OutputSink`; it has no stream/artifact truncation pipeline because it does not execute code.\n\n## 7) Renderer assumptions and formatting\n\n## Notebook renderer (`notebookToolRenderer`)\n\n- call view: status line with action + notebook path + cell/type metadata\n- result view:\n - success summary derived from `details`\n - `cellSource` rendered via `renderCodeCell`\n - markdown cells set language hint `markdown`; other cells have no explicit language override\n - collapsed code preview limit is `PREVIEW_LIMITS.COLLAPSED_LINES * 2`\n - supports expanded mode via shared render options\n - uses render cache keyed by width + expanded state\n\nError rendering assumption:\n\n- if first text content starts with `Error:`, renderer formats as notebook error block.\n\n## Python renderer (for actual execution output)\n\nKernel-backed execution rendering expects:\n\n- per-cell status transitions (`pending/running/complete/error`)\n- optional structured status event section\n- optional JSON output trees\n- truncation warnings + optional `artifact://<id>` pointer\n\nThis renderer behavior is unrelated to `notebook` JSON editing results except that both reuse shared TUI primitives.\n\n## 8) Divergence from eval Python backend behavior\n\nIf \"plain Python 
execution\" means the `eval` tool with `language: \"python\"`:\n\n- `eval` executes code in a kernel, persists state by mode, streams chunks, captures rich displays, handles interrupts/timeouts, and supports output truncation/artifacts.\n- `notebook` performs deterministic notebook JSON mutations only; no execution, no kernel state, no chunk stream, no display outputs, no artifact pipeline.\n\nIf a workflow needs both:\n\n1. edit notebook source with `notebook`\n2. execute code cells via `eval` with `language: \"python\"` (manually passing code), not through `notebook`\n\nCurrent implementation does not provide a single tool that both mutates `.ipynb` and executes notebook cells through kernel context.\n",
36
36
  "plugin-manager-installer-plumbing.md": "# Plugin manager and installer plumbing\n\nThis document describes how `omp plugin` operations mutate plugin state on disk and how installed plugins become runtime capabilities (tools and extensions today, hooks/commands path resolution available).\n\n## Scope and architecture\n\nThere are two plugin-management implementations in the codebase:\n\n1. **Active path used by CLI commands**: `PluginManager` (`src/extensibility/plugins/manager.ts`)\n2. **Legacy helper module**: installer functions (`src/extensibility/plugins/installer.ts`)\n\n`omp plugin ...` command execution goes through `PluginManager`.\n\n`installer.ts` still documents important safety checks and filesystem behavior, but it is not the path used by `src/commands/plugin.ts` + `src/cli/plugin-cli.ts`.\n\n## Lifecycle: from CLI invocation to runtime availability\n\n```text\nomp plugin <action> ...\n -> src/commands/plugin.ts\n -> runPluginCommand(...) in src/cli/plugin-cli.ts\n -> PluginManager method (install/list/uninstall/link/...)\n -> mutate ~/.omp/plugins/{package.json,node_modules,omp-plugins.lock.json}\n -> runtime discovery: discoverAndLoadCustomTools(...) 
and discoverAndLoadExtensions(...)\n -> getAllPluginToolPaths(cwd) / getAllPluginExtensionPaths(cwd)\n -> custom tool loader imports tool modules; extension loader imports extension modules\n```\n\n### Command entrypoints\n\n- `src/commands/plugin.ts` defines command/flags and forwards to `runPluginCommand`.\n- `src/cli/plugin-cli.ts` maps subcommands to `PluginManager` methods:\n - `install`, `uninstall`, `list`, `link`, `doctor`, `features`, `config`, `enable`, `disable`\n- No explicit `update` action exists; update is done by re-running `install` with a new package/version spec.\n\n## On-disk model\n\nGlobal plugin state lives under `~/.omp/plugins`:\n\n- `package.json` — dependency manifest used by `bun install`/`bun uninstall`\n- `node_modules/` — installed plugin packages or symlinks\n- `omp-plugins.lock.json` — runtime state:\n - enabled/disabled per plugin\n - selected feature set per plugin\n - persisted plugin settings\n\nProject-local overrides live at:\n\n- `<cwd>/.omp/plugin-overrides.json`\n\nOverrides are read-only from manager/loader perspective (no write path here) and can disable plugins or override features/settings for this project.\n\n## Plugin spec parsing and metadata interpretation\n\n## Install spec grammar\n\n`parsePluginSpec` (`parser.ts`) supports:\n\n- `pkg` -> `features: null` (defaults behavior)\n- `pkg[*]` -> enable all manifest features\n- `pkg[]` -> enable no optional features\n- `pkg[a,b]` -> enable named features\n- `@scope/pkg@1.2.3[feat]` -> scoped + versioned package with explicit feature selection\n\n`extractPackageName` strips version suffix for on-disk path lookup after install.\n\n## Manifest source and required fields\n\nManifest is resolved as:\n\n1. `package.json.omp`\n2. fallback `package.json.pi`\n3. 
fallback `{ version: package.version }`\n\nImplications:\n\n- There is no strict schema validation in manager/loader.\n- A package missing `omp`/`pi` is still installable and listable.\n- Runtime plugin loading (`getEnabledPlugins`) skips packages without `omp`/`pi` manifest.\n- `manifest.version` is always overwritten from package `version`.\n\nMalformed `package.json` JSON is a hard failure at read time; malformed manifest shape may fail later only when specific fields are consumed.\n\n## Install/update flow (`PluginManager.install`)\n\n1. Parse feature bracket syntax from install spec.\n2. Validate package name against regex + shell-metacharacter denylist.\n3. Ensure plugin `package.json` exists (`omp-plugins`, private dependencies map).\n4. Run `bun install <packageSpec>` in `~/.omp/plugins`.\n5. Read installed package `node_modules/<name>/package.json`.\n6. Resolve manifest and compute `enabledFeatures`:\n - `[*]`: all declared features (or `null` if no feature map)\n - `[a,b]`: validates each feature exists in manifest features map\n - `[]`: empty feature list\n - bare spec: `null` (use defaults policy later in loader)\n7. Upsert lockfile runtime state: `{ version, enabledFeatures, enabled: true }`.\n\n### Update semantics\n\nBecause update is install-driven:\n\n- `omp plugin install pkg@newVersion` updates dependency and lockfile version.\n- Existing settings are preserved; state entry is overwritten for version/features/enabled.\n- No separate “check updates” or transactional migration logic exists.\n\n## Remove flow (`PluginManager.uninstall`)\n\n1. Validate package name.\n2. Run `bun uninstall <name>` in plugin dir.\n3. Remove plugin runtime state from lockfile:\n - `config.plugins[name]`\n - `config.settings[name]`\n\nIf uninstall command fails, runtime state is not changed.\n\n## List flow (`PluginManager.list`)\n\n1. Read plugin dependency map from `~/.omp/plugins/package.json`.\n2. Load lockfile runtime config (missing file -> empty defaults).\n3. 
Load project overrides (`<cwd>/.omp/plugin-overrides.json`, parse/read errors -> empty object with warning).\n4. For each dependency with a resolvable package.json:\n - build `InstalledPlugin` record\n - merge feature/enable state:\n - base from lockfile (or defaults)\n - project overrides can replace feature selection\n - project `disabled` list masks plugin as disabled\n\nThis is the effective state used by CLI status output and settings/features operations.\n\n## Link flow (`PluginManager.link`)\n\n`link` supports local plugin development by symlinking a local package into `~/.omp/plugins/node_modules/<pkg.name>`.\n\nBehavior:\n\n1. Resolve `localPath` against manager cwd.\n2. Require local `package.json` and `name` field.\n3. Ensure plugin dirs exist.\n4. For scoped names, create scope directory.\n5. Remove existing path at target link location.\n6. Create symlink.\n7. Add runtime lockfile entry enabled with default features (`null`).\n\nCaveat: current `PluginManager.link` does not enforce the `cwd` path-boundary check present in legacy `installer.ts` (`normalizedPath.startsWith(normalizedCwd)`), so trust is the caller’s responsibility.\n\n## Runtime loading: from installed plugin to callable capabilities\n\n## Discovery gate\n\n`getEnabledPlugins(cwd)` (`plugins/loader.ts`) reads:\n\n- plugin dependency manifest (`package.json`)\n- lockfile runtime state\n- project overrides via `getConfigDirPaths(\"plugin-overrides.json\", { user: false, cwd })`\n\nFiltering:\n\n- skip if no plugin package.json\n- skip if manifest (`omp`/`pi`) absent\n- skip if globally disabled in lockfile\n- skip if project-disabled\n\n## Capability path resolution\n\nFor each enabled plugin:\n\n- `resolvePluginExtensionPaths(plugin)`\n- `resolvePluginToolPaths(plugin)`\n- `resolvePluginHookPaths(plugin)`\n- `resolvePluginCommandPaths(plugin)`\n\nEach resolver includes base entries plus feature entries:\n\n- explicit feature list -> only selected features\n- `enabledFeatures === null` -> 
enable features marked `default: true`\n\nMissing files are silently skipped (`existsSync` guard).\n\n## Current runtime wiring differences\n\n- **Tools are wired into runtime today** via `discoverAndLoadCustomTools` (`custom-tools/loader.ts`), which calls `getAllPluginToolPaths(cwd)`.\n- **Extensions are wired into runtime today** via `discoverAndLoadExtensions` (`extensions/loader.ts`), which calls `getAllPluginExtensionPaths(cwd)`.\n- Paths are de-duplicated by resolved absolute path in custom tool and extension discovery (`seen` set, first path wins).\n- **Hooks/commands resolvers exist** and are exported, but this code path does not currently wire them into a runtime registry in the same way tools and extensions are wired.\n\n## Lock/state management details\n\n`PluginManager` caches runtime config in memory per instance (`#runtimeConfig`) and lazily loads once.\n\nLoad behavior:\n\n- lockfile missing -> `{ plugins: {}, settings: {} }`\n- lockfile read/parse failure -> warning + same empty defaults\n\nSave behavior:\n\n- writes full lockfile JSON pretty-printed each mutation\n\nNo cross-process locking or merge strategy exists; concurrent writers can overwrite each other.\n\n## Safety checks and trust boundaries\n\n## Input/package validation\n\nActive manager path enforces package-name validation:\n\n- regex for scoped/unscoped package specs (optionally with version)\n- explicit shell metacharacter denylist (`` [;&|`$(){}[]<>\\\\] ``)\n\nThis limits command-injection risk when invoking `bun install/uninstall`.\n\n## Filesystem trust boundary\n\n- Plugin code executes in-process when custom tool modules are imported; no sandboxing.\n- Manifest relative paths are joined against plugin package directory and only existence-checked.\n- The plugin package itself is trusted code once installed.\n\n## Legacy installer-only checks\n\n`installer.ts` includes additional link-time checks not mirrored in `PluginManager.link`:\n\n- local path must resolve inside project cwd\n- 
extra package name/path traversal guards for symlink target naming\n\nBecause CLI uses `PluginManager`, these stricter link guards are not currently on the main path.\n\n## Failure, partial success, and rollback behavior\n\nThe plugin manager is not transactional.\n\n| Operation stage | Failure behavior | Rollback |\n| -------------------------------------------------------- | -------------------------- | ----------------------------------------------------------------------------- |\n| `bun install` fails | install aborts with stderr | N/A (no state writes yet) |\n| Install succeeds, then manifest/feature validation fails | command fails | No uninstall rollback; dependency may remain in `node_modules`/`package.json` |\n| Install succeeds, then lockfile write fails | command fails | No rollback of installed package |\n| `bun uninstall` succeeds, lockfile write fails | command fails | Package removed, stale runtime state may remain |\n| `link` removes old target then symlink creation fails | command fails | No restoration of previous link/dir |\n\nOperationally, `doctor --fix` can repair some drift (`bun install`, orphaned config cleanup, invalid-feature cleanup), but it is best-effort.\n\n## Malformed/missing manifest behavior summary\n\n- Missing `omp`/`pi` field:\n - install/list: tolerated (minimal manifest)\n - runtime enabled-plugin discovery: skipped as non-plugin\n- Missing feature referenced by install spec or `features --set/--enable`: hard error with available feature list\n- Invalid `plugin-overrides.json`: ignored with fallback to `{}` in both manager and loader paths\n- Missing tool/hook/command file paths referenced by manifest: silently ignored during resolver expansion; flagged as errors only by `doctor`\n\n## Mode differences and precedence\n\n- `--dry-run` (install): returns synthetic install result, no filesystem/network/state writes.\n- `--json`: output formatting only, no behavior change.\n- Project overrides always take precedence over global 
lockfile for feature/settings view.\n- Effective enablement is `runtimeEnabled && !projectDisabled`.\n\n## Implementation files\n\n- [`src/commands/plugin.ts`](../packages/coding-agent/src/commands/plugin.ts) — CLI command declaration and flag mapping\n- [`src/cli/plugin-cli.ts`](../packages/coding-agent/src/cli/plugin-cli.ts) — action dispatch, user-facing command handlers\n- [`src/extensibility/plugins/manager.ts`](../packages/coding-agent/src/extensibility/plugins/manager.ts) — active install/remove/list/link/state/doctor implementation\n- [`src/extensibility/plugins/installer.ts`](../packages/coding-agent/src/extensibility/plugins/installer.ts) — legacy installer helpers and additional link safety checks\n- [`src/extensibility/plugins/loader.ts`](../packages/coding-agent/src/extensibility/plugins/loader.ts) — enabled-plugin discovery and extension/tool/hook/command path resolution\n- [`src/extensibility/plugins/parser.ts`](../packages/coding-agent/src/extensibility/plugins/parser.ts) — install spec and package-name parsing helpers\n- [`src/extensibility/plugins/types.ts`](../packages/coding-agent/src/extensibility/plugins/types.ts) — manifest/runtime/override type contracts\n- [`src/extensibility/custom-tools/loader.ts`](../packages/coding-agent/src/extensibility/custom-tools/loader.ts) — runtime wiring for plugin-provided tool modules\n- [`src/extensibility/extensions/loader.ts`](../packages/coding-agent/src/extensibility/extensions/loader.ts) — runtime wiring for plugin-provided extension modules\n",
37
37
  "porting-from-pi-mono.md": "# Porting From pi-mono: A Practical Merge Guide\n\nThis guide is a repeatable checklist for porting changes from pi-mono into this repo.\nUse it for any merge: single file, feature branch, or full release sync.\n\n## Last Sync Point (historical upstream marker)\n\n**Commit:** `b21b42d032919de2f2e6920a76fa9a37c3920c0a`\n**Date:** 2026-03-22\n\nUpdate this section after each sync; do not reuse the previous range. This commit is an upstream pi-mono marker and may not exist in this repo's local object database.\n\nWhen starting a new sync, generate patches from this commit forward in a pi-mono checkout or remote that contains the commit:\n\n```bash\ngit format-patch b21b42d032919de2f2e6920a76fa9a37c3920c0a..HEAD --stdout > changes.patch\n```\n\n## 0) Define the scope\n\n- Identify the upstream reference (commit, tag, or PR).\n- List the packages or folders you plan to touch.\n- Decide which features are in-scope and which are intentionally skipped.\n\n## 1) Bring code over safely\n\n- Prefer a clean, focused diff rather than a wholesale copy.\n- Avoid copying built artifacts or generated files.\n- If upstream added new files, add them explicitly and review contents.\n\n## 2) Match import extension conventions\n\nMost runtime TypeScript sources omit `.js` in internal imports, but several current entrypoints and tool modules keep `.js` for ESM/runtime compatibility. 
Follow the surrounding file and package export style; do not blanket-strip or blanket-add extensions.\n\n- In `packages/coding-agent` runtime sources, prefer extensionless internal imports when the surrounding module does, but preserve existing `.js` imports in files that already require them.\n- In `packages/tui/test` and `packages/natives/bench`, keep `.js` where surrounding files already use it.\n- Keep real file extensions when required by tooling or import assertions (e.g., `.json`, `.css`, `.md` text embeds).\n- Example: `import { x } from \"./foo.js\";` → `import { x } from \"./foo\";` only when that package/file convention is extensionless.\n\n## 3) Replace import scopes\n\nUpstream uses different package scopes. Replace them consistently.\n\n- Replace old scopes with the local scope used here.\n- Examples (adjust to match the actual packages you are porting):\n - `@mariozechner/pi-coding-agent` → `@oh-my-pi/pi-coding-agent`\n - `@mariozechner/pi-agent-core` → `@oh-my-pi/pi-agent-core`\n - `@mariozechner/pi-tui` → `@oh-my-pi/pi-tui`\n - `@mariozechner/pi-ai` → `@oh-my-pi/pi-ai`\n\n## 4) Use Bun APIs where they improve on Node\n\nWe run on Bun, but the current source intentionally mixes Bun APIs with small Node standard-library APIs. Replace Node APIs only when Bun provides a clearer, safer, or simpler implementation; do not mechanically rewrite every Node import.\n\n**Prefer replacing when porting new code:**\n\n- Process spawning: prefer Bun Shell `$` for simple commands; use `Bun.spawn`/`Bun.spawnSync` for streaming or process control. 
Keep existing `child_process` only where its exact semantics are needed.\n- HTTP clients: `node-fetch`, `axios` → native `fetch`\n- SQLite: `better-sqlite3` → `bun:sqlite`\n- Env loading: `dotenv` → Bun loads `.env` automatically\n- Runtime text/assets: prefer Bun imports such as `with { type: \"text\" }` or `Bun.file()` over copy steps or bundled fallback file reads.\n\n**DO NOT replace (these work fine in Bun):**\n\n- `os.homedir()` — do NOT replace with `Bun.env.HOME` or literal `\"~\"`\n- `os.tmpdir()` — do NOT replace with `Bun.env.TMPDIR || \"/tmp\"` or hardcoded paths\n- `fs.mkdtempSync()` — do NOT replace with manual path construction\n- `path.join()`, `path.resolve()`, etc. — these are fine\n\n**Import style:** Use the `node:` prefix for Node standard-library imports. Namespace imports are common, but named imports are acceptable where the surrounding code already uses them.\n\n**Additional Bun conventions:**\n\n- Prefer Bun Shell `$` for short, non-streaming commands; use `Bun.spawn` only when you need streaming I/O or process control.\n- Use `Bun.file()`/`Bun.write()` for simple files and `node:fs/promises` for directory-oriented operations. Existing synchronous `node:fs` calls are acceptable when the calling flow is intentionally synchronous.\n- Avoid `Bun.file().exists()` checks; use `isEnoent` handling in try/catch.\n- Prefer `Bun.sleep(ms)` over `setTimeout` wrappers.\n\n**Wrong:**\n\n```typescript\n// BROKEN: env vars may be undefined, \"~\" is not expanded\nconst home = Bun.env.HOME || \"~\";\nconst tmp = Bun.env.TMPDIR || \"/tmp\";\n```\n\n**Correct:**\n\n```typescript\nimport * as os from \"node:os\";\nimport * as fs from \"node:fs\";\nimport * as path from \"node:path\";\n\nconst configDir = path.join(os.homedir(), \".config\", \"myapp\");\nconst tempDir = fs.mkdtempSync(path.join(os.tmpdir(), \"myapp-\"));\n```\n\n## 5) Prefer Bun embeds (no copying)\n\nDo not add new runtime asset copy steps. 
Keep assets in repo and prefer Bun embeds/imports; preserve existing explicit generation workflows such as `packages/coding-agent/src/export/html/template.generated.ts`.\n\n- If upstream copies assets into a dist folder, replace with Bun-friendly embeds.\n- Prompts are static `.md` files; use Bun text imports (`with { type: \"text\" }`) and Handlebars instead of inline prompt strings.\n- Use `import.meta.dir` + `Bun.file` to load adjacent non-text resources.\n- Keep assets in-repo and let the bundler include them.\n- Eliminate copy scripts unless the user explicitly requests them or the package already has an intentional generation step.\n- If upstream reads a bundled fallback file at runtime, replace filesystem reads with a Bun text embed import unless the current package already uses a generated asset pipeline.\n - Example (Codex instructions fallback):\n - `const FALLBACK_PROMPT_PATH = join(import.meta.dir, \"codex-instructions.md\");` -> removed\n - `import FALLBACK_INSTRUCTIONS from \"./codex-instructions.md\" with { type: \"text\" };`\n - Use `return FALLBACK_INSTRUCTIONS;` instead of `readFileSync(FALLBACK_PROMPT_PATH, \"utf8\")`\n\n## 6) Port `package.json` carefully\n\nTreat `package.json` as a contract. 
Merge intentionally.\n\n- Keep existing `name`, `version`, `type`, `exports`, and `bin` unless the port requires changes.\n- Replace npm/node scripts with Bun equivalents (e.g., `bun check`, `bun test`).\n- Ensure dependencies use the correct scope.\n- Do not downgrade dependencies to fix type errors; upgrade instead.\n- Validate workspace package links and `peerDependencies`.\n\n## 7) Align code style and tooling\n\n- Keep existing formatting conventions.\n- Do not introduce `any` unless required.\n- Avoid dynamic imports unless they are required for optional dependencies, startup cost, or runtime-only modules; prefer top-level imports otherwise.\n- Never build prompts in code; prompts are static `.md` files rendered with Handlebars.\n- In `packages/coding-agent`, use `logger` from `@oh-my-pi/pi-utils` for internal/runtime logging; CLI command files may use `console.*` for intentional user-facing output.\n- Use `Promise.withResolvers()` instead of `new Promise((resolve, reject) => ...)`.\n- Prefer ES `#` private fields for new encapsulated state. 
Constructor parameter properties already exist in current code and are acceptable; do not churn unrelated access modifiers while porting.\n- Prefer existing helpers and utilities over new ad-hoc code.\n Preserve Bun-first infrastructure changes already made in this repo:\n - Runtime is Bun (no Node entry points for the main CLI).\n - Package manager is Bun (no npm lockfiles).\n - Heavy Node APIs should not be introduced casually; current source still uses selected Node APIs (`node:crypto`, `node:readline`, synchronous `node:fs`, and `child_process`) where they fit provider, CLI, or process-control semantics.\n - Lightweight Node APIs (`os.homedir`, `os.tmpdir`, `fs.mkdtempSync`, `path.*`) are kept.\n - CLI shebangs use `bun` (not `node`, not `tsx`).\n - TypeScript packages generally use source files directly; `@oh-my-pi/pi-natives` exports generated native bindings from `packages/natives/native`.\n - CI workflows run Bun for install/check/test.\n\n## 8) Remove old compatibility layers\n\nUnless requested, remove upstream compatibility shims.\n\n- Delete old APIs that were replaced.\n- Update all call sites to the new API directly.\n- Do not keep `*_v2` or parallel versions.\n\n## 9) Update docs and references\n\n- Replace pi-mono repo links where appropriate.\n- Update examples to use Bun and correct package scopes.\n- Ensure README instructions still match the current repo behavior.\n\n## 10) Validate the port\n\nRun the standard checks after changes:\n\n- `bun check`\n\nIf the repo already has failing checks unrelated to your changes, call that out.\nTests use Bun's runner (not Vitest), but only run `bun test` when explicitly requested.\n\n## 11) Protect improved features (regression trap list)\n\nIf you already improved behavior locally, treat those as **non‑negotiable**. 
Before porting, write down\nthe improvements and add explicit checks so they don’t get lost in the merge.\n\n- **Freeze the expected behavior**: add a short “before/after” note for each improvement (inputs, outputs,\n defaults, edge cases). This prevents silent rollback.\n- **Map old → new APIs**: if upstream renamed concepts (hooks → extensions, custom tools → tools, etc.),\n ensure every old entry point still wires through. One missed flag or export equals lost functionality.\n- **Verify exports**: check `package.json` `exports`, public types, and barrel files. Upstream ports often\n forget to re-export local additions.\n- **Cover non‑happy paths**: if you fixed error handling, timeouts, or fallback logic, add a test or at\n least a manual checklist that exercises those paths.\n- **Check defaults and config merge order**: improvements often live in defaults. Confirm new defaults\n didn’t revert (e.g., new config precedence, disabled features, tool lists).\n- **Audit env/shell behavior**: if you fixed execution or sandboxing, verify the new path still uses your\n sanitized env and does not reintroduce alias/function overrides.\n- **Re-run targeted samples**: keep a minimal set of \"known good\" examples and run them after the port\n (CLI flags, extension registration, tool execution).\n\n## 12) Detect and handle reworked code\n\nBefore porting a file, check if upstream significantly refactored it:\n\n```bash\n# Compare the file you're about to port against what you have locally\ngit diff HEAD upstream/main -- path/to/file.ts\n```\n\nIf the diff shows the file was **reworked** (not just patched):\n\n- New abstractions, renamed concepts, merged modules, changed data flow\n\nThen you must **read the new implementation thoroughly** before porting. Blind merging of reworked code loses functionality because:\n\nNote: interactive mode was recently split into controllers/utils/types. 
When backporting related changes, port updates into the individual files we created and ensure `interactive-mode.ts` wiring stays in sync.\n\n1. **Defaults change silently** - A new variable `defaultFoo = [a, b]` may replace an old `getAllFoo()` that returned `[a, b, c, d, e]`.\n\n2. **API options get dropped** - When systems merge (e.g., `hooks` + `customTools` → `extensions`), old options may not wire through to the new implementation.\n\n3. **Code paths go stale** - A renamed concept (e.g., `hookMessage` → `custom`) needs updates in every switch statement, type guard, and handler—not just the definition.\n\n4. **Context/capabilities shrink** - Old APIs may have exposed `{ logger, typebox, pi }` that new APIs forgot to include.\n\n### Semantic porting process\n\nWhen upstream reworked a module:\n\n1. **Read the old implementation** - Understand what it did, what options it accepted, what it exposed.\n\n2. **Read the new implementation** - Understand the new abstractions and how they map to old behavior.\n\n3. **Verify feature parity** - For each capability in the old code, confirm the new code preserves it or explicitly removes it.\n\n4. **Grep for stragglers** - Search for old names/concepts that may have been missed in switch statements, handlers, UI components.\n\n5. 
**Test the boundaries** - CLI flags, SDK options, event handlers, default values—these are where regressions hide.\n\n### Quick checks\n\n```bash\n# Find all uses of an old concept that may need updating\nrg \"oldConceptName\" --type ts\n\n# Compare default values between versions\ngit show upstream/main:path/to/file.ts | rg \"default|DEFAULT\"\n\n# Check if all enum/union values have handlers\nrg \"case \\\"\" path/to/file.ts\n```\n\n## 13) Quick audit checklist\n\nUse this as a final pass before you finish:\n\n- [ ] Import extensions follow the local package convention (no blanket `.js` stripping)\n- [ ] No newly introduced Node-only APIs unless they match an existing justified pattern\n- [ ] All package scopes updated\n- [ ] `package.json` scripts use Bun\n- [ ] Prompts are `.md` text imports (no inline prompt strings)\n- [ ] No internal/runtime `console.*` in coding-agent; CLI user-facing output is intentional\n- [ ] Assets load via Bun embed/import patterns, or through an existing intentional generation pipeline\n- [ ] Tests or checks run (or explicitly noted as blocked)\n- [ ] No functionality regressions (see sections 11-12)\n\n## 14) Commit message format\n\nWhen committing a backport, follow the repo format `<type>(scope): <past-tense description>` and keep the commit\nrange in the title.\n\n```\nfix(coding-agent): backported pi-mono changes (<from>..<to>)\n\npackages/<package>:\n- <type>: <description>\n- <type>: <description> (#<issue> by @<contributor>)\n\npackages/<other-package>:\n- <type>: <description>\n```\n\n**Example:**\n\n```\nfix(coding-agent): backported pi-mono changes (9f3eef65f..52532c7c0)\n\npackages/ai:\n- fix: handle \"sensitive\" stop reason from Anthropic API\n- fix: normalize tool call IDs with special characters for Responses API\n- fix: add overflow detection for Bedrock, MiniMax, Kimi providers\n- fix: 429 status is rate limiting, not context overflow\n\npackages/tui:\n- fix: refactored autocomplete state tracking\n- fix: file 
autocomplete should not trigger on empty text\n- fix: configurable autocomplete max visible items\n- fix: improved table column width calculation with word-aware wrapping\n\npackages/coding-agent:\n- fix: preserve external config.yml edits on save (#1046 by @nicobailonMD)\n- fix: resolve macOS NFD and curly quote variants in file paths\n```\n\n**Rules:**\n\n- Group changes by package\n- Use conventional commit types (`fix`, `feat`, `refactor`, `perf`, `docs`)\n- Include upstream issue/PR numbers and contributor attribution for external contributions\n- The commit range in the title helps track sync points\n\n## 15) Intentional Divergences\n\nOur fork has architectural decisions that differ from upstream. **Do not port these upstream patterns:**\n\n### UI Architecture\n\n| Upstream | Our Fork | Reason |\n| ------------------------------------------- | --------------------------------------------------------- | --------------------------------------------------------------------- |\n| `FooterDataProvider` class | `StatusLineComponent` | Simpler, integrated status line |\n| `ctx.ui.setHeader()` / `ctx.ui.setFooter()` | No-op stubs in current extension contexts | Not currently wired to replace the TUI status/header UI |\n| `ctx.ui.setEditorComponent()` | No-op stubs in current extension contexts | Custom editor replacement is not currently wired |\n| `InteractiveModeOptions` options object | Positional constructor args (options type still exported) | Keep constructor signature; update the type when upstream adds fields |\n\n### Component Naming\n\n| Upstream | Our Fork |\n| ---------------------------- | ----------------------- |\n| `extension-input.ts` | `hook-input.ts` |\n| `extension-selector.ts` | `hook-selector.ts` |\n| `ExtensionInputComponent` | `HookInputComponent` |\n| `ExtensionSelectorComponent` | `HookSelectorComponent` |\n\n### API Naming\n\n| Upstream | Our Fork | Notes |\n| ---------------------------------------- | 
---------------------------------------- | ----------------------------------------- |\n| `sessionManager.appendSessionInfo(name)` | `sessionManager.setSessionName(name)` | We use `sessionName` throughout |\n| `sessionManager.getSessionName()` | `sessionManager.getSessionName()` | Same (we unified to match upstream's RPC) |\n| `agent.sessionName` / `setSessionName()` | `agent.sessionName` / `setSessionName()` | Same |\n\n### File Consolidation\n\n| Upstream | Our Fork | Reason |\n| -------------------------------------------------- | --------------------------------------------------------- | --------------------------------------------- |\n| `clipboard.ts` + `clipboard-image.ts` (tool files) | `src/utils/clipboard.ts` backed by `@oh-my-pi/pi-natives` | Native implementation with a small TS wrapper |\n\n### Test Framework\n\n| Upstream | Our Fork |\n| ------------------------- | ----------------------------- |\n| `vitest` with `vi.mock()` | `bun:test` with `vi` from bun |\n| `node:test` assertions | `expect()` matchers |\n\n### Tool Architecture\n\n| Upstream | Our Fork | Notes |\n| ----------------------------------- | ------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------- |\n| `createTool(cwd: string, options?)` | `createTools(session: ToolSession)` via `BUILTIN_TOOLS` registry | Tool factories accept `ToolSession` and can return `null` |\n| Per-tool `*Operations` interfaces | Only current per-tool override interfaces remain (for example `FindOperations`) | Used for SSH/remote overrides where present |\n| Node.js `fs/promises` everywhere | Bun file APIs for simple file writes/reads, `node:fs/promises` for dirs, selected sync `node:fs` where needed | Prefer Bun APIs when they simplify |\n\n### Auth Storage\n\n| Upstream | Our Fork | Notes |\n| ------------------------------- | ------------------------------------------- | 
-------------------------------------------- |\n| `proper-lockfile` + `auth.json` | `agent.db` (bun:sqlite) | Credentials stored exclusively in `agent.db` |\n| Single credential per provider | Multi-credential with round-robin selection | Session affinity and backoff logic preserved |\n\n### Extensions\n\n| Upstream | Our Fork |\n| ----------------------------- | ------------------------------------------------- |\n| `jiti` for TypeScript loading | Native Bun `import()` |\n| `pkg.pi` manifest field | `pkg.omp` preferred; fallback to `pkg.pi` remains |\n\n### Skip These Upstream Features\n\nWhen porting, **skip** these files/features entirely:\n\n- `footer-data-provider.ts` — we use `StatusLineComponent`\n- `clipboard-image.ts` — image clipboard support is exposed through `src/utils/clipboard.ts` backed by `@oh-my-pi/pi-natives`\n- GitHub workflow files — we have our own CI\n- `models.generated.ts` — auto-generated; regenerate locally as `models.json` instead\n\n### Features We Added (Preserve These)\n\nThese exist in our fork but not upstream. **Never overwrite:**\n\n- `StatusLineComponent` in interactive mode\n- Multi-credential auth with session affinity\n- Capability-based discovery system (`defineCapability`, `registerProvider`, `loadCapability`, `skillCapability`, etc.)\n- MCP/Exa/SSH integrations\n- LSP writethrough for format-on-save\n- Bash interception (`checkBashInterception`)\n- Fuzzy path suggestions in read tool\n",
38
38
  "porting-to-natives.md": "# Porting to pi-natives (N-API) — Field Notes\n\nThis is a practical guide for moving hot paths into `crates/pi-natives` and wiring them through the generated native package entrypoint. It exists to avoid the same failures happening twice.\n\n## When to port\n\nPort when any of these are true:\n\n- The hot path runs in render loops, tight UI updates, or large batches.\n- JS allocations dominate (string churn, regex backtracking, large arrays).\n- You already have a JS baseline and can benchmark both versions side by side.\n- The work is CPU-bound or blocking I/O that can run on the libuv thread pool.\n- The work is async I/O that can run on Tokio's runtime (for example shell execution).\n\nAvoid ports that depend on JS-only state or dynamic imports. N-API exports should be data-in/data-out. Long-running work should go through `task::blocking` (CPU-bound/blocking I/O) or `task::future` (async I/O) with cancellation where the caller needs `timeoutMs` or `AbortSignal`.\n\n## Current package shape\n\n`@oh-my-pi/pi-natives` no longer has a `packages/natives/src/<module>` TypeScript wrapper layer. The package root points at generated native artifacts:\n\n- runtime entry: `packages/natives/native/index.js`\n- types entry: `packages/natives/native/index.d.ts`\n- loader helpers: `packages/natives/native/loader-state.js`\n- embedded manifest: `packages/natives/native/embedded-addon.js`\n\nConsumers import directly from `@oh-my-pi/pi-natives`. The generated declarations are produced during `bun --cwd=packages/natives run build`.\n\n## Anatomy of a native export\n\n**Rust side:**\n\n- Implementation lives in `crates/pi-natives/src/<module>.rs`.\n- If you add a new module, register it in `crates/pi-natives/src/lib.rs`.\n- Export with `#[napi]`; snake_case exports are converted to camelCase automatically. Use explicit JS names only for true aliases/non-default names. 
Use `#[napi(object)]` for object-shaped structs.\n- For CPU-bound or blocking work, use `task::blocking(tag, cancel_token, work)`.\n- For async work that needs Tokio, use `task::future(env, tag, work)`.\n- Pass a `CancelToken` when the API exposes `timeoutMs` or `AbortSignal`, and call `heartbeat()` inside long loops.\n\n**Package/build side:**\n\n- `packages/natives/scripts/build-native.ts` runs napi-rs, installs the `.node` artifact, copies generated `index.js`/`index.d.ts`, and appends enum runtime exports.\n- `packages/natives/native/index.js` is the loader that chooses a candidate `.node` file and returns the loaded addon.\n- `packages/natives/package.json` exposes only the package root (`@oh-my-pi/pi-natives`).\n\n**Consumer side:**\n\n- Update direct imports/callsites in `packages/coding-agent` or `packages/tui` when the new export replaces a JS implementation.\n- Keep higher-level policy in consumers unless it belongs in the native primitive itself.\n\n## Porting checklist\n\n1. **Add the Rust implementation**\n\n- Put the core logic in a plain Rust function.\n- If it is a new module, add it to `crates/pi-natives/src/lib.rs`.\n- Expose it with `#[napi]` so the default snake_case -> camelCase mapping stays consistent.\n- Keep signatures owned and simple: `String`, `Vec<String>`, `Uint8Array`, `Either<JsString, Uint8Array>`, or `#[napi(object)]` structs.\n- For CPU-bound or blocking work, use `task::blocking`; for async work, use `task::future`.\n- If exposing cancellation, include `timeout_ms: Option<u32>` and `signal: Option<Unknown<'env>>` in options, create `CancelToken::new(...)`, and heartbeat in long loops.\n\n2. **Build generated bindings**\n\n- Run `bun --cwd=packages/natives run build`.\n- Confirm the generated `packages/natives/native/index.d.ts` includes the new export with the intended JS name/signature.\n- Confirm `packages/natives/native/index.js` still has generated enum exports appended when enum changes are involved.\n\n3. 
**Update consumers**\n\n- Import the new export directly from `@oh-my-pi/pi-natives`.\n- Replace only callsites where the native implementation is faster/equivalent and preserves behavior.\n- Remove obsolete JS implementation code in the same change when the native path becomes canonical.\n\n4. **Add benchmarks**\n\n- Put benchmarks next to the owning package (`packages/tui/bench`, `packages/natives/bench`, or `packages/coding-agent/bench`).\n- Include a JS baseline and native version in the same run.\n- Use `Bun.nanoseconds()` and a fixed iteration count.\n- Keep benchmark inputs realistic for the hot path.\n\n5. **Run focused verification**\n\n- Build the native package.\n- Run the benchmark.\n- Run the narrow tests or scenario covering the changed export/callsites.\n\n## Pain points and how to avoid them\n\n### 1) Stale platform/variant artifacts\n\nThe loader probes platform-tagged artifacts in deterministic order. For x64, selected variant candidates are tried before the unsuffixed default fallback:\n\n- `modern`: `pi_natives.<tag>-modern.node`, then `...-baseline.node`, then `pi_natives.<tag>.node`.\n- `baseline`: `pi_natives.<tag>-baseline.node`, then `pi_natives.<tag>.node`.\n\nNon-x64 uses `pi_natives.<tag>.node`.\n\nCompiled binaries also probe `<getNativesDir()>/<version>/...` and a legacy user-data directory before package/executable locations. 
If any earlier candidate is stale, a new export may appear missing.\n\n**Fix:** remove stale candidate/cache files and rebuild.\n\n```bash\nrm packages/natives/native/pi_natives.<platform>-<arch>.node\nrm packages/natives/native/pi_natives.<platform>-<arch>-modern.node\nrm packages/natives/native/pi_natives.<platform>-<arch>-baseline.node\nbun --cwd=packages/natives run build\n```\n\nFor compiled binaries, delete the versioned addon cache shown in the loader error (normally under `~/.omp/natives/<version>` unless `$XDG_DATA_HOME/omp` is used).\n\n### 2) Generated types do not match loaded binary\n\nThis can happen when `native/index.d.ts` was regenerated but the `.node` file being loaded is stale or from a different platform/variant.\n\nVerify the loaded export set from the actual candidate path:\n\n```bash\nbun -e 'const tag = `${process.platform}-${process.arch}`; const mod = require(`./packages/natives/native/pi_natives.${tag}.node`); console.log(Object.keys(mod).sort())'\n```\n\nFix the build/candidate mismatch. Do not paper over it with optional consumer checks if the export is required.\n\n### 3) Rust signature mismatch\n\nKeep N-API signatures simple and owned. Avoid borrowed references like `&str` in public exports. If you need structured data, use `#[napi(object)]` structs. If you need callbacks, use napi-rs `ThreadsafeFunction` and keep callback error/value behavior explicit.\n\n### 4) Enum runtime exports\n\nnapi-rs declarations alone are not enough for JS callers that use enum objects at runtime. `scripts/gen-enums.ts` appends enum objects to `native/index.js`. 
If you add or change a native enum, verify both `native/index.d.ts` and the generated enum export block in `native/index.js`.\n\n### 5) Benchmarking mistakes\n\n- Do not compare different inputs or allocations.\n- Keep JS and native using identical input arrays.\n- Run both in the same benchmark file to avoid skew.\n- Include enough iterations to smooth startup noise, but keep inputs realistic.\n\n## Benchmark template\n\n```ts\nconst ITERATIONS = 2000;\n\nfunction bench(name: string, fn: () => void): number {\n const start = Bun.nanoseconds();\n for (let i = 0; i < ITERATIONS; i++) fn();\n const elapsed = (Bun.nanoseconds() - start) / 1e6;\n console.log(\n `${name}: ${elapsed.toFixed(2)}ms total (${(elapsed / ITERATIONS).toFixed(6)}ms/op)`,\n );\n return elapsed;\n}\n\nbench(\"feature/js\", () => {\n jsImpl(sample);\n});\n\nbench(\"feature/native\", () => {\n nativeImpl(sample);\n});\n```\n\n## Verification checklist\n\n- Generated `native/index.d.ts` includes the new export and intended TS signature.\n- The loaded `.node` file's `Object.keys(require(candidate))` includes the new export.\n- Runtime enum objects are present when the change adds/changes enums.\n- Bench numbers are recorded in the PR/notes.\n- Call sites are updated only if native is faster/equal and behavior-compatible.\n- Obsolete JS code is removed when the native implementation becomes canonical.\n\n## Rule of thumb\n\n- If native is slower, do not switch callsites. Keep or remove the export based on whether it has a near-term owner.\n- If native is faster and behavior-compatible, switch callsites and keep a benchmark to catch regressions.\n",
39
39
  "provider-streaming-internals.md": "# Provider streaming internals\n\nThis document explains how token/tool streaming is normalized in `@oh-my-pi/pi-ai`, then propagated through `@oh-my-pi/pi-agent-core` and `coding-agent` session events.\n\n## End-to-end flow\n\n1. `streamSimple()` (`packages/ai/src/stream.ts`) maps generic options and dispatches to a provider stream function.\n2. Provider stream functions translate provider-native stream events into the unified `AssistantMessageEvent` sequence. Current built-ins include Anthropic, OpenAI Responses/Completions/Codex/Azure Responses, Google Gemini/Gemini CLI/Vertex, Bedrock Converse, Ollama, Cursor, plus GitLab Duo/Kimi wrappers and extension-registered custom APIs.\n3. Each provider pushes events into `AssistantMessageEventStream` (`packages/ai/src/utils/event-stream.ts`), which throttles delta events and exposes:\n - async iteration for incremental updates\n - `result()` for final `AssistantMessage`\n4. `agentLoop` (`packages/agent/src/agent-loop.ts`) consumes those events, mutates in-flight assistant state, and emits `message_update` events carrying the raw `assistantMessageEvent`.\n5. 
`AgentSession` (`packages/coding-agent/src/session/agent-session.ts`) subscribes to agent events, persists messages, drives extension hooks, and applies session behaviors (retry, compaction, TTSR, streaming-edit abort checks).\n\n## Unified stream contract in `@oh-my-pi/pi-ai`\n\nAll providers emit the same shape (`AssistantMessageEvent` in `packages/ai/src/types.ts`):\n\n- `start`\n- content block lifecycle triplets:\n - text: `text_start` → `text_delta`\\* → `text_end`\n - thinking: `thinking_start` → `thinking_delta`\\* → `thinking_end`\n - tool call: `toolcall_start` → `toolcall_delta`\\* → `toolcall_end`\n- terminal event:\n - `done` with `reason: \"stop\" | \"length\" | \"toolUse\"`\n - or `error` with `reason: \"aborted\" | \"error\"`\n\n`AssistantMessageEventStream` guarantees:\n\n- final result is resolved by terminal event (`done` or `error`)\n- deltas are batched/throttled (~50ms)\n- buffered deltas are flushed before non-delta events and before completion\n\n## Delta throttling and harmonization behavior\n\n`AssistantMessageEventStream` treats `text_delta`, `thinking_delta`, and `toolcall_delta` as mergeable events:\n\n- buffered deltas are merged only when **type + contentIndex** match\n- merge keeps the latest `partial` snapshot\n- non-delta events force immediate flush\n\nThis smooths high-frequency provider streams for TUI/event consumers, but is not provider backpressure: providers still produce at full speed, while the local stream buffers.\n\n## Provider normalization details\n\n## Anthropic (`anthropic-messages`)\n\nSource: `packages/ai/src/providers/anthropic.ts`\n\nNormalization points:\n\n- `message_start` initializes usage (input/output/cache tokens)\n- `content_block_start` maps to text/thinking/toolcall starts\n- `content_block_delta` maps:\n - `text_delta` → `text_delta`\n - `thinking_delta` → `thinking_delta`\n - `input_json_delta` → `toolcall_delta`\n - `signature_delta` updates `thinkingSignature` only (no event)\n- 
`content_block_stop` emits corresponding `*_end`\n- `message_delta.stop_reason` maps via `mapStopReason()`\n\nTool-call argument streaming:\n\n- each tool block carries internal `partialJson`\n- every JSON delta appends to `partialJson`\n- `arguments` are reparsed on each delta via `parseStreamingJson()`\n- `toolcall_end` reparses once more, then strips `partialJson`\n\n## OpenAI Responses family (`openai-responses`, `openai-codex-responses`, `azure-openai-responses`)\n\nSources: `packages/ai/src/providers/openai-responses.ts`, `openai-codex-responses.ts`, and `azure-openai-responses.ts`\n\nNormalization points:\n\n- `response.output_item.added` starts reasoning/text/function-call blocks\n- reasoning summary events (`response.reasoning_summary_text.delta`) become `thinking_delta`\n- output/refusal deltas become `text_delta`\n- `response.function_call_arguments.delta` becomes `toolcall_delta`\n- `response.output_item.done` emits `thinking_end` / `text_end` / `toolcall_end`\n- `response.completed` maps status to stop reason and usage\n\nTool-call argument streaming:\n\n- same `partialJson` accumulation pattern as Anthropic\n- providers that send only `response.function_call_arguments.done` still populate final args\n- tool call IDs are normalized as `\"<call_id>|<item_id>\"`\n\n## Google Generative AI (`google-generative-ai`)\n\nSource: `packages/ai/src/providers/google.ts`\n\nNormalization points:\n\n- iterates `candidate.content.parts`\n- text parts are split into thinking vs text by `isThinkingPart(part)`\n- block transitions close previous block before starting a new one\n- `part.functionCall` is treated as a complete tool call (start/delta/end emitted immediately)\n- finish reason mapped by `mapStopReason()` from `google-shared.ts`\n\nTool-call argument streaming:\n\n- function call args arrive as structured object, not incremental JSON text\n- implementation emits one synthetic `toolcall_delta` containing `JSON.stringify(arguments)`\n- no partial JSON parser 
needed for Google in this path\n\n## Partial tool-call JSON accumulation and recovery\n\nShared behavior for Anthropic/OpenAI Responses uses `parseStreamingJson()` (`packages/ai/src/utils/json-parse.ts`):\n\n1. try `JSON.parse`\n2. fallback to `partial-json` parser for incomplete fragments\n3. if both fail, return `{}`\n\nImplications:\n\n- malformed or truncated argument deltas do not crash stream processing immediately\n- in-progress `arguments` may temporarily be `{}`\n- later valid deltas can recover structured arguments because parsing is retried on every append\n- final `toolcall_end` performs one more parse attempt before emission\n\n## Stop reasons vs transport/runtime errors\n\nProvider stop reasons are mapped to normalized `stopReason`:\n\n- Anthropic: `end_turn`→`stop`, `max_tokens`→`length`, `tool_use`→`toolUse`, safety/refusal cases→`error`\n- OpenAI Responses: `completed`→`stop`, `incomplete`→`length`, `failed/cancelled`→`error`\n- Google: `STOP`→`stop`, `MAX_TOKENS`→`length`, safety/prohibited/malformed-function-call classes→`error`\n\nError semantics are split in two stages:\n\n1. **Model completion semantics** (provider reported finish reason/status)\n2. **Transport/runtime failure** (network/client/parser/abort exceptions)\n\nIf provider stream throws or signals failure, each provider wrapper catches and emits terminal `error` event with:\n\n- `stopReason = \"aborted\"` when abort signal is set\n- otherwise `stopReason = \"error\"`\n- `errorMessage = formatErrorMessageWithRetryAfter(error)`\n\n## Malformed chunk / SSE parse failure behavior\n\nFor these provider paths, chunk/SSE framing is handled by vendor SDK streams (Anthropic SDK, OpenAI SDK, Google SDK). 
This code does not implement a custom SSE decoder here.\n\nObserved behavior in current implementation:\n\n- malformed chunk/SSE parsing at SDK level surfaces as an exception or stream `error` event\n- provider wrapper converts that into unified terminal `error` event\n- no provider-specific resume/retry inside the stream function itself\n- higher-level retries are handled in `AgentSession` auto-retry logic (message-level retry, not stream-chunk replay)\n\n## Cancellation boundaries\n\nCancellation is layered:\n\n- AI provider request: `options.signal` is passed into provider client stream call.\n- Provider wrapper: after stream loop, aborted signal forces error path (`\"Request was aborted\"`).\n- Agent loop: checks `signal.aborted` before handling each provider event and can synthesize an aborted assistant message from the latest partial.\n- Session/agent controls: `AgentSession.abort()` -> `agent.abort()` -> shared abort controller cancellation.\n\nTool execution cancellation is separate from model stream cancellation:\n\n- tool runners use `AbortSignal.any([agentSignal, steeringAbortSignal])`\n- steering interrupts can abort remaining tool execution while preserving already-produced tool results\n\n## Backpressure boundaries\n\nThere is no hard backpressure mechanism between provider SDK stream and downstream consumers:\n\n- `EventStream` uses in-memory queues with no max size\n- throttling reduces UI update rate but does not slow provider intake\n- if consumers lag significantly, queued events can grow until completion\n\nCurrent design favors responsiveness and simple ordering over bounded-buffer flow control.\n\n## How stream events surface as agent/session events\n\n`agentLoop.streamAssistantResponse()` bridges `AssistantMessageEvent` to `AgentEvent`:\n\n- on `start`: pushes placeholder assistant message and emits `message_start`\n- on block events (`text_*`, `thinking_*`, `toolcall_*`): updates last assistant message, emits `message_update` with raw 
`assistantMessageEvent`\n- on terminal (`done`/`error`): resolves final message from `response.result()`, emits `message_end`\n\n`AgentSession` then consumes those events for session-level behaviors:\n\n- TTSR watches `message_update.assistantMessageEvent` for `text_delta`, `thinking_delta`, and `toolcall_delta`\n- streaming edit guard inspects `toolcall_delta`/`toolcall_end` on `edit` calls and can abort early\n- persistence writes finalized messages at `message_end`\n- auto-retry examines assistant `stopReason === \"error\"` plus `errorMessage` heuristics\n\n## Unified vs provider-specific responsibilities\n\nUnified (common contract):\n\n- event shape (`AssistantMessageEvent`)\n- final result extraction (`done`/`error`)\n- delta throttling + merge rules\n- agent/session event propagation model\n\nProvider-specific (not fully abstracted):\n\n- upstream event taxonomies and mapping logic\n- stop-reason translation tables\n- tool-call ID conventions\n- reasoning/thinking block semantics and signatures\n- usage token semantics and availability timing\n- message conversion constraints per API\n\n## Implementation files\n\n- [`../../ai/src/stream.ts`](../packages/ai/src/stream.ts) — provider dispatch, option mapping, API key/session plumbing, custom API dispatch, and provider-specific credential handling.\n- [`../../ai/src/utils/event-stream.ts`](../packages/ai/src/utils/event-stream.ts) — generic stream queue + assistant delta throttling.\n- [`../../ai/src/utils/json-parse.ts`](../packages/ai/src/utils/json-parse.ts) — partial JSON parsing for streamed tool arguments.\n- [`../../ai/src/providers/anthropic.ts`](../packages/ai/src/providers/anthropic.ts) — Anthropic event translation and tool JSON delta accumulation.\n- [`../../ai/src/providers/openai-responses.ts`](../packages/ai/src/providers/openai-responses.ts), [`openai-codex-responses.ts`](../packages/ai/src/providers/openai-codex-responses.ts), 
[`azure-openai-responses.ts`](../packages/ai/src/providers/azure-openai-responses.ts) — Responses-family event translation and status mapping.\n- [`../../ai/src/providers/google.ts`](../packages/ai/src/providers/google.ts), [`google-gemini-cli.ts`](../packages/ai/src/providers/google-gemini-cli.ts), [`google-vertex.ts`](../packages/ai/src/providers/google-vertex.ts) — Gemini stream chunk-to-block translation variants.\n- [`../../ai/src/providers/google-shared.ts`](../packages/ai/src/providers/google-shared.ts) — Gemini finish-reason mapping and shared conversion rules.\n- [`../../ai/src/providers/amazon-bedrock.ts`](../packages/ai/src/providers/amazon-bedrock.ts), [`openai-completions.ts`](../packages/ai/src/providers/openai-completions.ts), [`ollama.ts`](../packages/ai/src/providers/ollama.ts), [`cursor.ts`](../packages/ai/src/providers/cursor.ts) — additional built-in stream adapters using the same event contract.\n- [`../../agent/src/agent-loop.ts`](../packages/agent/src/agent-loop.ts) — provider stream consumption and `message_update` bridging.\n- [`../src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts) — session-level handling of streaming updates, abort, retry, and persistence.\n",
40
- "python-repl.md": "# Eval Tool Python Backend\n\nThis document describes the Python execution stack in `packages/coding-agent`.\nIt covers tool behavior, runner lifecycle, environment handling, execution semantics, output rendering, supported magics, and operational failure modes.\n\n## Scope and Key Files\n\n- Tool surface: `src/tools/eval.ts`\n- Session/per-call kernel orchestration: `src/eval/py/executor.ts`\n- Subprocess kernel client: `src/eval/py/kernel.ts`\n- Python wrapper / NDJSON server: `src/eval/py/runner.py`\n- Prelude helpers loaded into every kernel: `src/eval/py/prelude.py`\n- MIME bundle renderer (text + structured outputs): `src/eval/py/display.ts`\n- Interactive-mode renderer for user-triggered Python runs: `src/modes/components/eval-execution.ts`\n- Runtime/env filtering and Python resolution: `src/eval/py/runtime.ts`\n\n## What eval's Python backend is\n\nThe `eval` tool executes one or more Python cells inside a long-lived `python3` subprocess that speaks NDJSON over stdin/stdout. No Jupyter, no kernel gateway, no extra pip dependencies — a vanilla Python 3.8+ interpreter is enough. Rich `display()` output (PIL, pandas, plotly, matplotlib figures) keeps working because the wrapper reimplements the MIME-bundle dispatch that IPython previously provided.\n\nTool params:\n\n```ts\n{\n cells: Array<{ code: string; title?: string }>;\n timeout?: number; // seconds, clamped to 1..600, default 30\n reset?: boolean; // reset selected runtime before the first cell only\n}\n```\n\nThe tool is `concurrency = \"exclusive\"` for a session, so calls do not overlap.\n\n## Kernel lifecycle\n\nEach kernel is a single Python subprocess: `python -u <runner.py>`. The runner is bundled with the host binary (Bun text import), written to `~/.omp/python-env`-adjacent tmp cache once per script-hash, and reused by every subsequent spawn.\n\nKernel startup sequence:\n\n1. 
Availability check (`checkPythonKernelAvailability`) — verifies that a Python interpreter resolves and runs.\n2. Spawn `python -u runner.py` with filtered env and `cwd`.\n3. Send an init request that runs `os.chdir(cwd)`, injects env entries, and adds `cwd` to `sys.path`.\n4. Execute `PYTHON_PRELUDE` (idempotent — only initializes once per process).\n\nKernel shutdown:\n\n- Send `{\"type\": \"exit\"}` over stdin.\n- Wait for process exit with `SHUTDOWN_GRACE_MS` budget.\n- Escalate to `SIGTERM` and finally `SIGKILL` if the process does not exit in time.\n\n## Wire protocol (NDJSON, host ↔ runner)\n\nOne JSON object per line, UTF-8, `\\n` terminated.\n\nHost → runner:\n\n```jsonc\n{\"id\": \"<reqId>\", \"code\": \"<source>\", \"silent\": false, \"storeHistory\": true}\n{\"type\": \"exit\"}\n```\n\nRunner → host:\n\n```jsonc\n{\"type\": \"started\", \"id\": \"<reqId>\"}\n{\"type\": \"stdout\", \"id\": \"<reqId>\", \"data\": \"...\"}\n{\"type\": \"stderr\", \"id\": \"<reqId>\", \"data\": \"...\"}\n{\"type\": \"display\", \"id\": \"<reqId>\", \"bundle\": {<mime>: <value>}}\n{\"type\": \"result\", \"id\": \"<reqId>\", \"bundle\": {<mime>: <value>}}\n{\"type\": \"error\", \"id\": \"<reqId>\", \"ename\": \"...\", \"evalue\": \"...\", \"traceback\": [\"...\"]}\n{\"type\": \"done\", \"id\": \"<reqId>\", \"status\": \"ok\"|\"error\", \"executionCount\": N, \"cancelled\": false}\n```\n\nStatus events the prelude emits (e.g. `_emit_status(\"find\", count=…)`) ship inside display bundles under `application/x-omp-status` so the existing TUI status renderer keeps working.\n\n## Magics\n\nThe runner's source transformer rewrites IPython-style magics to plain Python calls before parsing. Supported set:\n\n| Magic | Effect |\n| --- | --- |\n| `%pip <args>` | `python -m pip <args>` with live streaming output. Newly installed packages are evicted from `sys.modules` so the next `import` picks up the fresh install. 
|\n| `%cd <path>` | `os.chdir(path)` (with `~` expansion); emits status event. |\n| `%pwd` | Returns `os.getcwd()`. |\n| `%ls [path]` | Returns `sorted(os.listdir(path))`. |\n| `%env [KEY[=VAL]]` | List, read, or set env vars (matches prelude `env()` semantics). |\n| `%set_env KEY VALUE` | Set `os.environ[KEY]`. |\n| `%time <expr>` / `%timeit <expr>` | Time the expression; emits status event with elapsed ms. |\n| `%who` / `%whos` | List user-namespace names. |\n| `%reset` | Clear user globals and re-inject prelude. |\n| `%load <path>` | Read a file into a fresh cell and execute. |\n| `%run <path>` | `runpy.run_path` and merge globals back. |\n| `%%bash` / `%%sh` | Run the cell body via `bash`/`sh`. |\n| `%%capture [name]` | Run body with stdout/stderr captured into `name`. |\n| `%%timeit` | Time the cell body. |\n| `%%writefile <path>` | Write body to file. |\n| `!cmd` / `var = !cmd` | Run command via subprocess shell; returns an SList-style result with `.n` / `.s` helpers. |\n| `var = %name args` | Assignment forms work for line magics and `!cmd`. 
|\n\nUnknown magic names raise `NameError: UsageError: ...` inside the cell.\n\n## Session persistence semantics\n\n`python.kernelMode` controls retained kernel reuse:\n\n- `session` (default)\n - Reuses kernel sessions keyed by session file plus cwd when a session file exists; otherwise by cwd.\n - Execution is serialized per session via a queue.\n - Idle sessions are evicted after 5 minutes.\n - At most 4 sessions; oldest is evicted on overflow.\n - Heartbeat checks detect dead kernels.\n - Auto-restart allowed once; repeated crash ⇒ hard failure.\n- `per-call`\n - Spawns a fresh subprocess for each request.\n - Shuts the subprocess down after the request.\n - No cross-call state persistence.\n\n### Multi-cell behavior in a single tool call\n\nCells run sequentially in the same kernel instance for that tool call.\n\nIf an intermediate cell fails:\n\n- Earlier cell state remains in memory.\n- Tool returns a targeted error indicating which cell failed.\n- Later cells are not executed.\n\n`reset=true` only applies to the first cell execution in that call.\n\n## Environment filtering and runtime resolution\n\nEnvironment is filtered before launching the runner:\n\n- Allowlist includes core vars like `PATH`, `HOME`, locale vars, `VIRTUAL_ENV`, `PYTHONPATH`, etc.\n- Allow-prefixes: `LC_`, `XDG_`, `PI_`\n- Denylist strips common API keys (OpenAI/Anthropic/Gemini/etc.)\n\nRuntime selection order:\n\n1. Active/located venv (`VIRTUAL_ENV`, then `<cwd>/.venv`, `<cwd>/venv`)\n2. Managed venv at `~/.omp/python-env`\n3. 
`python` or `python3` on PATH\n\nWhen a venv is selected, its bin/Scripts path is prepended to `PATH`.\n\nThe runner additionally receives `PYTHONUNBUFFERED=1` and `PYTHONIOENCODING=utf-8` so streamed output reaches the host promptly.\n\n## Tool availability and mode selection\n\nThe `eval.py` / `eval.js` settings (both default `true`), plus the optional `PI_PY` override, control which eval backends are exposed:\n\n- Python backend only (`eval.py=true`, `eval.js=false`)\n- JavaScript backend only (`eval.py=false`, `eval.js=true`)\n- both backends\n\n`PI_PY` accepted values:\n\n- `0` / `bash` → JavaScript backend only\n- `1` / `py` → Python backend only\n- `mix` / `both` → both backends\n\nIf Python preflight fails and `eval.js` is enabled, `eval` remains available and dispatches to JavaScript unless `language: \"python\"` is explicitly requested.\n\n## Execution flow and cancellation/timeout\n\n### Tool-level timeout\n\n`eval` timeout is in seconds, default 30, clamped to `1..600`. The tool combines the caller's abort signal and the timeout signal with `AbortSignal.any(...)`.\n\n### Kernel execution cancellation\n\nOn abort/timeout:\n\n- The host sends `kill(\"SIGINT\")` to the runner subprocess.\n- The runner's exec-time signal handler raises `KeyboardInterrupt` inside the user code.\n- The result includes `cancelled=true`; the timeout path annotates the output as `Command timed out after <n> seconds`.\n- Between requests the runner installs `SIG_IGN` for SIGINT so a stray cancel does not tear down the kernel.\n\nIf a second cancel is required (runner stuck in C code), the host escalates to `SIGTERM` and the session restarts on the next call.\n\n### stdin behavior\n\nInteractive stdin is not supported. 
The runner does not forward `input()` prompts; user code that calls `input()` blocks until cancellation.\n\n## Output capture and rendering\n\n### Captured output classes\n\nFrom runner frames:\n\n- `stdout` / `stderr` → plain text chunks\n- `display` / `result` → rich display handling (MIME bundle)\n- `error` → traceback text\n- `application/x-omp-status` MIME inside `display` → structured status events\n\nDisplay MIME precedence:\n\n1. `text/markdown`\n2. `text/plain`\n3. `text/html` (converted to basic markdown)\n\nAdditionally captured as structured outputs:\n\n- `application/json` → JSON tree data\n- `image/png` / `image/jpeg` → image payloads\n- `application/x-omp-status` → status events\n\n### Matplotlib\n\nThe runner sets `MPLBACKEND=Agg` as an environ default so figures render off-screen. After every cell, `pyplot.get_fignums()` is iterated; each figure is saved to PNG, emitted as an `image/png` display, and closed.\n\n### Storage and truncation\n\nOutput is streamed through `OutputSink` and may be persisted to artifact storage. Tool results can include truncation metadata and `artifact://<id>` for full output recovery.\n\n### Renderer behavior\n\n- Tool renderer (`eval.ts`):\n - shows code-cell blocks with per-cell status\n - collapsed preview defaults to 10 lines\n - supports expanded mode for full output and richer status detail\n- Interactive renderer (`eval-execution.ts`):\n - used for user-triggered Python execution in TUI\n - collapsed preview defaults to 20 lines\n - clamps very long individual lines to 4000 chars for display safety\n - shows cancellation/error/truncation notices\n\n## Operational troubleshooting\n\n- **Python backend not available** — Check `eval.py`, `PI_PY`, and that `python`/`python3` is on PATH. If preflight fails and `eval.js` is enabled, omit `language` or pass `language: \"js\"` to use JavaScript.\n- **No Python on PATH** — Install a system Python 3.8+ or place a venv at `~/.omp/python-env`. 
`omp setup python --check` reports the resolved interpreter.\n- **Execution hangs then times out** — Increase tool `timeout` (max 600s) if workload is legitimate. For stuck native code, cancellation triggers `SIGINT` first then escalates; the session restarts on the next request.\n- **stdin/input prompts in Python code** — `input()` is not supported; pass data programmatically.\n- **Working directory errors** — Tool validates `cwd` exists and is a directory before execution.\n\n## Relevant environment variables\n\n- `PI_PY` — tool exposure override\n- `PI_PYTHON_SKIP_CHECK=1` — bypass Python preflight/warm checks\n- `PI_PYTHON_INTEGRATION=1` — enable gated integration tests that spawn a real Python\n- `PI_PYTHON_IPC_TRACE=1` — log NDJSON frames exchanged with the runner subprocess\n- `PI_DEBUG_STARTUP=1` — emit startup-stage debug markers\n",
40
+ "python-repl.md": "# Eval Tool Python Backend\n\nThis document describes the Python execution stack in `packages/coding-agent`.\nIt covers tool behavior, runner lifecycle, environment handling, execution semantics, output rendering, supported magics, and operational failure modes.\n\n## Scope and Key Files\n\n- Tool surface: `src/tools/eval.ts`\n- Session/per-call kernel orchestration: `src/eval/py/executor.ts`\n- Subprocess kernel client: `src/eval/py/kernel.ts`\n- Python wrapper / NDJSON server: `src/eval/py/runner.py`\n- Prelude helpers loaded into every kernel: `src/eval/py/prelude.py`\n- MIME bundle renderer (text + structured outputs): `src/eval/py/display.ts`\n- Interactive-mode renderer for user-triggered Python runs: `src/modes/components/eval-execution.ts`\n- Runtime/env filtering and Python resolution: `src/eval/py/runtime.ts`\n\n## What eval's Python backend is\n\nThe `eval` tool executes one or more Python cells inside a long-lived `python3` subprocess that speaks NDJSON over stdin/stdout. No Jupyter, no kernel gateway, no extra pip dependencies — a vanilla Python 3.8+ interpreter is enough. Rich `display()` output (PIL, pandas, plotly, matplotlib figures) keeps working because the wrapper reimplements the MIME-bundle dispatch that IPython previously provided.\n\nTool params:\n\n```ts\n{\n cells: Array<{ code: string; title?: string }>;\n timeout?: number; // seconds, clamped to 1..600, default 30\n reset?: boolean; // reset selected runtime before the first cell only\n}\n```\n\nThe tool is `concurrency = \"exclusive\"` for a session, so calls do not overlap.\n\n## Kernel lifecycle\n\nEach kernel is a single Python subprocess: `python -u <runner.py>`. The runner is bundled with the host binary (Bun text import), written to `~/.omp/python-env`-adjacent tmp cache once per script-hash, and reused by every subsequent spawn.\n\nKernel startup sequence:\n\n1. 
Availability check (`checkPythonKernelAvailability`) — verifies that a Python interpreter resolves and runs.\n2. Spawn `python -u runner.py` with filtered env and `cwd`.\n3. Send an init request that runs `os.chdir(cwd)`, injects env entries, and adds `cwd` to `sys.path`.\n4. Execute `PYTHON_PRELUDE` (idempotent — only initializes once per process).\n\nKernel shutdown:\n\n- Send `{\"type\": \"exit\"}` over stdin.\n- Wait for process exit with `SHUTDOWN_GRACE_MS` budget.\n- Escalate to `SIGTERM` and finally `SIGKILL` if the process does not exit in time.\n\n## Wire protocol (NDJSON, host ↔ runner)\n\nOne JSON object per line, UTF-8, `\\n` terminated.\n\nHost → runner:\n\n```jsonc\n{\"id\": \"<reqId>\", \"code\": \"<source>\", \"silent\": false, \"storeHistory\": true}\n{\"type\": \"exit\"}\n```\n\nRunner → host:\n\n```jsonc\n{\"type\": \"started\", \"id\": \"<reqId>\"}\n{\"type\": \"stdout\", \"id\": \"<reqId>\", \"data\": \"...\"}\n{\"type\": \"stderr\", \"id\": \"<reqId>\", \"data\": \"...\"}\n{\"type\": \"display\", \"id\": \"<reqId>\", \"bundle\": {<mime>: <value>}}\n{\"type\": \"result\", \"id\": \"<reqId>\", \"bundle\": {<mime>: <value>}}\n{\"type\": \"error\", \"id\": \"<reqId>\", \"ename\": \"...\", \"evalue\": \"...\", \"traceback\": [\"...\"]}\n{\"type\": \"done\", \"id\": \"<reqId>\", \"status\": \"ok\"|\"error\", \"executionCount\": N, \"cancelled\": false}\n```\n\nStatus events the prelude emits (e.g. `_emit_status(\"find\", count=…)`) ship inside display bundles under `application/x-omp-status` so the existing TUI status renderer keeps working.\n\n## Magics\n\nThe runner's source transformer rewrites IPython-style magics to plain Python calls before parsing. Supported set:\n\n| Magic | Effect |\n| --- | --- |\n| `%pip <args>` | `python -m pip <args>` with live streaming output. Newly installed packages are evicted from `sys.modules` so the next `import` picks up the fresh install. 
|\n| `%cd <path>` | `os.chdir(path)` (with `~` expansion); emits status event. |\n| `%pwd` | Returns `os.getcwd()`. |\n| `%ls [path]` | Returns `sorted(os.listdir(path))`. |\n| `%env [KEY[=VAL]]` | List, read, or set env vars (matches prelude `env()` semantics). |\n| `%set_env KEY VALUE` | Set `os.environ[KEY]`. |\n| `%time <expr>` / `%timeit <expr>` | Time the expression; emits status event with elapsed ms. |\n| `%who` / `%whos` | List user-namespace names. |\n| `%reset` | Clear user globals and re-inject prelude. |\n| `%load <path>` | Read a file into a fresh cell and execute. |\n| `%run <path>` | `runpy.run_path` and merge globals back. |\n| `%%bash` / `%%sh` | Run the cell body via `bash`/`sh`. |\n| `%%capture [name]` | Run body with stdout/stderr captured into `name`. |\n| `%%timeit` | Time the cell body. |\n| `%%writefile <path>` | Write body to file. |\n| `!cmd` / `var = !cmd` | Run command via subprocess shell; returns an SList-style result with `.n` / `.s` helpers. |\n| `var = %name args` | Assignment forms work for line magics and `!cmd`. 
|\n\nUnknown magic names raise `NameError: UsageError: ...` inside the cell.\n\n## Session persistence semantics\n\n`python.kernelMode` controls retained kernel reuse:\n\n- `session` (default)\n - Reuses kernel sessions keyed by session file plus cwd when a session file exists; otherwise by cwd.\n - Execution is serialized per session via a queue.\n - Idle sessions are evicted after 5 minutes.\n - At most 4 sessions; oldest is evicted on overflow.\n - Heartbeat checks detect dead kernels.\n - Auto-restart allowed once; repeated crash ⇒ hard failure.\n- `per-call`\n - Spawns a fresh subprocess for each request.\n - Shuts the subprocess down after the request.\n - No cross-call state persistence.\n\n### Multi-cell behavior in a single tool call\n\nCells run sequentially in the same kernel instance for that tool call.\n\nIf an intermediate cell fails:\n\n- Earlier cell state remains in memory.\n- Tool returns a targeted error indicating which cell failed.\n- Later cells are not executed.\n\n`reset=true` only applies to the first cell execution in that call.\n\n## Environment filtering and runtime resolution\n\nEnvironment is filtered before launching the runner:\n\n- Allowlist includes core vars like `PATH`, `HOME`, locale vars, `VIRTUAL_ENV`, `PYTHONPATH`, etc.\n- Allow-prefixes: `LC_`, `XDG_`, `PI_`\n- Denylist strips common API keys (OpenAI/Anthropic/Gemini/etc.)\n\nRuntime selection order:\n\n1. Active/located venv (`VIRTUAL_ENV`, then `<cwd>/.venv`, `<cwd>/venv`)\n2. Managed venv at `~/.omp/python-env`\n3. 
`python` or `python3` on PATH\n\nWhen a venv is selected, its bin/Scripts path is prepended to `PATH`.\n\nThe runner additionally receives `PYTHONUNBUFFERED=1` and `PYTHONIOENCODING=utf-8` so streamed output reaches the host promptly.\n\n## Tool availability and mode selection\n\n`eval.py` / `eval.js` (both default `true`) plus optional `PI_PY` override controls eval backend exposure:\n\n- Python backend only (`eval.py=true`, `eval.js=false`)\n- JavaScript backend only (`eval.py=false`, `eval.js=true`)\n- both backends\n\n`PI_PY` accepted values:\n\n- `0` / `bash` → JavaScript backend only\n- `1` / `py` → Python backend only\n- `mix` / `both` → both backends\n\nIf Python preflight fails and `eval.js` is enabled, `eval` remains available and dispatches to JavaScript unless `language: \"python\"` is explicitly requested.\n\n## Execution flow and cancellation/timeout\n\n### Tool-level timeout\n\n`eval` timeout is in seconds, default 30, clamped to `1..600`. The tool combines caller abort signal and timeout signal with `AbortSignal.any(...)`.\n\n### Kernel execution cancellation\n\nOn abort/timeout:\n\n- The host sends `kill(\"SIGINT\")` to the runner subprocess.\n- The runner's exec-time signal handler raises `KeyboardInterrupt` inside the user code.\n- Result includes `cancelled=true`; timeout path annotates output as `Command timed out after <n> seconds`.\n- Between requests the runner installs `SIG_IGN` for SIGINT so a stray cancel does not tear down the kernel.\n\nIf a second cancel is required (runner stuck in C code), the host escalates to `SIGTERM` and the session restarts on the next call.\n\n### stdin behavior\n\nInteractive stdin is not supported. 
The runner does not forward `input()` prompts; user code that calls `input()` blocks until cancellation.\n\n## Output capture and rendering\n\n### Captured output classes\n\nFrom runner frames:\n\n- `stdout` / `stderr` → plain text chunks\n- `display` / `result` → rich display handling (MIME bundle)\n- `error` → traceback text\n- `application/x-omp-status` MIME inside `display` → structured status events\n\nDisplay MIME precedence:\n\n1. `text/markdown`\n2. `text/plain`\n3. `text/html` (converted to basic markdown)\n\nAdditionally captured as structured outputs:\n\n- `application/json` → JSON tree data\n- `image/png` / `image/jpeg` → image payloads\n- `application/x-omp-status` → status events\n\n### Matplotlib\n\nThe runner sets `MPLBACKEND=Agg` as an environ default so figures render off-screen. After every cell, `pyplot.get_fignums()` is iterated; each figure is saved to PNG, emitted as an `image/png` display, and closed.\n\n### Storage and truncation\n\nOutput is streamed through `OutputSink` and may be persisted to artifact storage. Tool results can include truncation metadata and `artifact://<id>` for full output recovery.\n\n### Renderer behavior\n\n- Tool renderer (`eval.ts`):\n - shows code-cell blocks with per-cell status\n - collapsed preview defaults to 10 lines\n - supports expanded mode for full output and richer status detail\n- Interactive renderer (`eval-execution.ts`):\n - used for user-triggered Python execution in TUI\n - collapsed preview defaults to 20 lines\n - clamps very long individual lines to 4000 chars for display safety\n - shows cancellation/error/truncation notices\n\n## Operational troubleshooting\n\n- **Python backend not available** — Check `eval.py`, `PI_PY`, and that `python`/`python3` is on PATH. If preflight fails and `eval.js` is enabled, omit `language` or pass `language: \"js\"` to use JavaScript.\n- **No Python on PATH** — Install a system Python 3.8+ or place a venv at `~/.omp/python-env`. 
`omp setup python --check` reports the resolved interpreter.\n- **Execution hangs then times out** — Increase the tool `timeout` (max 600s) if the workload is legitimate. For stuck native code, cancellation sends `SIGINT` first, then escalates to `SIGTERM`; the session restarts on the next request.\n- **stdin/input prompts in Python code** — `input()` is not supported; pass data programmatically.\n- **Working directory errors** — The tool validates that `cwd` exists and is a directory before execution.\n\n## Relevant environment variables\n\n- `PI_PY` — tool exposure override\n- `PI_PYTHON_SKIP_CHECK=1` — bypass Python preflight/warm checks\n- `PI_PYTHON_INTEGRATION=1` — enable gated integration tests that spawn a real Python interpreter\n- `PI_PYTHON_IPC_TRACE=1` — log NDJSON frames exchanged with the runner subprocess\n",
41
41
  "render-mermaid.md": "# RenderMermaid\n\n`RenderMermaid` is an optional built-in tool that renders Mermaid source to terminal-friendly text.\n\n## Enable it\n\nDisabled by default. Turn it on in `/settings` under **Tools → Render Mermaid**, or in `~/.omp/agent/config.yml`:\n\n```yaml\nrenderMermaid:\n enabled: true\n```\n\n## What it does\n\n- Tool name: `render_mermaid`\n- Input: Mermaid source in the required `mermaid` field\n- Output: rendered ASCII/Unicode text, not SVG or PNG\n- Storage: when artifact storage is available, the full render is also saved as an `artifact://...`\n\nThere are no model-specific or environment-variable prerequisites. Once enabled, any model that can call built-in tools can use it.\n\n## Parameters\n\n```json\n{\n \"mermaid\": \"graph TD\\n A[Start] --> B[Stop]\",\n \"config\": {\n \"useAscii\": false,\n \"paddingX\": 2,\n \"paddingY\": 2,\n \"boxBorderPadding\": 0\n }\n}\n```\n\nAvailable `config` fields:\n\n- `useAscii` — `true` for plain ASCII, `false` for Unicode box-drawing characters (default and usually more readable)\n- `paddingX` — horizontal spacing between nodes\n- `paddingY` — vertical spacing between nodes\n- `boxBorderPadding` — inner padding inside node boxes\n\n## Current limitations\n\n`RenderMermaid` uses the `beautiful-mermaid` ASCII renderer. It works best for flowcharts and small diagrams.\n\nComplex sequence diagrams, especially with `alt` / `else` blocks, can become very wide in a terminal. That is current renderer behavior, not a provider or model configuration problem.\n\nIf a sequence diagram is hard to read:\n\n1. Keep Unicode output (`useAscii: false`)\n2. Reduce spacing with a tighter config such as `paddingX: 2`, `paddingY: 2`, `boxBorderPadding: 0`\n3. Prefer smaller sub-diagrams over one large sequence diagram\n4. 
Open the saved artifact if the inline preview is truncated in the TUI\n\n## Example\n\nInput:\n\n```mermaid\ngraph TD\n A[Start] --> B{Decision}\n B -->|Yes| C[Action]\n B -->|No| D[End]\n```\n\nTypical result:\n\n```text\n┌─────┐\n│Start│\n└─────┘\n │\n ▼\n┌────────┐\n│Decision│\n└────────┘\n```\n",
42
42
  "resolve-tool-runtime.md": "# Resolve tool runtime internals\n\nThis document explains how preview/apply workflows are modeled in coding-agent and how built-in or custom tools can participate via the tool-choice queue and `pushPendingAction`.\n\n## Scope and key files\n\n- [`src/tools/resolve.ts`](../packages/coding-agent/src/tools/resolve.ts)\n- [`src/tools/ast-edit.ts`](../packages/coding-agent/src/tools/ast-edit.ts)\n- [`src/extensibility/custom-tools/types.ts`](../packages/coding-agent/src/extensibility/custom-tools/types.ts)\n- [`src/extensibility/custom-tools/loader.ts`](../packages/coding-agent/src/extensibility/custom-tools/loader.ts)\n- [`src/sdk.ts`](../packages/coding-agent/src/sdk.ts)\n\n## What `resolve` does\n\n`resolve` is a hidden tool that finalizes a pending preview action.\n\n- `action: \"apply\"` executes the queued action's `apply(reason)` callback and returns that result with resolve metadata.\n- `action: \"discard\"` invokes `reject(reason)` if provided; otherwise returns `Discarded: <label>. Reason: <reason>`.\n\nIf no pending action exists, `resolve` fails with:\n\n- `No pending action to resolve. Nothing to apply or discard.`\n\n## Pending actions use the tool-choice queue\n\nPreview producers call `queueResolveHandler(...)`, which pushes a one-shot forced `resolve` directive onto the session tool-choice queue and adds a `resolve-reminder` steering message.\n\nRuntime behavior:\n\n- the queued handler owns the pending `apply`/`reject` callbacks,\n- `resolve` looks up the current queue invoker with `session.peekQueueInvoker()`,\n- if the model rejects the forced tool choice, the queue directive is requeued,\n- `resolve` does not maintain a separate pending-action stack.\n\nMultiple pending previews therefore follow the active tool-choice queue ordering, not an independent pending-action store.\n\n## Built-in producer example (`ast_edit`)\n\n`ast_edit` previews structural replacements first. 
When the preview has replacements and is not applied yet, it queues a resolve handler that contains:\n\n- label (human-readable summary)\n- `sourceToolName` (`ast_edit`)\n- `apply(reason: string)` callback that reruns AST edit with `dryRun: false`\n\n`resolve(action=\"apply\", reason=\"...\")` passes `reason` into this callback.\n\n## Custom tools: `pushPendingAction`\n\nCustom tools can register resolve-compatible pending actions through `CustomToolAPI.pushPendingAction(...)`. The custom tool loader forwards these actions to `queueResolveHandler(...)` when that hook is available.\n\n`CustomToolPendingAction`:\n\n- `label: string` (required)\n- `apply(reason: string): Promise<AgentToolResult<unknown>>` (required) — invoked on apply; `reason` is the string passed to `resolve`\n- `reject?(reason: string): Promise<AgentToolResult<unknown> | undefined>` (optional) — invoked on discard; return value replaces the default \"Discarded\" message if provided\n- `details?: unknown` exists on the public custom-tool type but is not currently forwarded by the loader into resolve metadata\n- `sourceToolName?: string` (optional, defaults to `\"custom_tool\"`)\n\n### Minimal usage example\n\n```ts\nimport type { CustomToolFactory } from \"@oh-my-pi/pi-coding-agent\";\n\nconst factory: CustomToolFactory = (pi) => ({\n name: \"batch_rename_preview\",\n label: \"Batch Rename Preview\",\n description: \"Previews renames and defers commit to resolve\",\n parameters: pi.typebox.Type.Object({\n files: pi.typebox.Type.Array(pi.typebox.Type.String()),\n }),\n\n async execute(_toolCallId, params) {\n const previewSummary = `Prepared rename plan for ${params.files.length} files`;\n\n pi.pushPendingAction({\n label: `Batch rename: ${params.files.length} files`,\n sourceToolName: \"batch_rename_preview\",\n apply: async (reason) => {\n // apply writes here\n return {\n content: [\n { type: \"text\", text: `Applied batch rename. 
Reason: ${reason}` },\n ],\n };\n },\n reject: async (reason) => {\n // optional: cleanup or notify on discard\n return {\n content: [\n { type: \"text\", text: `Discarded batch rename. Reason: ${reason}` },\n ],\n };\n },\n });\n\n return {\n content: [\n {\n type: \"text\",\n text: `${previewSummary}. Call resolve to apply or discard.`,\n },\n ],\n };\n },\n});\n\nexport default factory;\n```\n\n## Runtime availability and failures\n\n`pushPendingAction` is wired by the custom tool loader through the active session's resolve queue hook.\n\nIf the runtime did not provide the resolve queue hook, `pushPendingAction` throws:\n\n- `Pending action store unavailable for custom tools in this runtime.`\n\n## Tool-choice behavior\n\nWhen `queueResolveHandler(...)` registers a preview, the agent runtime forces a one-shot `resolve` tool choice so pending previews are explicitly finalized before normal tool flow continues.\n\n## Developer guidance\n\n- Use pending actions only for destructive or high-impact operations that should support explicit apply/discard.\n- Keep `label` concise and specific; it is shown in resolve renderer output.\n- Ensure `apply(reason)` is deterministic and idempotent enough for one-shot execution; `reason` is informational and should not change behavior.\n- Implement `reject(reason)` when the discard needs cleanup (temp state, locks, notifications); omit it for stateless previews where the default message suffices.\n- If your tool can stage multiple previews, remember they are mediated by the tool-choice queue rather than a separate pending-action stack.\n",
43
- "rpc.md": "# RPC Protocol Reference\n\nRPC mode runs the coding agent as a newline-delimited JSON protocol over stdio.\n\n- **stdin**: commands (`RpcCommand`), extension UI responses, and host-tool updates/results\n- **stdout**: a ready frame, command responses (`RpcResponse`), session/agent events, extension UI requests, host-tool requests/cancellations\n\nPrimary implementation:\n\n- `src/modes/rpc/rpc-mode.ts`\n- `src/modes/rpc/rpc-types.ts`\n- `src/session/agent-session.ts`\n- `packages/agent/src/agent.ts`\n- `packages/agent/src/agent-loop.ts`\n\n## Startup\n\n```bash\nomp --mode rpc [regular CLI options]\n```\n\nBehavior notes:\n\n- `@file` CLI arguments are rejected in RPC mode.\n- RPC mode disables automatic session title generation by default to avoid an extra model call.\n- RPC mode resets workflow-altering `todo.*`, `task.*`, `async.*`, and `bash.autoBackground.*` settings to their built-in defaults instead of inheriting user overrides.\n- The process reads stdin as JSONL (`readJsonl(Bun.stdin.stream())`).\n- At startup it writes `{ \"type\": \"ready\" }` before processing commands.\n- When stdin closes, pending host-tool calls are rejected and the process exits with code `0`.\n- Responses/events are written as one JSON object per line.\n\n## Transport and Framing\n\nEach frame is a single JSON object followed by `\\n`.\n\nThere is no envelope beyond the object shape itself.\n\n### Outbound frame categories (stdout)\n\n1. Ready frame (`{ type: \"ready\" }`)\n2. `RpcResponse` (`{ type: \"response\", ... }`)\n3. `AgentSessionEvent` objects (`agent_start`, `message_update`, etc.)\n4. `RpcExtensionUIRequest` (`{ type: \"extension_ui_request\", ... }`)\n5. Host tool requests/cancellations (`host_tool_call`, `host_tool_cancel`)\n6. Extension errors (`{ type: \"extension_error\", extensionPath, event, error }`)\n\n### Inbound frame categories (stdin)\n\n1. `RpcCommand`\n2. `RpcExtensionUIResponse` (`{ type: \"extension_ui_response\", ... }`)\n3. 
Host tool updates/results (`host_tool_update`, `host_tool_result`)\n\n## Request/Response Correlation\n\nAll commands accept optional `id?: string`.\n\n- If provided, normal command responses echo the same `id`.\n- `RpcClient` relies on this for pending-request resolution.\n\nImportant edge behavior from runtime:\n\n- Unknown command responses are emitted with `id: undefined` (even if the request had an `id`).\n- Parse/handler exceptions in the input loop emit `command: \"parse\"` with `id: undefined`.\n- `prompt` and `abort_and_prompt` return immediate success, then may emit a later error response with the **same** id if async prompt scheduling fails.\n\n## Command Schema (canonical)\n\n`RpcCommand` is defined in `src/modes/rpc/rpc-types.ts`:\n\n### Prompting\n\n- `{ id?, type: \"prompt\", message: string, images?: ImageContent[], streamingBehavior?: \"steer\" | \"followUp\" }`\n- `{ id?, type: \"steer\", message: string, images?: ImageContent[] }`\n- `{ id?, type: \"follow_up\", message: string, images?: ImageContent[] }`\n- `{ id?, type: \"abort\" }`\n- `{ id?, type: \"abort_and_prompt\", message: string, images?: ImageContent[] }`\n- `{ id?, type: \"new_session\", parentSession?: string }`\n\n### State\n\n- `{ id?, type: \"get_state\" }`\n- `{ id?, type: \"set_todos\", phases: TodoPhase[] }`\n- `{ id?, type: \"set_host_tools\", tools: RpcHostToolDefinition[] }`\n\n### Model\n\n- `{ id?, type: \"set_model\", provider: string, modelId: string }`\n- `{ id?, type: \"cycle_model\" }`\n- `{ id?, type: \"get_available_models\" }`\n\n### Thinking\n\n- `{ id?, type: \"set_thinking_level\", level: ThinkingLevel }`\n- `{ id?, type: \"cycle_thinking_level\" }`\n\n### Queue modes\n\n- `{ id?, type: \"set_steering_mode\", mode: \"all\" | \"one-at-a-time\" }`\n- `{ id?, type: \"set_follow_up_mode\", mode: \"all\" | \"one-at-a-time\" }`\n- `{ id?, type: \"set_interrupt_mode\", mode: \"immediate\" | \"wait\" }`\n\n### Compaction\n\n- `{ id?, type: \"compact\", 
customInstructions?: string }`\n- `{ id?, type: \"set_auto_compaction\", enabled: boolean }`\n\n### Retry\n\n- `{ id?, type: \"set_auto_retry\", enabled: boolean }`\n- `{ id?, type: \"abort_retry\" }`\n\n### Bash\n\n- `{ id?, type: \"bash\", command: string }`\n- `{ id?, type: \"abort_bash\" }`\n\n### Session\n\n- `{ id?, type: \"get_session_stats\" }`\n- `{ id?, type: \"export_html\", outputPath?: string }`\n- `{ id?, type: \"switch_session\", sessionPath: string }`\n- `{ id?, type: \"branch\", entryId: string }`\n- `{ id?, type: \"get_branch_messages\" }`\n- `{ id?, type: \"get_last_assistant_text\" }`\n- `{ id?, type: \"set_session_name\", name: string }`\n\n### Messages\n\n- `{ id?, type: \"get_messages\" }`\n\n## Response Schema\n\nAll command results use `RpcResponse`:\n\n- Success: `{ id?, type: \"response\", command: <command>, success: true, data?: ... }`\n- Failure: `{ id?, type: \"response\", command: string, success: false, error: string }`\n\nData payloads are command-specific and defined in `rpc-types.ts`.\n\n### `get_state` payload\n\n```json\n{\n \"model\": { \"provider\": \"...\", \"id\": \"...\" },\n \"thinkingLevel\": \"off|minimal|low|medium|high|xhigh\",\n \"isStreaming\": false,\n \"isCompacting\": false,\n \"steeringMode\": \"all|one-at-a-time\",\n \"followUpMode\": \"all|one-at-a-time\",\n \"interruptMode\": \"immediate|wait\",\n \"sessionFile\": \"...\",\n \"sessionId\": \"...\",\n \"sessionName\": \"...\",\n \"autoCompactionEnabled\": true,\n \"messageCount\": 0,\n \"queuedMessageCount\": 0,\n \"todoPhases\": [\n {\n \"id\": \"phase-1\",\n \"name\": \"Todos\",\n \"tasks\": [\n {\n \"id\": \"task-1\",\n \"content\": \"Map the tool surface\",\n \"status\": \"in_progress\"\n }\n ]\n }\n ],\n \"systemPrompt\": \"...\",\n \"dumpTools\": [\n {\n \"name\": \"read\",\n \"description\": \"Read files and URLs\",\n \"parameters\": {}\n }\n ]\n}\n```\n\n### `set_todos` payload\n\nReplaces the in-memory todo state for the current session and returns 
the normalized phase list:\n\n```json\n{\n \"id\": \"req_2\",\n \"type\": \"set_todos\",\n \"phases\": [\n {\n \"id\": \"phase-1\",\n \"name\": \"Evaluation\",\n \"tasks\": [\n {\n \"id\": \"task-1\",\n \"content\": \"Map the read tool surface\",\n \"status\": \"in_progress\"\n },\n {\n \"id\": \"task-2\",\n \"content\": \"Exercise edit operations\",\n \"status\": \"pending\"\n }\n ]\n }\n ]\n}\n```\n\nThis is useful for hosts that want to pre-seed a plan before the first prompt.\n\n### `set_host_tools` payload\n\nReplaces the current set of host-owned tools that the RPC server may call back\ninto over stdio:\n\n```json\n{\n \"id\": \"req_3\",\n \"type\": \"set_host_tools\",\n \"tools\": [\n {\n \"name\": \"echo_host\",\n \"label\": \"Echo Host\",\n \"description\": \"Echo a value from the embedding host\",\n \"parameters\": {\n \"type\": \"object\",\n \"properties\": {\n \"message\": { \"type\": \"string\" }\n },\n \"required\": [\"message\"],\n \"additionalProperties\": false\n }\n }\n ]\n}\n```\n\nThe response payload is:\n\n```json\n{\n \"toolNames\": [\"echo_host\"]\n}\n```\n\nThese tools are added to the active session tool registry before the next model\ncall. 
Re-sending `set_host_tools` replaces the previous host-owned set.\n\n## Event Stream Schema\n\nRPC mode forwards `AgentSessionEvent` objects from `AgentSession.subscribe(...)`.\n\nCommon event types:\n\n- `agent_start`, `agent_end`\n- `turn_start`, `turn_end`\n- `message_start`, `message_update`, `message_end`\n- `tool_execution_start`, `tool_execution_update`, `tool_execution_end`\n- `auto_compaction_start`, `auto_compaction_end`\n- `auto_retry_start`, `auto_retry_end`\n- `ttsr_triggered`\n- `todo_reminder`\n- `todo_auto_clear`\n\nExtension runner errors are emitted separately as:\n\n```json\n{\n \"type\": \"extension_error\",\n \"extensionPath\": \"...\",\n \"event\": \"...\",\n \"error\": \"...\"\n}\n```\n\n`message_update` includes streaming deltas in `assistantMessageEvent` (text/thinking/toolcall deltas).\n\n## Prompt/Queue Concurrency and Ordering\n\nThis is the most important operational behavior.\n\n### Immediate ack vs completion\n\n`prompt` and `abort_and_prompt` are **acknowledged immediately**:\n\n```json\n{ \"id\": \"req_1\", \"type\": \"response\", \"command\": \"prompt\", \"success\": true }\n```\n\nThat means:\n\n- command acceptance != run completion\n- final completion is observed via `agent_end`\n\n### While streaming\n\n`AgentSession.prompt()` requires `streamingBehavior` during active streaming:\n\n- `\"steer\"` => queued steering message (interrupt path)\n- `\"followUp\"` => queued follow-up message (post-turn path)\n\nIf omitted during streaming, prompt fails.\n\n### Queue defaults\n\nFrom `packages/agent/src/agent.ts` defaults:\n\n- `steeringMode`: `\"one-at-a-time\"`\n- `followUpMode`: `\"one-at-a-time\"`\n- `interruptMode`: `\"immediate\"`\n\n### Mode semantics\n\n- `set_steering_mode` / `set_follow_up_mode`\n - `\"one-at-a-time\"`: dequeue one queued message per turn\n - `\"all\"`: dequeue entire queue at once\n- `set_interrupt_mode`\n - `\"immediate\"`: tool execution checks steering between tool calls; pending steering can abort 
remaining tool calls in the turn\n - `\"wait\"`: defer steering until turn completion\n\n## Extension UI Sub-Protocol\n\nExtensions in RPC mode use request/response UI frames.\n\n### Outbound request\n\n`RpcExtensionUIRequest` (`type: \"extension_ui_request\"`) methods:\n\n- `select`, `confirm`, `input`, `editor`, `cancel`\n- `notify`, `setStatus`, `setWidget`, `setTitle`, `set_editor_text`\n\nRuntime note:\n\n- Automatic session title generation is disabled in RPC mode, and `setTitle` UI\n requests are also suppressed by default because most hosts do not have a\n meaningful terminal-title surface. Set `PI_RPC_EMIT_TITLE=1` to opt back in to\n the UI event only.\n\nExample:\n\n```json\n{\n \"type\": \"extension_ui_request\",\n \"id\": \"123\",\n \"method\": \"confirm\",\n \"title\": \"Confirm\",\n \"message\": \"Continue?\",\n \"timeout\": 30000\n}\n```\n\n### Inbound response\n\n`RpcExtensionUIResponse` (`type: \"extension_ui_response\"`):\n\n- `{ type: \"extension_ui_response\", id: string, value: string }`\n- `{ type: \"extension_ui_response\", id: string, confirmed: boolean }`\n- `{ type: \"extension_ui_response\", id: string, cancelled: true, timedOut?: boolean }`\n\nIf a dialog has a timeout, RPC mode resolves to a default value when timeout/abort fires.\n\n## Host Tool Sub-Protocol\n\nRPC hosts can expose custom tools to the agent by sending `set_host_tools`, then\nserving execution requests over the same transport.\n\n### Outbound request\n\nWhen the agent wants the host to execute one of those tools, RPC mode emits:\n\n```json\n{\n \"type\": \"host_tool_call\",\n \"id\": \"host_1\",\n \"toolCallId\": \"toolu_123\",\n \"toolName\": \"echo_host\",\n \"arguments\": { \"message\": \"hello\" }\n}\n```\n\nIf the tool execution is later aborted, RPC mode emits:\n\n```json\n{\n \"type\": \"host_tool_cancel\",\n \"id\": \"host_cancel_1\",\n \"targetId\": \"host_1\"\n}\n```\n\n### Inbound updates and completion\n\nHosts can optionally stream 
progress:\n\n```json\n{\n \"type\": \"host_tool_update\",\n \"id\": \"host_1\",\n \"partialResult\": {\n \"content\": [{ \"type\": \"text\", \"text\": \"working\" }]\n }\n}\n```\n\nCompletion uses:\n\n```json\n{\n \"type\": \"host_tool_result\",\n \"id\": \"host_1\",\n \"result\": {\n \"content\": [{ \"type\": \"text\", \"text\": \"done\" }]\n }\n}\n```\n\nSet top-level `isError: true` on `host_tool_result` to reject the pending host tool call and surface the returned text content as a tool error.\n\n## Error Model and Recoverability\n\n### Command-level failures\n\nFailures are `success: false` with string `error`.\n\n```json\n{\n \"id\": \"req_2\",\n \"type\": \"response\",\n \"command\": \"set_model\",\n \"success\": false,\n \"error\": \"Model not found: provider/model\"\n}\n```\n\n### Recoverability expectations\n\n- Most command failures are recoverable; process remains alive.\n- Malformed JSONL / parse-loop exceptions emit a `parse` error response and continue reading subsequent lines.\n- Empty `set_session_name` is rejected (`Session name cannot be empty`).\n- Extension UI responses with unknown `id` are ignored.\n- Process termination conditions are stdin close or explicit extension-triggered shutdown after the current command.\n\n## Compact Command Flows\n\n### 1) Prompt and stream\n\nstdin:\n\n```json\n{ \"id\": \"req_1\", \"type\": \"prompt\", \"message\": \"Summarize this repo\" }\n```\n\nstdout sequence (typical):\n\n```json\n{ \"id\": \"req_1\", \"type\": \"response\", \"command\": \"prompt\", \"success\": true }\n{ \"type\": \"agent_start\" }\n{ \"type\": \"message_update\", \"assistantMessageEvent\": { \"type\": \"text_delta\", \"delta\": \"...\" }, \"message\": { \"role\": \"assistant\", \"content\": [] } }\n{ \"type\": \"agent_end\", \"messages\": [] }\n```\n\n### 2) Prompt during streaming with explicit queue policy\n\nstdin:\n\n```json\n{\n \"id\": \"req_2\",\n \"type\": \"prompt\",\n \"message\": \"Also include risks\",\n 
\"streamingBehavior\": \"followUp\"\n}\n```\n\n### 3) Inspect and tune queue behavior\n\nstdin:\n\n```json\n{ \"id\": \"q1\", \"type\": \"get_state\" }\n{ \"id\": \"q2\", \"type\": \"set_steering_mode\", \"mode\": \"all\" }\n{ \"id\": \"q3\", \"type\": \"set_interrupt_mode\", \"mode\": \"wait\" }\n```\n\n### 4) Extension UI round trip\n\nstdout:\n\n```json\n{\n \"type\": \"extension_ui_request\",\n \"id\": \"ui_7\",\n \"method\": \"input\",\n \"title\": \"Branch name\",\n \"placeholder\": \"feature/...\"\n}\n```\n\nstdin:\n\n```json\n{ \"type\": \"extension_ui_response\", \"id\": \"ui_7\", \"value\": \"feature/rpc-host\" }\n```\n\n## Notes on `RpcClient` helper\n\n`src/modes/rpc/rpc-client.ts` is a convenience wrapper, not the protocol definition.\n\nCurrent helper characteristics:\n\n- Spawns `bun <cliPath> --mode rpc`\n- Correlates responses by generated `req_<n>` ids\n- Dispatches only recognized `AgentEvent` types to listeners\n- Supports host-owned custom tools via `setCustomTools()` and automatic handling of `host_tool_call` / `host_tool_cancel`\n- Does **not** expose helper methods for every protocol command (for example, `set_interrupt_mode` and `set_session_name` are in protocol types but not wrapped as dedicated methods)\n\nUse raw protocol frames if you need complete surface coverage.\n",
43
+ "rpc.md": "# RPC Protocol Reference\n\nRPC mode runs the coding agent as a newline-delimited JSON protocol over stdio.\n\n- **stdin**: commands (`RpcCommand`), extension UI responses, and host-tool updates/results\n- **stdout**: a ready frame, command responses (`RpcResponse`), session/agent events, extension UI requests, host-tool requests/cancellations\n\nPrimary implementation:\n\n- `src/modes/rpc/rpc-mode.ts`\n- `src/modes/rpc/rpc-types.ts`\n- `src/session/agent-session.ts`\n- `packages/agent/src/agent.ts`\n- `packages/agent/src/agent-loop.ts`\n\n## Startup\n\n```bash\nomp --mode rpc [regular CLI options]\n```\n\nBehavior notes:\n\n- `@file` CLI arguments are rejected in RPC mode.\n- RPC mode disables automatic session title generation by default to avoid an extra model call.\n- RPC mode resets workflow-altering `todo.*`, `task.*`, `async.*`, and `bash.autoBackground.*` settings to their built-in defaults instead of inheriting user overrides.\n- The process reads stdin as JSONL (`readJsonl(Bun.stdin.stream())`).\n- At startup it writes `{ \"type\": \"ready\" }` before processing commands.\n- When stdin closes, pending host-tool calls are rejected and the process exits with code `0`.\n- Responses/events are written as one JSON object per line.\n\n## Transport and Framing\n\nEach frame is a single JSON object followed by `\\n`.\n\nThere is no envelope beyond the object shape itself.\n\n### Outbound frame categories (stdout)\n\n1. Ready frame (`{ type: \"ready\" }`)\n2. `RpcResponse` (`{ type: \"response\", ... }`)\n3. `AgentSessionEvent` objects (`agent_start`, `message_update`, etc.)\n4. `RpcExtensionUIRequest` (`{ type: \"extension_ui_request\", ... }`)\n5. Host tool requests/cancellations (`host_tool_call`, `host_tool_cancel`)\n6. Host URI requests/cancellations (`host_uri_request`, `host_uri_cancel`)\n7. Extension errors (`{ type: \"extension_error\", extensionPath, event, error }`)\n\n### Inbound frame categories (stdin)\n\n1. `RpcCommand`\n2. 
`RpcExtensionUIResponse` (`{ type: \"extension_ui_response\", ... }`)\n3. Host tool updates/results (`host_tool_update`, `host_tool_result`)\n4. Host URI results (`host_uri_result`)\n\n## Request/Response Correlation\n\nAll commands accept optional `id?: string`.\n\n- If provided, normal command responses echo the same `id`.\n- `RpcClient` relies on this for pending-request resolution.\n\nImportant edge behavior from runtime:\n\n- Unknown command responses are emitted with `id: undefined` (even if the request had an `id`).\n- Parse/handler exceptions in the input loop emit `command: \"parse\"` with `id: undefined`.\n- `prompt` and `abort_and_prompt` return immediate success, then may emit a later error response with the **same** id if async prompt scheduling fails.\n\n## Command Schema (canonical)\n\n`RpcCommand` is defined in `src/modes/rpc/rpc-types.ts`:\n\n### Prompting\n\n- `{ id?, type: \"prompt\", message: string, images?: ImageContent[], streamingBehavior?: \"steer\" | \"followUp\" }`\n- `{ id?, type: \"steer\", message: string, images?: ImageContent[] }`\n- `{ id?, type: \"follow_up\", message: string, images?: ImageContent[] }`\n- `{ id?, type: \"abort\" }`\n- `{ id?, type: \"abort_and_prompt\", message: string, images?: ImageContent[] }`\n- `{ id?, type: \"new_session\", parentSession?: string }`\n\n### State\n\n- `{ id?, type: \"get_state\" }`\n- `{ id?, type: \"set_todos\", phases: TodoPhase[] }`\n- `{ id?, type: \"set_host_tools\", tools: RpcHostToolDefinition[] }`\n- `{ id?, type: \"set_host_uri_schemes\", schemes: RpcHostUriSchemeDefinition[] }`\n\n### Model\n\n- `{ id?, type: \"set_model\", provider: string, modelId: string }`\n- `{ id?, type: \"cycle_model\" }`\n- `{ id?, type: \"get_available_models\" }`\n\n### Thinking\n\n- `{ id?, type: \"set_thinking_level\", level: ThinkingLevel }`\n- `{ id?, type: \"cycle_thinking_level\" }`\n\n### Queue modes\n\n- `{ id?, type: \"set_steering_mode\", mode: \"all\" | \"one-at-a-time\" }`\n- `{ id?, type: 
\"set_follow_up_mode\", mode: \"all\" | \"one-at-a-time\" }`\n- `{ id?, type: \"set_interrupt_mode\", mode: \"immediate\" | \"wait\" }`\n\n### Compaction\n\n- `{ id?, type: \"compact\", customInstructions?: string }`\n- `{ id?, type: \"set_auto_compaction\", enabled: boolean }`\n\n### Retry\n\n- `{ id?, type: \"set_auto_retry\", enabled: boolean }`\n- `{ id?, type: \"abort_retry\" }`\n\n### Bash\n\n- `{ id?, type: \"bash\", command: string }`\n- `{ id?, type: \"abort_bash\" }`\n\n### Session\n\n- `{ id?, type: \"get_session_stats\" }`\n- `{ id?, type: \"export_html\", outputPath?: string }`\n- `{ id?, type: \"switch_session\", sessionPath: string }`\n- `{ id?, type: \"branch\", entryId: string }`\n- `{ id?, type: \"get_branch_messages\" }`\n- `{ id?, type: \"get_last_assistant_text\" }`\n- `{ id?, type: \"set_session_name\", name: string }`\n\n### Messages\n\n- `{ id?, type: \"get_messages\" }`\n\n## Response Schema\n\nAll command results use `RpcResponse`:\n\n- Success: `{ id?, type: \"response\", command: <command>, success: true, data?: ... 
}`\n- Failure: `{ id?, type: \"response\", command: string, success: false, error: string }`\n\nData payloads are command-specific and defined in `rpc-types.ts`.\n\n### `get_state` payload\n\n```json\n{\n \"model\": { \"provider\": \"...\", \"id\": \"...\" },\n \"thinkingLevel\": \"off|minimal|low|medium|high|xhigh\",\n \"isStreaming\": false,\n \"isCompacting\": false,\n \"steeringMode\": \"all|one-at-a-time\",\n \"followUpMode\": \"all|one-at-a-time\",\n \"interruptMode\": \"immediate|wait\",\n \"sessionFile\": \"...\",\n \"sessionId\": \"...\",\n \"sessionName\": \"...\",\n \"autoCompactionEnabled\": true,\n \"messageCount\": 0,\n \"queuedMessageCount\": 0,\n \"todoPhases\": [\n {\n \"id\": \"phase-1\",\n \"name\": \"Todos\",\n \"tasks\": [\n {\n \"id\": \"task-1\",\n \"content\": \"Map the tool surface\",\n \"status\": \"in_progress\"\n }\n ]\n }\n ],\n \"systemPrompt\": \"...\",\n \"dumpTools\": [\n {\n \"name\": \"read\",\n \"description\": \"Read files and URLs\",\n \"parameters\": {}\n }\n ]\n}\n```\n\n### `set_todos` payload\n\nReplaces the in-memory todo state for the current session and returns the normalized phase list:\n\n```json\n{\n \"id\": \"req_2\",\n \"type\": \"set_todos\",\n \"phases\": [\n {\n \"id\": \"phase-1\",\n \"name\": \"Evaluation\",\n \"tasks\": [\n {\n \"id\": \"task-1\",\n \"content\": \"Map the read tool surface\",\n \"status\": \"in_progress\"\n },\n {\n \"id\": \"task-2\",\n \"content\": \"Exercise edit operations\",\n \"status\": \"pending\"\n }\n ]\n }\n ]\n}\n```\n\nThis is useful for hosts that want to pre-seed a plan before the first prompt.\n\n### `set_host_tools` payload\n\nReplaces the current set of host-owned tools that the RPC server may call back\ninto over stdio:\n\n```json\n{\n \"id\": \"req_3\",\n \"type\": \"set_host_tools\",\n \"tools\": [\n {\n \"name\": \"echo_host\",\n \"label\": \"Echo Host\",\n \"description\": \"Echo a value from the embedding host\",\n \"parameters\": {\n \"type\": \"object\",\n 
\"properties\": {\n \"message\": { \"type\": \"string\" }\n },\n \"required\": [\"message\"],\n \"additionalProperties\": false\n }\n }\n ]\n}\n```\n\nThe response payload is:\n\n```json\n{\n \"toolNames\": [\"echo_host\"]\n}\n```\n\nThese tools are added to the active session tool registry before the next model\ncall. Re-sending `set_host_tools` replaces the previous host-owned set.\n\n### `set_host_uri_schemes` payload\n\nReplaces the current set of host-owned URL schemes the RPC server should\ndispatch reads/writes through:\n\n```json\n{\n \"id\": \"req_4\",\n \"type\": \"set_host_uri_schemes\",\n \"schemes\": [\n {\n \"scheme\": \"db\",\n \"description\": \"Virtual db row files\",\n \"writable\": true,\n \"immutable\": false\n }\n ]\n}\n```\n\nThe response payload is:\n\n```json\n{\n \"schemes\": [\"db\"]\n}\n```\n\nSchemes are case-insensitive on the wire and normalized to lowercase before\nthe response is sent. Re-sending `set_host_uri_schemes` replaces the entire\nprevious set — schemes missing from the new list are unregistered.\n\n## Event Stream Schema\n\nRPC mode forwards `AgentSessionEvent` objects from `AgentSession.subscribe(...)`.\n\nCommon event types:\n\n- `agent_start`, `agent_end`\n- `turn_start`, `turn_end`\n- `message_start`, `message_update`, `message_end`\n- `tool_execution_start`, `tool_execution_update`, `tool_execution_end`\n- `auto_compaction_start`, `auto_compaction_end`\n- `auto_retry_start`, `auto_retry_end`\n- `ttsr_triggered`\n- `todo_reminder`\n- `todo_auto_clear`\n\nExtension runner errors are emitted separately as:\n\n```json\n{\n \"type\": \"extension_error\",\n \"extensionPath\": \"...\",\n \"event\": \"...\",\n \"error\": \"...\"\n}\n```\n\n`message_update` includes streaming deltas in `assistantMessageEvent` (text/thinking/toolcall deltas).\n\n## Prompt/Queue Concurrency and Ordering\n\nThis is the most important operational behavior.\n\n### Immediate ack vs completion\n\n`prompt` and `abort_and_prompt` are **acknowledged 
immediately**:\n\n```json\n{ \"id\": \"req_1\", \"type\": \"response\", \"command\": \"prompt\", \"success\": true }\n```\n\nThat means:\n\n- command acceptance != run completion\n- final completion is observed via `agent_end`\n\n### While streaming\n\n`AgentSession.prompt()` requires `streamingBehavior` during active streaming:\n\n- `\"steer\"` => queued steering message (interrupt path)\n- `\"followUp\"` => queued follow-up message (post-turn path)\n\nIf omitted during streaming, prompt fails.\n\n### Queue defaults\n\nFrom `packages/agent/src/agent.ts` defaults:\n\n- `steeringMode`: `\"one-at-a-time\"`\n- `followUpMode`: `\"one-at-a-time\"`\n- `interruptMode`: `\"immediate\"`\n\n### Mode semantics\n\n- `set_steering_mode` / `set_follow_up_mode`\n - `\"one-at-a-time\"`: dequeue one queued message per turn\n - `\"all\"`: dequeue entire queue at once\n- `set_interrupt_mode`\n - `\"immediate\"`: tool execution checks steering between tool calls; pending steering can abort remaining tool calls in the turn\n - `\"wait\"`: defer steering until turn completion\n\n## Extension UI Sub-Protocol\n\nExtensions in RPC mode use request/response UI frames.\n\n### Outbound request\n\n`RpcExtensionUIRequest` (`type: \"extension_ui_request\"`) methods:\n\n- `select`, `confirm`, `input`, `editor`, `cancel`\n- `notify`, `setStatus`, `setWidget`, `setTitle`, `set_editor_text`\n\nRuntime note:\n\n- Automatic session title generation is disabled in RPC mode, and `setTitle` UI\n requests are also suppressed by default because most hosts do not have a\n meaningful terminal-title surface. 
Set `PI_RPC_EMIT_TITLE=1` to opt back in to\n the UI event only.\n\nExample:\n\n```json\n{\n \"type\": \"extension_ui_request\",\n \"id\": \"123\",\n \"method\": \"confirm\",\n \"title\": \"Confirm\",\n \"message\": \"Continue?\",\n \"timeout\": 30000\n}\n```\n\n### Inbound response\n\n`RpcExtensionUIResponse` (`type: \"extension_ui_response\"`):\n\n- `{ type: \"extension_ui_response\", id: string, value: string }`\n- `{ type: \"extension_ui_response\", id: string, confirmed: boolean }`\n- `{ type: \"extension_ui_response\", id: string, cancelled: true, timedOut?: boolean }`\n\nIf a dialog has a timeout, RPC mode resolves to a default value when timeout/abort fires.\n\n## Host Tool Sub-Protocol\n\nRPC hosts can expose custom tools to the agent by sending `set_host_tools`, then\nserving execution requests over the same transport.\n\n### Outbound request\n\nWhen the agent wants the host to execute one of those tools, RPC mode emits:\n\n```json\n{\n \"type\": \"host_tool_call\",\n \"id\": \"host_1\",\n \"toolCallId\": \"toolu_123\",\n \"toolName\": \"echo_host\",\n \"arguments\": { \"message\": \"hello\" }\n}\n```\n\nIf the tool execution is later aborted, RPC mode emits:\n\n```json\n{\n \"type\": \"host_tool_cancel\",\n \"id\": \"host_cancel_1\",\n \"targetId\": \"host_1\"\n}\n```\n\n### Inbound updates and completion\n\nHosts can optionally stream progress:\n\n```json\n{\n \"type\": \"host_tool_update\",\n \"id\": \"host_1\",\n \"partialResult\": {\n \"content\": [{ \"type\": \"text\", \"text\": \"working\" }]\n }\n}\n```\n\nCompletion uses:\n\n```json\n{\n \"type\": \"host_tool_result\",\n \"id\": \"host_1\",\n \"result\": {\n \"content\": [{ \"type\": \"text\", \"text\": \"done\" }]\n }\n}\n```\n\nSet top-level `isError: true` on `host_tool_result` to reject the pending host tool call and surface the returned text content as a tool error.\n\n## Host URI Sub-Protocol\n\nRPC hosts can also own custom URL schemes (virtual files). 
After\n`set_host_uri_schemes`, every read of `<scheme>://…` and write of\n`<scheme>://…` (when registered as `writable`) is bounced back to the host\nover the same transport.\n\n### Outbound request\n\nWhen a session tool resolves a host-owned URL, RPC mode emits:\n\n```json\n{\n \"type\": \"host_uri_request\",\n \"id\": \"uri_1\",\n \"operation\": \"read\",\n \"url\": \"db://users/42\"\n}\n```\n\nWrites look the same with `\"operation\": \"write\"` and an additional\n`\"content\": \"...\"` field carrying the full replacement bytes.\n\nIf the request is later aborted (caller cancels, session ends), RPC mode\nemits:\n\n```json\n{\n \"type\": \"host_uri_cancel\",\n \"id\": \"uri_cancel_1\",\n \"targetId\": \"uri_1\"\n}\n```\n\n### Inbound result\n\nFor successful reads:\n\n```json\n{\n \"type\": \"host_uri_result\",\n \"id\": \"uri_1\",\n \"content\": \"id=42\\nname=Alice\\n\",\n \"contentType\": \"text/plain\",\n \"notes\": [\"fresh from cache\"],\n \"immutable\": false\n}\n```\n\nFor successful writes, omit content:\n\n```json\n{ \"type\": \"host_uri_result\", \"id\": \"uri_1\" }\n```\n\nTo reject the request, set `isError: true` and either populate `error` with\na message or fall back to `content` for textual error surfacing:\n\n```json\n{\n \"type\": \"host_uri_result\",\n \"id\": \"uri_1\",\n \"isError\": true,\n \"error\": \"row 42 not found\"\n}\n```\n\n### Constraints\n\n- The agent's `edit` tool does not target host URIs. 
Hosts that want to\n mutate virtual files register the scheme as `writable` and let the model use the `write` tool\n with replacement content.\n- Schemes are global to the process; `set_host_uri_schemes` replaces the\n previous set, unregistering anything not in the new list.\n- Schemes are normalized to lowercase before registration.\n\n## Error Model and Recoverability\n\n### Command-level failures\n\nFailures are `success: false` with string `error`.\n\n```json\n{\n \"id\": \"req_2\",\n \"type\": \"response\",\n \"command\": \"set_model\",\n \"success\": false,\n \"error\": \"Model not found: provider/model\"\n}\n```\n\n### Recoverability expectations\n\n- Most command failures are recoverable; process remains alive.\n- Malformed JSONL / parse-loop exceptions emit a `parse` error response and continue reading subsequent lines.\n- Empty `set_session_name` is rejected (`Session name cannot be empty`).\n- Extension UI responses with unknown `id` are ignored.\n- Process termination conditions are stdin close or explicit extension-triggered shutdown after the current command.\n\n## Compact Command Flows\n\n### 1) Prompt and stream\n\nstdin:\n\n```json\n{ \"id\": \"req_1\", \"type\": \"prompt\", \"message\": \"Summarize this repo\" }\n```\n\nstdout sequence (typical):\n\n```json\n{ \"id\": \"req_1\", \"type\": \"response\", \"command\": \"prompt\", \"success\": true }\n{ \"type\": \"agent_start\" }\n{ \"type\": \"message_update\", \"assistantMessageEvent\": { \"type\": \"text_delta\", \"delta\": \"...\" }, \"message\": { \"role\": \"assistant\", \"content\": [] } }\n{ \"type\": \"agent_end\", \"messages\": [] }\n```\n\n### 2) Prompt during streaming with explicit queue policy\n\nstdin:\n\n```json\n{\n \"id\": \"req_2\",\n \"type\": \"prompt\",\n \"message\": \"Also include risks\",\n \"streamingBehavior\": \"followUp\"\n}\n```\n\n### 3) Inspect and tune queue behavior\n\nstdin:\n\n```json\n{ \"id\": \"q1\", \"type\": \"get_state\" }\n{ \"id\": \"q2\", \"type\": \"set_steering_mode\", 
\"mode\": \"all\" }\n{ \"id\": \"q3\", \"type\": \"set_interrupt_mode\", \"mode\": \"wait\" }\n```\n\n### 4) Extension UI round trip\n\nstdout:\n\n```json\n{\n \"type\": \"extension_ui_request\",\n \"id\": \"ui_7\",\n \"method\": \"input\",\n \"title\": \"Branch name\",\n \"placeholder\": \"feature/...\"\n}\n```\n\nstdin:\n\n```json\n{ \"type\": \"extension_ui_response\", \"id\": \"ui_7\", \"value\": \"feature/rpc-host\" }\n```\n\n## Notes on `RpcClient` helper\n\n`src/modes/rpc/rpc-client.ts` is a convenience wrapper, not the protocol definition.\n\nCurrent helper characteristics:\n\n- Spawns `bun <cliPath> --mode rpc`\n- Correlates responses by generated `req_<n>` ids\n- Dispatches only recognized `AgentEvent` types to listeners\n- Supports host-owned custom tools via `setCustomTools()` and automatic handling of `host_tool_call` / `host_tool_cancel`\n- Does **not** expose helper methods for every protocol command (for example, `set_interrupt_mode` and `set_session_name` are in protocol types but not wrapped as dedicated methods)\n\nUse raw protocol frames if you need complete surface coverage.\n",
44
44
  "rulebook-matching-pipeline.md": "# Rulebook Matching Pipeline\n\nThis document describes how coding-agent discovers rules from supported config formats, normalizes them into a single `Rule` shape, resolves precedence conflicts, and splits the result into:\n\n- **Rulebook rules** (available to the model via system prompt + `rule://` URLs)\n- **Always-apply rules** (full content injected into the system prompt, also readable via `rule://` URLs)\n- **TTSR rules** (time-travel stream interruption rules)\n\nIt reflects the current implementation, including partial semantics and metadata that is parsed but not enforced.\n\n## Implementation files\n\n- [`../src/capability/rule.ts`](../packages/coding-agent/src/capability/rule.ts)\n- [`../src/capability/index.ts`](../packages/coding-agent/src/capability/index.ts)\n- [`../src/discovery/index.ts`](../packages/coding-agent/src/discovery/index.ts)\n- [`../src/discovery/helpers.ts`](../packages/coding-agent/src/discovery/helpers.ts)\n- [`../src/discovery/builtin.ts`](../packages/coding-agent/src/discovery/builtin.ts)\n- [`../src/discovery/cursor.ts`](../packages/coding-agent/src/discovery/cursor.ts)\n- [`../src/discovery/windsurf.ts`](../packages/coding-agent/src/discovery/windsurf.ts)\n- [`../src/discovery/cline.ts`](../packages/coding-agent/src/discovery/cline.ts)\n- [`../src/sdk.ts`](../packages/coding-agent/src/sdk.ts)\n- [`../src/system-prompt.ts`](../packages/coding-agent/src/system-prompt.ts)\n- [`../src/internal-urls/rule-protocol.ts`](../packages/coding-agent/src/internal-urls/rule-protocol.ts)\n- [`../src/utils/frontmatter.ts`](../packages/coding-agent/src/utils/frontmatter.ts)\n\n## 1. 
Canonical rule shape\n\nAll providers normalize source files into `Rule`:\n\n```ts\ninterface Rule {\n name: string;\n path: string;\n content: string;\n globs?: string[];\n alwaysApply?: boolean;\n description?: string;\n condition?: string[];\n scope?: string[];\n interruptMode?: \"never\" | \"prose-only\" | \"tool-only\" | \"always\";\n _source: SourceMeta;\n}\n```\n\nCapability identity is `rule.name` (`ruleCapability.key = rule => rule.name`).\n\nConsequence: precedence and deduplication are **name-based only**. Two different files with the same `name` are considered the same logical rule.\n\n## 2. Discovery sources and normalization\n\n`src/discovery/index.ts` auto-registers providers. For `rules`, current providers are:\n\n- `native` (priority `100`)\n- `cursor` (priority `50`)\n- `windsurf` (priority `50`)\n- `cline` (priority `40`)\n\n### Native provider (`builtin.ts`)\n\nLoads `.omp` rules from:\n\n- project: `<cwd>/.omp/rules/*.{md,mdc}`\n- user: `~/.omp/agent/rules/*.{md,mdc}`\n\nNormalization:\n\n- `name` = filename without `.md`/`.mdc`\n- frontmatter parsed via `parseFrontmatter`\n- `content` = body (frontmatter stripped)\n- `globs`, `alwaysApply`, `description`, `condition`/legacy `ttsr_trigger`, `scope`, and `interruptMode` are parsed by `buildRuleFromMarkdown`\n\nImportant caveat: `condition` values that look like file globs are converted into `tool:edit(...)` / `tool:write(...)` scope shorthands with catch-all condition `.*`.\n\n### Cursor provider (`cursor.ts`)\n\nLoads from:\n\n- user: `~/.cursor/rules/*.{mdc,md}`\n- project: `<cwd>/.cursor/rules/*.{mdc,md}`\n\nNormalization (`transformMDCRule`):\n\n- `description`: kept only if string\n- `alwaysApply`: only `true` is preserved (`false` becomes `undefined`)\n- `globs`: accepts array (string elements only) or single string\n- `condition`/legacy `ttsr_trigger`, `scope`, and `interruptMode` are parsed by shared rule helpers\n- `name` from filename without extension\n\n### Windsurf provider 
(`windsurf.ts`)\n\nLoads from:\n\n- user: `~/.codeium/windsurf/memories/global_rules.md` (fixed rule name `global_rules`)\n- project: `<cwd>/.windsurf/rules/*.md`\n\nNormalization:\n\n- `globs`: array-of-string or single string\n- `alwaysApply`, `description`, `condition`/legacy `ttsr_trigger`, `scope`, and `interruptMode` parsed by shared rule helpers\n- `name` is fixed to `global_rules` for the user global file and derived from filename for project rules\n\n### Cline provider (`cline.ts`)\n\nSearches upward from `cwd` for nearest `.clinerules`:\n\n- if directory: loads `*.md` inside it\n- if file: loads single file as rule named `clinerules`\n\nNormalization:\n\n- `globs`: array-of-string or single string\n- `alwaysApply`, `description`, `condition`/legacy `ttsr_trigger`, `scope`, and `interruptMode` parsed by shared rule helpers\n- `name` is fixed to `clinerules` for a `.clinerules` file and derived from filename for `.clinerules/*.md`\n\n## 3. Frontmatter parsing behavior and ambiguity\n\nAll providers use `parseFrontmatter` (`utils/frontmatter.ts`) with these semantics:\n\n1. Frontmatter is parsed only when content starts with `---` and has a closing `\\n---`.\n2. Body is trimmed after frontmatter extraction.\n3. If YAML parse fails:\n - warning is logged,\n - parser falls back to simple `key: value` line parsing (`^(\\w+):\\s*(.*)$`).\n\nAmbiguity consequences:\n\n- Fallback parser does not support arrays, nested objects, quoting rules, or hyphenated keys.\n- Fallback values become strings (for example `alwaysApply: true` becomes string `\"true\"`), so providers requiring boolean/string types may drop metadata.\n- `ttsr_trigger` works in fallback (underscore key); keys like `thinking-level` would not.\n- Files without valid frontmatter still load as rules with empty metadata and full content body.\n\n## 4. 
Provider precedence and deduplication\n\n`loadCapability(\"rules\")` (`capability/index.ts`) merges provider outputs and then deduplicates by `rule.name`.\n\n### Precedence model\n\n- Providers are ordered by priority descending.\n- Equal priority keeps registration order (`cursor` before `windsurf` from `discovery/index.ts`).\n- Dedup is first-wins: first encountered rule name is kept; later same-name items are marked `_shadowed` in `all` and excluded from `items`.\n\nEffective rule provider order is currently:\n\n1. `native` (100)\n2. `cursor` (50)\n3. `windsurf` (50)\n4. `cline` (40)\n\n### Intra-provider ordering caveat\n\nWithin a provider, item order comes from `loadFilesFromDir` glob result ordering plus explicit push order. This is deterministic enough for normal use but not explicitly sorted in code.\n\nNotable source-order differences:\n\n- `native` appends project then user config dirs.\n- `cursor` appends user then project results.\n- `windsurf` appends user `global_rules` first, then project rules.\n- `cline` loads only nearest `.clinerules` source.\n\n## 5. Split into Rulebook, Always-Apply, and TTSR buckets\n\nAfter rule discovery in `createAgentSession` (`sdk.ts`):\n\n1. All discovered rules are scanned.\n2. Rules with `condition` entries are registered into `TtsrManager`; legacy `ttsr_trigger` / `ttsrTrigger` are accepted during rule parsing as condition fallbacks.\n3. A separate `rulebookRules` list is built with this predicate:\n\n```ts\n!isTtsrRule && rule.alwaysApply !== true && !!rule.description;\n```\n\n4. An `alwaysApplyRules` list is built:\n\n```ts\n!isTtsrRule && rule.alwaysApply === true;\n```\n\n### Bucket behavior\n\n- **TTSR bucket**: any rule with a non-empty parsed `condition` that `TtsrManager.addRule(...)` accepts. Takes priority over other buckets.\n- **Always-apply bucket**: `alwaysApply === true`, not TTSR. Full content injected into system prompt. 
Resolvable via `rule://`.\n- **Rulebook bucket**: must have description, must not be TTSR, must not be `alwaysApply`. Listed in system prompt by name+description; content read on demand via `rule://`.\n- A rule with both `condition` and `alwaysApply` goes to TTSR only (TTSR takes priority).\n- A rule with both `alwaysApply` and `description` goes to always-apply only (not rulebook).\n\n## 6. How metadata affects runtime surfaces\n\n### `description`\n\n- Required for inclusion in rulebook.\n- Rendered in system prompt `<rules>` block.\n- Missing description means the rule is not listed in the system prompt rules block and, unless `alwaysApply` is `true`, is not available via `rule://`.\n\n### `globs`\n\n- Carried through on `Rule`.\n- Rendered as `<glob>...</glob>` entries in the system prompt rules block.\n- Exposed in rules UI state (`extensions` mode list).\n- **Not enforced for automatic matching in this pipeline.** There is no runtime glob matcher selecting rules by current file/tool target.\n\n### `alwaysApply`\n\n- Parsed and preserved by providers.\n- Used in UI display (`\"always\"` trigger label in extensions state manager).\n- Used as an exclusion condition from `rulebookRules`.\n- **Full rule content is auto-injected into the system prompt** (before the rulebook rules section).\n- Rule is also addressable via `rule://<name>` for re-reading.\n\n### `condition`, `scope`, and `interruptMode`\n\n- `condition` is the current TTSR trigger field; legacy `ttsr_trigger` / `ttsrTrigger` are accepted as fallback inputs during parsing.\n- `scope` narrows TTSR matching scope. A condition token that looks like a file glob becomes `tool:edit(<glob>)` and `tool:write(<glob>)` scope entries plus catch-all condition `.*`.\n- `interruptMode` can override the global TTSR interrupt mode for the rule.\n\n## 7. 
System prompt inclusion path\n\n`buildSystemPromptInternal` receives both `rules` (rulebook) and `alwaysApplyRules`.\n\nAlways-apply rules are rendered first, injecting their raw content directly into the prompt.\n\nRulebook rules are rendered in a `# Rules` section with:\n\n- `Read rule://<name> when working in matching domain`\n- Each rule's `name`, `description`, and optional `<glob>` list\n\nThis is advisory/contextual: prompt text asks the model to read applicable rules, but code does not enforce glob applicability.\n\n## 8. `rule://` internal URL behavior\n\n`RuleProtocolHandler` is registered with:\n\n```ts\nnew RuleProtocolHandler({\n getRules: () => [...rulebookRules, ...alwaysApplyRules],\n});\n```\n\nImplications:\n\n- `rule://<name>` resolves against both **rulebookRules** and **alwaysApplyRules**.\n- TTSR-only rules and rules with no description and no `alwaysApply` are not addressable via `rule://`.\n- Resolution is exact name match.\n- Unknown names return error listing available rule names.\n- Returned content is raw `rule.content` (frontmatter stripped), content type `text/markdown`.\n\n## 9. Known partial / non-enforced semantics\n\n1. Provider descriptions mention legacy files (`.cursorrules`, `.windsurfrules`), but current loader code paths do not actually read those files.\n2. `globs` metadata is surfaced to prompt/UI but not enforced by rule selection logic.\n3. Rule selection for `rule://` includes rulebook and always-apply rules, but not TTSR-only rules.\n4. Discovery warnings (`loadCapability(\"rules\").warnings`) are produced but `createAgentSession` does not currently surface/log them in this path.\n",
45
45
  "sdk.md": "# SDK\n\nThe SDK is the in-process integration surface for `@oh-my-pi/pi-coding-agent`.\nUse it when you want direct access to agent state, event streaming, tool wiring, and session control from your own Bun/Node process.\n\nIf you need cross-language/process isolation, use RPC mode instead.\n\n## Installation\n\n```bash\nbun add @oh-my-pi/pi-coding-agent\n```\n\n## Entry points\n\n`@oh-my-pi/pi-coding-agent` exports the SDK APIs from the package root (and also via `@oh-my-pi/pi-coding-agent/sdk`).\n\nCore exports for embedders:\n\n- `createAgentSession`\n- `SessionManager`\n- `Settings`\n- `AuthStorage`\n- `ModelRegistry`\n- `discoverAuthStorage`\n- Discovery helpers (`discoverExtensions`, `discoverSkills`, `discoverContextFiles`, `discoverPromptTemplates`, `discoverSlashCommands`, `discoverCustomTSCommands`, `discoverMCPServers`)\n- Tool factory surface (`createTools`, `BUILTIN_TOOLS`, tool classes)\n\n## Quick start (auto-discovery defaults)\n\n```ts\nimport { createAgentSession } from \"@oh-my-pi/pi-coding-agent\";\n\nconst { session, modelFallbackMessage } = await createAgentSession();\n\nif (modelFallbackMessage) {\n process.stderr.write(`${modelFallbackMessage}\\n`);\n}\n\nconst unsubscribe = session.subscribe((event) => {\n if (\n event.type === \"message_update\" &&\n event.assistantMessageEvent.type === \"text_delta\"\n ) {\n process.stdout.write(event.assistantMessageEvent.delta);\n }\n});\n\nawait session.prompt(\"Summarize this repository in 3 bullets.\");\nunsubscribe();\nawait session.dispose();\n```\n\n## What `createAgentSession()` discovers by default\n\n`createAgentSession()` follows “provide to override, omit to discover”.\n\nIf omitted, it resolves:\n\n- `cwd`: `getProjectDir()`\n- `agentDir`: `~/.omp/agent` (via `getAgentDir()`)\n- `authStorage`: `discoverAuthStorage(agentDir)`\n- `modelRegistry`: `new ModelRegistry(authStorage)` + background `refreshInBackground()` when the registry is not provided\n- `settings`: `await 
Settings.init({ cwd, agentDir })`\n- `sessionManager`: `SessionManager.create(cwd)` (file-backed)\n- skills/context files/prompt templates/slash commands/extensions/custom TS commands\n- built-in tools via `createTools(...)`\n- MCP tools (enabled by default)\n- LSP integration (enabled by default)\n- `eventBus`: new `EventBus()` unless supplied\n\n### Required vs optional inputs\n\nTypically you must provide only what you want to control:\n\n- **Must provide**: nothing for a minimal session\n- **Usually provide explicitly** in embedders:\n - `sessionManager` (if you need in-memory or custom location)\n - `authStorage` + `modelRegistry` (if you own credential/model lifecycle)\n - `model` or `modelPattern` (if deterministic model selection matters)\n - `settings` (if you need isolated/test config)\n\n## Session manager behavior (persistent vs in-memory)\n\n`AgentSession` always uses a `SessionManager`; behavior depends on which factory you use.\n\n### File-backed (default)\n\n```ts\nimport { createAgentSession, SessionManager } from \"@oh-my-pi/pi-coding-agent\";\n\nconst { session } = await createAgentSession({\n sessionManager: SessionManager.create(process.cwd()),\n});\n\nconsole.log(session.sessionFile); // absolute .jsonl path\n```\n\n- Persists conversation/messages/state deltas to session files.\n- Supports resume/open/list/fork workflows.\n- `session.sessionFile` is defined.\n\n### In-memory\n\n```ts\nimport { createAgentSession, SessionManager } from \"@oh-my-pi/pi-coding-agent\";\n\nconst { session } = await createAgentSession({\n sessionManager: SessionManager.inMemory(),\n});\n\nconsole.log(session.sessionFile); // undefined\n```\n\n- No filesystem persistence.\n- Useful for tests, ephemeral workers, request-scoped agents.\n- Session methods still work, but persistence-specific behaviors (file resume/fork paths) are naturally limited.\n\n### Resume/open/list helpers\n\n```ts\nimport { SessionManager } from \"@oh-my-pi/pi-coding-agent\";\n\nconst recent = 
await SessionManager.continueRecent(process.cwd());\nconst listed = await SessionManager.list(process.cwd());\nconst opened = listed[0] ? await SessionManager.open(listed[0].path) : null;\n```\n\n## Model and auth wiring\n\n`createAgentSession()` uses `ModelRegistry` + `AuthStorage` for model selection and API key resolution.\n\n### Explicit wiring\n\n```ts\nimport {\n createAgentSession,\n discoverAuthStorage,\n ModelRegistry,\n SessionManager,\n} from \"@oh-my-pi/pi-coding-agent\";\n\nconst authStorage = await discoverAuthStorage();\nconst modelRegistry = new ModelRegistry(authStorage);\nawait modelRegistry.refresh();\n\nconst available = modelRegistry.getAvailable();\nif (available.length === 0)\n throw new Error(\"No authenticated models available\");\n\nconst { session } = await createAgentSession({\n authStorage,\n modelRegistry,\n model: available[0],\n thinkingLevel: \"medium\",\n sessionManager: SessionManager.inMemory(),\n});\n```\n\n### Selection order when `model` is omitted\n\nWhen no explicit `model`/`modelPattern` is provided:\n\n1. restore model from existing session (if restorable + key available)\n2. settings default model role (`default`)\n3. first available model with valid auth\n\nIf restore fails, `modelFallbackMessage` explains fallback.\n\n### Auth priority\n\n`AuthStorage.getApiKey(...)` resolves in this order:\n\n1. runtime override (`setRuntimeApiKey`)\n2. stored credentials in `agent.db`\n3. provider environment variables\n4. 
custom-provider resolver fallback (if configured)\n\n## Event subscription model\n\nSubscribe with `session.subscribe(listener)`; it returns an unsubscribe function.\n\n```ts\nconst unsubscribe = session.subscribe((event) => {\n switch (event.type) {\n case \"agent_start\":\n case \"turn_start\":\n case \"tool_execution_start\":\n break;\n case \"message_update\":\n if (event.assistantMessageEvent.type === \"text_delta\") {\n process.stdout.write(event.assistantMessageEvent.delta);\n }\n break;\n }\n});\n```\n\n`AgentSessionEvent` includes core `AgentEvent` plus session-level events:\n\n- `auto_compaction_start` / `auto_compaction_end`\n- `auto_retry_start` / `auto_retry_end`\n- `retry_fallback_applied` / `retry_fallback_succeeded`\n- `ttsr_triggered`\n- `todo_reminder` / `todo_auto_clear`\n- `irc_message`\n\n## Prompt lifecycle\n\n`session.prompt(text, options?)` is the primary entry point.\n\nBehavior:\n\n1. optional command/template expansion (`/` commands, custom commands, file slash commands, prompt templates)\n2. if currently streaming:\n - requires `streamingBehavior: \"steer\" | \"followUp\"`\n - queues instead of throwing work away\n3. if idle:\n - validates model + API key\n - appends user message\n - starts agent turn\n\nRelated APIs:\n\n- `sendUserMessage(content, { deliverAs? })`\n- `steer(text, images?)`\n- `followUp(text, images?)`\n- `sendCustomMessage({ customType, content, ... }, { deliverAs?, triggerTurn? 
})`\n- `abort()`\n\n## Tools and extension integration\n\n### Built-ins and filtering\n\n- Built-ins come from `createTools(...)` and `BUILTIN_TOOLS`.\n- `toolNames` acts as an allowlist for built-ins.\n- `customTools` and extension-registered tools are still included.\n- Hidden tools (for example `yield`) are opt-in unless required by options.\n\n```ts\nconst { session } = await createAgentSession({\n toolNames: [\"read\", \"grep\", \"find\", \"write\"],\n requireYieldTool: true,\n});\n```\n\n### Extensions\n\n- `extensions`: inline `ExtensionFactory[]`\n- `additionalExtensionPaths`: load extra extension files\n- `disableExtensionDiscovery`: disable automatic extension scanning\n- `preloadedExtensions`: reuse already loaded extension set\n\n### Runtime tool set changes\n\n`AgentSession` supports runtime activation updates:\n\n- `getActiveToolNames()`\n- `getAllToolNames()`\n- `setActiveToolsByName(names)`\n- `refreshMCPTools(mcpTools)`\n\nSystem prompt is rebuilt to reflect active tool changes.\n\n## Discovery helpers\n\nUse these when you want partial control without recreating internal discovery logic:\n\n- `discoverAuthStorage(agentDir?)`\n- `discoverExtensions(cwd?)`\n- `discoverSkills(cwd?, _agentDir?, settings?)`\n- `discoverContextFiles(cwd?, _agentDir?)`\n- `discoverPromptTemplates(cwd?, agentDir?)`\n- `discoverSlashCommands(cwd?)`\n- `discoverCustomTSCommands(cwd?, agentDir?)`\n- `discoverMCPServers(cwd?)`\n- `buildSystemPrompt(options?)`\n\n## Subagent-oriented options\n\nFor SDK consumers building orchestrators (similar to task executor flow):\n\n- `outputSchema`: passes structured output expectation into tool context\n- `requireYieldTool`: forces `yield` tool inclusion\n- `taskDepth`: recursion-depth context for nested task sessions\n- `parentTaskPrefix`: artifact naming prefix for nested task outputs\n\nThese are optional for normal single-agent embedding.\n\n## `createAgentSession()` return value\n\n```ts\ntype CreateAgentSessionResult = {\n session: 
AgentSession;\n extensionsResult: LoadExtensionsResult;\n setToolUIContext: (uiContext: ExtensionUIContext, hasUI: boolean) => void;\n mcpManager?: MCPManager;\n modelFallbackMessage?: string;\n lspServers?: Array<{\n name: string;\n status: \"ready\" | \"error\";\n fileTypes: string[];\n error?: string;\n }>;\n eventBus: EventBus;\n};\n```\n\nUse `setToolUIContext(...)` only if your embedder provides UI capabilities that tools/extensions should call into.\n\n## Minimal controlled embed example\n\n```ts\nimport {\n createAgentSession,\n discoverAuthStorage,\n ModelRegistry,\n SessionManager,\n Settings,\n} from \"@oh-my-pi/pi-coding-agent\";\n\nconst authStorage = await discoverAuthStorage();\nconst modelRegistry = new ModelRegistry(authStorage);\nawait modelRegistry.refresh();\n\nconst settings = Settings.isolated({\n \"compaction.enabled\": true,\n \"retry.enabled\": true,\n});\n\nconst { session } = await createAgentSession({\n authStorage,\n modelRegistry,\n settings,\n sessionManager: SessionManager.inMemory(),\n toolNames: [\"read\", \"grep\", \"find\", \"edit\", \"write\"],\n enableMCP: false,\n enableLsp: true,\n});\n\nsession.subscribe((event) => {\n if (\n event.type === \"message_update\" &&\n event.assistantMessageEvent.type === \"text_delta\"\n ) {\n process.stdout.write(event.assistantMessageEvent.delta);\n }\n});\n\nawait session.prompt(\"Find all TODO comments in this repo and propose fixes.\");\nawait session.dispose();\n```\n",
46
46
  "secrets.md": "# Secret Obfuscation\n\nPrevents sensitive values (API keys, tokens, passwords) from being sent to LLM providers. When enabled, secrets are replaced with deterministic placeholders before leaving the process, and restored in tool call arguments returned by the model.\n\n## Enabling\n\nDisabled by default. Toggle via `/settings` UI or directly in `config.yml`:\n\n```yaml\nsecrets:\n enabled: true\n```\n\n## How it works\n\n1. On session startup, secrets are collected from two sources:\n - **Environment variables** whose names match common secret patterns (`KEY`, `SECRET`, `TOKEN`, `PASSWORD`, `PASS`, `AUTH`, `CREDENTIAL`, `PRIVATE`, `OAUTH`) with values >= 8 characters\n - **`secrets.yml` files** (see below)\n\n2. Outbound text messages to the LLM have secret values replaced with deterministic placeholders like `#AB12#`.\n\n3. Session context/tool arguments returned from the model are deep-walked and obfuscation placeholders are restored to original values before display or execution.\n\nTwo modes control what happens to each secret:\n\n| Mode | Behavior | Reversible |\n| --------------------- | ------------------------------------------------------- | ----------------------------------------------- |\n| `obfuscate` (default) | Replaced with deterministic placeholder `#[A-Z0-9]{4}#` | Yes (deobfuscated in tool args/session context) |\n| `replace` | Replaced with deterministic same-length string | No (one-way) |\n\n## secrets.yml\n\nDefine custom secret entries in YAML. 
Two locations are checked:\n\n| Level | Path | Purpose |\n| ------- | -------------------------- | --------------------------- |\n| Global | `~/.omp/agent/secrets.yml` | Secrets across all projects |\n| Project | `<cwd>/.omp/secrets.yml` | Project-specific secrets |\n\nProject entries override global entries with matching `content`.\n\n### Schema\n\nEach entry in the array has these fields:\n\n| Field | Type | Required | Description |\n| ------------- | ---------------------------- | -------- | ------------------------------------------------- |\n| `type` | `\"plain\"` or `\"regex\"` | Yes | Match strategy |\n| `content` | string | Yes | The secret value (plain) or regex pattern (regex) |\n| `mode` | `\"obfuscate\"` or `\"replace\"` | No | Default: `\"obfuscate\"` |\n| `replacement` | string | No | Custom replacement (replace mode only) |\n| `flags` | string | No | Regex flags (regex type only) |\n\n### Examples\n\n#### Plain secrets\n\n```yaml\n# Obfuscate a specific API key (default mode)\n- type: plain\n content: sk-proj-abc123def456\n\n# Replace a database password with a fixed string\n- type: plain\n content: hunter2\n mode: replace\n replacement: \"********\"\n```\n\n#### Regex secrets\n\n```yaml\n# Obfuscate any AWS-style key\n- type: regex\n content: \"AKIA[0-9A-Z]{16}\"\n\n# Case-insensitive match with explicit flags\n- type: regex\n content: \"api[_-]?key\\\\s*=\\\\s*\\\\w+\"\n flags: \"i\"\n\n# Regex literal syntax (pattern and flags in one string)\n- type: regex\n content: \"/bearer\\\\s+[a-zA-Z0-9._~+\\\\/=-]+/i\"\n```\n\nRegex entries always scan globally (the `g` flag is enforced automatically). The regex literal syntax `/pattern/flags` is supported as an alternative to separate `content` + `flags` fields. 
Escaped slashes within the pattern (`\\\\/`) are handled correctly.\n\n#### Replace mode with regex\n\n```yaml\n# One-way replace connection strings (not reversible)\n- type: regex\n content: \"postgres://[^\\\\s]+\"\n mode: replace\n replacement: \"postgres://***\"\n```\n\n## Interaction with env var detection\n\nEnvironment variables are collected first, then file-defined entries are appended. File entries can cover secrets that don't live in env vars (config files, hardcoded values, etc.). If the same plain value appears in both env and file entries, the env entry's obfuscate-mode mapping is used first.\n\n## Key files\n\n- `packages/coding-agent/src/secrets/index.ts` -- loading, merging, env var collection\n- `packages/coding-agent/src/secrets/obfuscator.ts` -- `SecretObfuscator` class, placeholder generation, message obfuscation\n- `packages/coding-agent/src/secrets/regex.ts` -- regex literal parsing and compilation\n- `packages/coding-agent/src/config/settings-schema.ts` -- `secrets.enabled` setting definition\n",
@@ -61,21 +61,20 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
61
61
  "tools/ask.md": "# ask\n\n> Prompts the interactive user for one or more choices or free-form answers.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ask.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ask.md`\n- Key collaborators:\n - `packages/coding-agent/src/config/settings-schema.ts` — `ask.timeout` / `ask.notify` defaults\n - `packages/coding-agent/src/modes/theme/theme.ts` — checkbox and tree glyphs for TUI rendering\n - `packages/coding-agent/src/tui.ts` — status-line rendering\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `questions` | `Question[]` | Yes | One or more questions. Empty arrays are rejected by schema and also guarded at runtime. |\n\n### `Question`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `id` | `string` | Yes | Stable identifier used in multi-question results. |\n| `question` | `string` | Yes | Prompt text shown to the user. |\n| `options` | `{ label: string }[]` | Yes | Explicit options. The UI always appends `Other (type your own)`; callers must not include it. |\n| `multi` | `boolean` | No | Enables multi-select mode. Default: `false`. |\n| `recommended` | `number` | No | Zero-based recommended option index. In single-select mode the label gets ` (Recommended)` appended in the UI. |\n\n## Outputs\n- Single-shot result.\n- `content[0].text` is plain text:\n - single question: `User selected: ...` and/or `User provided custom input: ...`\n - multiple questions: `User answers:` followed by one line per `id`\n- `details`:\n - single question: `{ question, options, multi, selectedOptions, customInput? }`\n - multiple questions: `{ results: QuestionResult[] }`, where each item includes `id`, `question`, `options`, `multi`, `selectedOptions`, and optional `customInput`\n- Cancellation and headless cases throw instead of returning a structured success result.\n\n## Flow\n1. 
`AskTool.createIf()` only registers the tool when `session.hasUI` is true; headless sessions never get it.\n2. `execute()` requires `context.ui`; if missing it aborts the context and throws `ToolAbortError(\"Ask tool requires interactive mode\")`.\n3. It reads `ask.timeout` from settings, converts seconds to milliseconds, and disables timeout entirely while plan mode is enabled (`packages/coding-agent/src/tools/ask.ts`).\n4. If `ask.notify` is not `off`, it sends a terminal notification: `Waiting for input`.\n5. For each question, `askSingleQuestion()` drives either:\n - single-select list + optional editor for `Other`\n - multi-select checkbox loop + `Done selecting` sentinel + optional editor for `Other`\n6. In multi-question mode, left/right arrow handlers enable back/forward navigation between questions and preserve prior selections.\n7. If a timeout fires before any selection/custom input, the tool auto-selects the recommended option, or the first option when no valid `recommended` index exists.\n8. If the user cancels without timeout, `execute()` aborts the tool context and throws `ToolAbortError(\"Ask tool was cancelled by the user\")`.\n9. 
On success it formats human-readable text plus structured `details`; the TUI renderer uses `details` for rich display.\n\n## Modes / Variants\n- Single question: returns flattened `details` fields for one question.\n- Multiple questions: returns `details.results[]` and allows back/forward navigation across questions.\n- Single-select: one option or custom input.\n- Multi-select: toggled checkbox list, `Done selecting` sentinel only when forward navigation is not active.\n\n## Side Effects\n- User-visible prompts / interactive UI\n - Opens a selection dialog via `context.ui.select(...)`.\n - Opens a text editor dialog via `context.ui.editor(...)` for `Other`.\n - Sends a terminal notification unless `ask.notify=off`.\n- Session state\n - Reads plan-mode state to disable timeouts.\n - Calls `context.abort()` on headless use or user cancellation.\n- Background work / cancellation\n - Wraps UI waits in `untilAborted(...)` so abort signals interrupt pending dialogs.\n\n## Limits & Caps\n- `questions` must contain at least 1 item (`askSchema` in `packages/coding-agent/src/tools/ask.ts`).\n- `ask.timeout` default is `30` seconds; `0` disables timeout (`packages/coding-agent/src/config/settings-schema.ts`).\n- Prompt guidance says provide 2-5 options, but code does not enforce that (`packages/coding-agent/src/prompts/tools/ask.md`).\n- Timeout only applies to the option picker; once the user chooses `Other`, the editor has no timeout (`packages/coding-agent/src/prompts/tools/ask.md`).\n\n## Errors\n- Missing interactive UI: throws `ToolAbortError(\"Ask tool requires interactive mode\")`.\n- User cancels picker/editor without timeout: throws `ToolAbortError(\"Ask tool was cancelled by the user\")`.\n- Abort signal during input: converted to `ToolAbortError(\"Ask input was cancelled\")`.\n- Empty `questions` at runtime returns a text error payload instead of throwing: `Error: questions must not be empty`.\n\n## Notes\n- `recommended` is only a UI hint; invalid indexes are 
ignored.\n- In single-select mode the returned `selectedOptions` value strips the appended ` (Recommended)` suffix.\n- Multi-select results are returned in `Set` insertion order, which can differ from the original option order after arbitrary toggles.\n- Option labels and prompt text are returned verbatim in `details`; the tool does not interpret them beyond UI affordances like `Other` and ` (Recommended)`.\n",
62
62
  "tools/ast-edit.md": "# ast_edit\n\n> Preview and apply structural rewrites over source files via native ast-grep.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ast-edit.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ast-edit.md`\n- Key collaborators:\n - `crates/pi-natives/src/ast.rs` — native rewrite planning and file mutation\n - `crates/pi-natives/src/language/mod.rs` — language aliases and extension inference\n - `packages/coding-agent/src/tools/path-utils.ts` — path/glob parsing and multi-path resolution\n - `packages/coding-agent/src/tools/resolve.ts` — preview/apply queueing\n - `packages/coding-agent/src/tools/render-utils.ts` — parse-error dedupe and display caps\n - `packages/coding-agent/src/utils/file-display-mode.ts` — hashline vs line-number diff references\n - `packages/coding-agent/src/hashline/hash.ts` — stable hashline diff anchors\n - `packages/natives/native/index.d.ts` — JS-visible native binding contract\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `ops` | `{ pat: string; out: string }[]` | Yes | One or more rewrite rules. `pat` must be non-empty. Duplicate `pat` values fail before native execution. Empty `out` deletes the matched node. |\n| `paths` | `string[]` | Yes | One or more files, directories, globs, or internal URLs with backing files. Empty entries are rejected. Globs are forbidden for internal URLs. 
|\n\nShared AST pattern grammar and language catalog: see [`ast_grep`](./ast-grep.md#inputs).\n\n- `ast_edit` uses the same `$NAME`, `$_`, `$$$NAME`, and `$$$` metavariable semantics.\n- The tool prompt adds rewrite-specific constraints:\n - metavariable names must be uppercase and must stand for whole AST nodes,\n - captures from `pat` are substituted into `out`,\n - each rewrite is a 1:1 structural substitution; one capture cannot expand into multiple sibling nodes unless the grammar itself permits that expansion at that position.\n\n## Outputs\n- Single-shot preview result from `ast_edit` itself.\n- Model-facing `content` is one text block showing proposed edits, grouped by file for directory/multi-file runs.\n - Each change renders as two lines: `-REF|before` and `+REF|after` in hashline mode, or `-LINE:COLUMN before` / `+LINE:COLUMN after` when hashlines are off.\n - Only the first line of each `before`/`after` snippet is shown, truncated to 120 characters in the wrapper.\n - `Limit reached; narrow paths.` and formatted parse issues are appended when applicable.\n- If no rewrites match, text is `No replacements made` plus formatted parse issues when present.\n- `details` includes aggregate preview metadata:\n - `totalReplacements`, `filesTouched`, `filesSearched`, `applied`, `limitReached`\n - optional `parseErrors`, `scopePath`, `files`, `fileReplacements`, `displayContent`, `meta`\n- The tool always previews first (`applied: false` in the direct result). Actual file writes happen only later through `resolve(action: \"apply\", ...)`.\n- When preview produced replacements, `ast_edit` also queues a pending `resolve` action. Successful apply returns a separate `resolve` result, not another `ast_edit` result.\n\n## Flow\n1. `AstEditTool.execute()` validates each op in `packages/coding-agent/src/tools/ast-edit.ts`:\n - empty `pat` fails,\n - at least one op is required,\n - duplicate `pat` values fail,\n - ops are converted to a `Record<pattern, replacement>`.\n2. 
The wrapper reads `PI_MAX_AST_FILES` via `$envpos(..., 1000)` and uses that as the native `maxFiles` cap for both preview and apply.\n3. Path normalization, internal URL handling, missing-path partitioning, and multi-path resolution follow the same `path-utils.ts` flow as `ast_grep`.\n4. The wrapper stats the resolved base path to decide whether to render grouped directory output.\n5. `runAstEditOnce(...)` always runs native `astEdit(...)` with `dryRun: true` and `failOnParseError: false` on the first pass.\n6. Native `ast_edit` in `crates/pi-natives/src/ast.rs`:\n - normalizes the rewrite map and sorts rules by pattern string,\n - resolves strictness (`smart` by default),\n - collects candidate files from a file or gitignore-aware directory scan,\n - infers a single language for the whole call unless `lang` was supplied,\n - compiles every rewrite pattern for that language,\n - parses each file, skips files with syntax-error trees, collects `replace_by(...)` edits for every match, enforces replacement and file caps, and returns textual before/after slices plus source ranges.\n7. The TS wrapper deduplicates parse errors, groups changes by file, and renders preview diff lines.\n8. If preview found replacements and `applied` is false, `queueResolveHandler(...)` registers a forced `resolve` action and injects a `resolve-reminder` steering message.\n9. On `resolve(action: \"apply\")`, the queued callback reruns the same rewrite set with `dryRun: false`, recomputes counts, and rejects the apply as an error if the live result no longer matches the preview (`stalePreview`).\n10. 
On a non-stale apply, the callback returns `Applied N replacements in M files.`; on discard, `resolve` returns a discard message without mutating files.\n\n## Modes / Variants\n- Single file: preview or apply against one file.\n- Directory + optional glob: native scan walks the directory, then filters by compiled glob.\n- Multiple explicit paths/globs: wrapper unions them into one synthetic scope or runs per-target native calls when paths only meet at root.\n- Internal URL inputs: only supported when the router resolves them to a backing file path.\n- Preview mode: always the direct `ast_edit` tool result.\n- Apply mode: only reachable through the queued `resolve` callback after a preview.\n- Hashline output mode vs plain line/column mode: controlled by `resolveFileDisplayMode()`.\n\n## Side Effects\n- Filesystem\n - Preview reads files and scans directories.\n - Apply rewrites files in place with `std::fs::write(...)`, but only when the computed output differs from the original source.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Queues a one-shot forced `resolve` tool choice through `queueResolveHandler(...)`.\n - Adds a `resolve-reminder` steering message.\n- User-visible prompts / interactive UI\n - Direct `ast_edit` results are previews.\n - Follow-up apply/discard is exposed through the hidden `resolve` tool.\n- Background work / cancellation\n - Native preview/apply work runs on a blocking worker via `task::blocking(...)`.\n - Cancellation and optional native timeout are cooperative through `CancelToken::heartbeat()`.\n\n## Limits & Caps\n- File cap exposed by the wrapper: `PI_MAX_AST_FILES`, default `1000`, in `packages/coding-agent/src/tools/ast-edit.ts`.\n- Native `maxFiles` and `maxReplacements` are both clamped to at least `1` when provided in `crates/pi-natives/src/ast.rs`.\n- The wrapper never sets `maxReplacements`; native behavior therefore defaults to effectively unbounded replacements for a run.\n- Parse issues are 
rendered with at most `PARSE_ERRORS_LIMIT = 20` lines in `packages/coding-agent/src/tools/render-utils.ts`; `details.parseErrors` is deduplicated but not capped.\n- Directory scans use `include_hidden: true`, `use_gitignore: true`, and skip `node_modules` unless the glob text explicitly mentions `node_modules` in `crates/pi-natives/src/ast.rs`.\n- No separate glob-expansion count cap exists. Candidate count is whatever the resolved path/glob expands to after gitignore filtering, then native `maxFiles` stops mutations after the configured number of touched files.\n- Preview text truncates each rendered `before` and `after` first line to 120 characters in `packages/coding-agent/src/tools/ast-edit.ts`.\n\n## Errors\n- TS wrapper throws `ToolError` for empty patterns, duplicate rewrite patterns, empty path entries, unsupported internal-URL globs, internal URLs without `sourcePath`, and missing paths.\n- Native code returns hard errors for:\n - inability to infer one language across all candidates when `lang` is absent,\n - unsupported explicit `lang`,\n - bad glob compilation or unreadable search roots,\n - overlapping computed edits (`Overlapping replacements detected; refine pattern to avoid ambiguous edits`),\n - out-of-bounds edit ranges or non-UTF-8 replacement text,\n - write failures during apply,\n - cancellation or timeout.\n- With `failOnParseError: false` (the wrapper always uses this), pattern compile failures and file parse failures become `parseErrors` instead of aborting the whole run.\n- If every rewrite pattern fails to compile, native `ast_edit` returns a successful zero-replacement result with `parseErrors` populated.\n- Files containing tree-sitter error nodes are skipped for rewriting; they do not get partial edits.\n- Apply can fail after a successful preview if the preview becomes stale. 
The resolve callback compares replacement totals and per-file counts and returns an error result rather than applying a mismatched preview silently.\n\n## Notes\n- `ast_edit` does not expose the native `lang`, `strictness`, `selector`, `maxReplacements`, `failOnParseError`, or `timeoutMs` fields to the model. The runtime fixes the call shape to a preview-first, smart-strictness, best-effort parse mode.\n- Because the wrapper does not expose `lang`, mixed-language rewrites only succeed when every candidate infers to the same canonical language. This is stricter than `ast_grep`.\n- Idempotency is not enforced syntactically. A rewrite like `foo($A) -> foo($A)` previews zero changes because output equals input; a rewrite that keeps matching its own output may still produce replacements on repeated calls.\n- Rewrites are accumulated per file, then applied from the end of the file backward after an overlap check. Independent matches can coexist; overlapping matches abort the run.\n- Native rewrite rule order is by pattern-string sort, not by the original `ops` array order, because `normalize_rewrite_map(...)` sorts the `(pattern, rewrite)` pairs.\n- Preview/apply parity is validated only by totals and per-file counts, not by a byte-for-byte diff of every replacement payload.",
63
63
  "tools/ast-grep.md": "# ast_grep\n\n> Structural code search over supported source files via native ast-grep.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ast-grep.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ast-grep.md`\n- Key collaborators:\n - `crates/pi-natives/src/ast.rs` — native scan, parse, match engine\n - `crates/pi-natives/src/language/mod.rs` — language aliases and extension inference\n - `packages/coding-agent/src/tools/path-utils.ts` — path/glob parsing and multi-path resolution\n - `packages/coding-agent/src/tools/render-utils.ts` — parse-error dedupe and display caps\n - `packages/coding-agent/src/tools/match-line-format.ts` — anchor-prefixed match rendering\n - `packages/coding-agent/src/utils/file-display-mode.ts` — hashline vs line-number output mode\n - `packages/natives/native/index.d.ts` — JS-visible native binding contract\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `pat` | `string` | Yes | Single AST pattern. The wrapper trims it and rejects empty strings. |\n| `paths` | `string[]` | Yes | One or more files, directories, globs, or internal URLs with backing files. Empty entries are rejected. Globs are forbidden for internal URLs. |\n| `skip` | `number` | No | Match offset. Defaults to `0`, then `Math.floor(...)`; negatives and non-finite values fail. 
|\n\nPattern grammar and language support exposed to the model:\n- `$NAME` — capture one AST node.\n- `$_` — match one AST node without binding.\n- `$$$NAME` — capture zero or more AST nodes; ast-grep stops lazily at the next satisfiable node.\n- `$$$` — match zero or more AST nodes without binding.\n- Metavariable names must be uppercase and must stand for whole AST nodes, not partial tokens or string fragments.\n- Reusing the same metavariable requires identical code at each occurrence.\n- Patterns must parse as one valid AST node for the inferred target language.\n- Supported canonical languages come from `SupportLang::all_langs()` in `crates/pi-natives/src/language/mod.rs`: `astro`, `bash`, `c`, `cmake`, `cpp`, `csharp`, `dart`, `clojure`, `css`, `diff`, `dockerfile`, `elixir`, `erlang`, `go`, `graphql`, `haskell`, `hcl`, `html`, `ini`, `java`, `javascript`, `json`, `just`, `julia`, `kotlin`, `lua`, `make`, `markdown`, `nix`, `objc`, `ocaml`, `odin`, `perl`, `php`, `powershell`, `protobuf`, `python`, `r`, `regex`, `ruby`, `rust`, `scala`, `solidity`, `sql`, `starlark`, `svelte`, `swift`, `toml`, `tlaplus`, `tsx`, `typescript`, `verilog`, `vue`, `xml`, `yaml`, `zig`.\n\n## Outputs\n- Single-shot tool result.\n- Model-facing `content` is one text block:\n - grouped by file for directory/multi-file searches,\n - match lines rendered as `*LINE+HASH|text` in hashline mode or `*LINE|text` otherwise,\n - continuation lines for multi-line matches rendered with a leading space,\n - optional `meta: NAME=value` lines when ast-grep captured metavariables.\n- If no matches are found, text is `No matches found` or `No matches found. 
Parse issues mean the query may be mis-scoped; narrow paths before concluding absence.` plus formatted parse issues.\n- If the wrapper truncates visible results, the text ends with `Result limit reached; narrow paths or increase limit.`\n- `details` includes counts and metadata, not full match payloads:\n - `matchCount`, `fileCount`, `filesSearched`, `limitReached`\n - optional `parseErrors`, `scopePath`, `files`, `fileMatches`, `displayContent`, `meta`\n- Native ranges (`byteStart`, `byteEnd`, `startLine`, `startColumn`, `endLine`, `endColumn`) exist only inside the native result; the wrapper does not emit them directly to the model.\n\n## Flow\n1. `AstGrepTool.execute()` validates `pat`, normalizes `skip`, and normalizes each `paths` entry in `packages/coding-agent/src/tools/ast-grep.ts`.\n2. Internal URLs are resolved through `session.internalRouter`; entries without `sourcePath` fail, and internal-URL globs fail early.\n3. For multiple path inputs, `partitionExistingPaths()` drops missing bases only when at least one surviving base remains; if all bases are missing the call fails.\n4. `parseSearchPath()` splits a single path into `basePath` plus optional `glob`. `resolveExplicitSearchPaths()` collapses multiple inputs into a common base plus a brace-union glob, or separate `targets` when the only common base is a filesystem root.\n5. The wrapper stats the resolved base path to decide whether output should be grouped as a directory result.\n6. Execution dispatches to either:\n - one native `astGrep(...)` call for a single resolved base, or\n - `runMultiTargetAstGrep(...)`, which calls the native binding once per target, rebases paths back to the common root, sorts globally, then applies `skip` and the wrapper limit.\n7. 
Native `ast_grep` in `crates/pi-natives/src/ast.rs`:\n - normalizes and deduplicates patterns,\n - resolves a `MatchStrictness` (`smart` by default),\n - collects candidate files from a file or gitignore-aware directory scan,\n - infers language per candidate from extension unless `lang` was provided,\n - compiles the pattern separately for each language present,\n - reads each file, reports syntax-error trees as parse issues, runs `find_all`, and optionally captures metavariable bindings.\n8. Native results are sorted by path and source position, then paged by `offset`/`limit`.\n9. The TS wrapper normalizes parse-error strings, deduplicates them, groups matches by formatted path, renders anchor lines, appends limit/parse notices, and returns `toolResult(...).text(...).done()`.\n\n## Modes / Variants\n- Single file: native path is the file; output is a flat list of rendered match lines.\n- Directory + optional glob: native scan walks the directory, then filters by compiled glob.\n- Multiple explicit paths/globs: wrapper unions them into one synthetic scope or runs per-target native calls when paths only meet at root.\n- Internal URL inputs: only supported when the router can resolve them to a backing file path.\n- Hashline output mode vs plain line-number mode: controlled by `resolveFileDisplayMode()`; hashline mode requires the edit tool and non-raw, mutable sources.\n\n## Side Effects\n- Filesystem\n - Stats input paths in the TS wrapper.\n - Native code reads matched files and scans directories through `fs_cache`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - None beyond normal tool transcript/result metadata.\n- Background work / cancellation\n - Native work runs on a blocking worker via `task::blocking(...)`.\n - Cancellation and optional native timeout are cooperative through `CancelToken::heartbeat()`.\n\n## Limits & Caps\n- Wrapper-visible result cap: `DEFAULT_AST_LIMIT = 50` in `packages/coding-agent/src/tools/ast-grep.ts`.\n - 
Single-target calls rely on the native default limit of 50 in `crates/pi-natives/src/ast.rs`.\n - Multi-target calls fetch `skip + 50 + 1` matches per target, then re-page after global sort.\n- Native `limit` is clamped to at least `1`; omitted `offset` defaults to `0` in `crates/pi-natives/src/ast.rs`.\n- Parse issues are rendered with at most `PARSE_ERRORS_LIMIT = 20` lines in `packages/coding-agent/src/tools/render-utils.ts`; `details.parseErrors` itself is only deduplicated, not capped.\n- Directory scans use `include_hidden: true`, `use_gitignore: true`, and skip `node_modules` unless the glob text explicitly mentions `node_modules` in `crates/pi-natives/src/ast.rs`.\n- No hard file-count cap is applied by the wrapper or native `ast_grep`; candidate count is whatever the resolved path/glob expands to after gitignore filtering.\n- Multi-path union deduplicates identical path inputs before resolution in `resolveExplicitSearchPaths()`.\n\n## Errors\n- TS wrapper throws `ToolError` for empty patterns, invalid `skip`, empty path entries, unsupported internal-URL globs, internal URLs without `sourcePath`, and missing paths.\n- Native code returns hard errors for:\n - unsupported explicit `lang`,\n - inability to infer language for a candidate when `lang` is not supplied,\n - invalid AST pattern compilation for every relevant language,\n - unreadable search roots or bad glob compilation,\n - cancellation (`Aborted: Signal`) or timeout (`Aborted: Timeout`).\n- File-level parse failures and many per-language pattern compile failures are non-fatal: they are accumulated in `parseErrors` and surfaced alongside successful matches.\n- `no matches` is not an error, even when parse issues were recorded.\n\n## Notes\n- `pat` is always wrapped into a one-element `patterns` array by the TS tool; the model cannot send multiple patterns through `ast_grep` even though the native binding supports it.\n- `ast_grep` can search mixed-language trees because native compilation happens 
per discovered language, but the prompt still tells the model to keep calls single-language when possible to reduce parse noise.\n- Pattern compilation is per language present in the candidate set. One pattern can succeed for some languages and generate per-file parse errors for others in the same run.\n- A file with tree-sitter error nodes still gets searched; the syntax warning is additive, not a skip condition.\n- For glob semantics, `*.ts` matches only direct children while `**/*.ts` recurses; this is covered by native tests in `crates/pi-natives/src/ast.rs`.\n- Output anchors are intended for follow-up tools, but the exact anchor format depends on session edit mode (`hashline` vs line-number mode).",
64
- "tools/bash.md": "# bash\n\n> Execute a shell command in the session workspace, with optional PTY or background-job handling.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/bash.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/bash.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/bash-interactive.ts` — PTY/TUI execution path.\n - `packages/coding-agent/src/tools/bash-interceptor.ts` — blocks tool-better shell patterns.\n - `packages/coding-agent/src/tools/bash-skill-urls.ts` — expands internal URLs to paths.\n - `packages/coding-agent/src/exec/bash-executor.ts` — non-PTY shell execution.\n - `packages/coding-agent/src/session/streaming-output.ts` — tail buffer, truncation, artifact spill.\n - `packages/coding-agent/src/tools/tool-timeouts.ts` — timeout clamp bounds.\n - `packages/coding-agent/src/config/settings-schema.ts` — default interceptor rules.\n - `docs/bash-tool-runtime.md` — deeper executor/runtime notes; use as the companion doc for shell-session internals.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `command` | `string` | Yes | Shell command text to execute. A leading `cd <path> && ...` is rewritten into `cwd` only when `cwd` was omitted. |\n| `env` | `Record<string, string>` | No | Extra environment variables. Keys must match `^[A-Za-z_][A-Za-z0-9_]*$` or the tool throws. Values also go through internal-URL expansion. |\n| `timeout` | `number` | No | Timeout in seconds. Default `300`; clamped to `1..3600` by `clampTimeout(\"bash\", ...)`. |\n| `cwd` | `string` | No | Working directory, resolved against `session.cwd` via `resolveToCwd`. Must exist and be a directory. |\n| `pty` | `boolean` | No | Request PTY mode. Default `false`. PTY is used only when `pty: true`, `PI_NO_PTY !== \"1\"`, and the tool context has a UI. |\n| `async` | `boolean` | No | Background execution request. Present only when `async.enabled` is true for the session. 
Returns immediately with a job id instead of waiting. |\n\n## Outputs\nThe tool returns a single `text` content block plus optional `details`.\n\n- Success, foreground:\n - `content[0].text`: command output, or `(no output)` when the command produced nothing.\n - `details.timeoutSeconds`: effective timeout after clamping.\n - `details.requestedTimeoutSeconds`: only present when the requested timeout was clamped.\n - `details.meta.truncation`: present when output was truncated in memory; includes `artifactId` when full output spilled to an artifact.\n- Success, background start (`async: true` or auto-background):\n - `content[0].text`: optional preview tail, timeout notice if any, then `Background job <id> started: <label>` with follow-up instructions.\n - `details.async`: `{ state: \"running\", jobId, type: \"bash\" }`.\n- Background progress / completion:\n - delivered through `onUpdate` / async job manager, not the initial return.\n - running updates contain tail text and `details.async.state: \"running\"` only after the job is considered backgrounded.\n - completion/failure updates carry final text and `details.async.state: \"completed\" | \"failed\"`.\n- Failure:\n - the tool throws `ToolError` / `ToolAbortError`; non-zero exits are surfaced as errors, not success results.\n\nStdout and stderr are merged before the model sees them. Non-zero exit codes are appended to the thrown error text as `Command exited with code <n>`.\n\n## Flow\n1. `BashTool.execute()` in `packages/coding-agent/src/tools/bash.ts` reads `command`, normalizes `env`, and defaults `timeout` to `300`.\n2. If `cwd` is absent, it rewrites a leading `cd <path> && ...` into the structured `cwd` field and strips that prefix from `command`.\n3. If `async: true` is requested while `async.enabled` is off, it throws `ToolError` before any execution.\n4. If `bashInterceptor.enabled` is on, `checkBashInterception()` runs against both the original command and the `cd`-stripped command. 
A matching enabled rule throws before URL expansion or execution.\n5. `expandInternalUrls()` rewrites supported internal URLs inside `command`, each `env` value, and protocol-looking `cwd` values. Command/env replacements are shell-escaped unless `noEscape` is requested by the caller path.\n6. `resolveToCwd()` resolves `cwd` against `session.cwd`; `fs.stat()` verifies that the target exists and is a directory.\n7. `clampTimeout(\"bash\", requestedTimeoutSec)` enforces `TOOL_TIMEOUTS.bash` (`default: 300`, `min: 1`, `max: 3600`). When clamped, `#buildCompletedResult()` / `#buildBackgroundStartResult()` append a notice line.\n8. Execution path splits:\n 1. `async: true` -> `#startManagedBashJob()` registers a session async job and returns immediately.\n 2. Non-PTY with `bash.autoBackground.enabled` and an async job manager -> starts a managed job, waits up to `min(thresholdMs, timeoutMs - 1000)`, and either returns the completed result or converts the run into a background job.\n 3. Otherwise runs foreground execution.\n9. Foreground non-PTY calls `executeBash()` from `packages/coding-agent/src/exec/bash-executor.ts`.\n10. Foreground PTY calls `runInteractiveBashPty()` from `packages/coding-agent/src/tools/bash-interactive.ts`.\n11. Both paths allocate an output artifact first when `session.allocateOutputArtifact` is available. The artifact path/id are passed into the sink so large output can spill to disk.\n12. `executeBash()` loads shell settings, optional shell snapshot, and shell minimizer settings, then runs via a persistent native `Shell` session or one-shot `executeShell()`. `docs/bash-tool-runtime.md` covers that path in detail.\n13. `runInteractiveBashPty()` creates a `PtySession`, overlays an xterm-backed console UI, forwards user key input into the PTY, captures output through `OutputSink`, and kills the PTY on dismiss/dispose.\n14. 
On completion, `#buildCompletedResult()` formats `(no output)` when needed, attaches truncation metadata from the `OutputSink` summary, and re-checks exit status / timeout / cancellation before returning.\n15. On non-zero exit, timeout, missing exit status, or cancellation, `#buildResultText()` throws with the captured output included in the error message.\n\n## Modes / Variants\n1. Foreground non-PTY\n - Default path.\n - Uses `executeBash()`.\n - Streams tail-only updates through `streamTailUpdates()` and `TailBuffer(DEFAULT_MAX_BYTES)`.\n2. Foreground PTY\n - Requires `pty: true`, UI context, and `PI_NO_PTY !== \"1\"`.\n - Uses `runInteractiveBashPty()` and a `PtySession` overlay.\n - Supports interactive input; `Esc` kills the session from the overlay.\n3. Explicit background job\n - Requires `async: true` and `async.enabled`.\n - Registers a job with `session.asyncJobManager` and returns `{ state: \"running\", jobId }` immediately.\n4. Auto-backgrounded non-PTY job\n - Requires `bash.autoBackground.enabled`, no PTY, and an async job manager.\n - Starts like a foreground managed job, then backgrounds it when it outlives the wait window.\n5. 
Intercepted command\n - No subprocess created.\n - Returns a `ToolError` pointing the model at `read`, `search`, `find`, `edit`, or `write`.\n\n## Side Effects\n- Filesystem\n - Validates `cwd` with `fs.stat()`.\n - May allocate and write artifact files for full output (`bash`) and minimizer-preserved raw output (`bash-original`).\n - `expandInternalUrls(..., { ensureLocalParentDirs: true })` creates parent directories for `local://` paths before execution.\n- Subprocesses / native bindings\n - Non-PTY uses native shell execution via `@oh-my-pi/pi-natives` (`Shell.run()` or `executeShell()`).\n - PTY uses native `PtySession.start()`.\n- Session state\n - Reads session settings for async, auto-background, interceptor, tool availability, and shell configuration.\n - Registers jobs with `session.asyncJobManager` for explicit/auto background runs.\n - Uses `session.getSessionId()` to isolate shell reuse and async session keys.\n - Uses `session.allocateOutputArtifact()` for spill files.\n- User-visible prompts / interactive UI\n - PTY mode opens a TUI overlay titled `Console` and forwards input to the PTY.\n - Background start messages direct the agent to `job` and to read `jobs://<id>`.\n- Background work / cancellation\n - Async and auto-background jobs continue after the initial tool return.\n - Cancellation aborts the native run; PTY overlay dismissal also kills the PTY.\n\n## Limits & Caps\n- Default timeout: `300s` (`TOOL_TIMEOUTS.bash.default` in `packages/coding-agent/src/tools/tool-timeouts.ts`).\n- Timeout clamp: `1..3600s` (`TOOL_TIMEOUTS.bash.min/max`).\n- Auto-background default threshold: `60_000ms` (`DEFAULT_AUTO_BACKGROUND_THRESHOLD_MS` in `packages/coding-agent/src/tools/bash.ts`), further capped to `timeoutMs - 1000` by `#resolveAutoBackgroundWaitMs()`.\n- Hard kill grace beyond requested timeout in non-PTY executor: `5_000ms` (`HARD_TIMEOUT_GRACE_MS` in `packages/coding-agent/src/exec/bash-executor.ts`).\n- In-memory output tail cap: `50 * 1024` 
bytes (`DEFAULT_MAX_BYTES` in `packages/coding-agent/src/session/streaming-output.ts`). Once exceeded, the sink keeps only the tail window in memory.\n- Streaming callback throttle in `executeBash()`: `50ms` between `onChunk` calls when streaming is enabled.\n- TUI collapsed preview: `10` visual lines (`BASH_DEFAULT_PREVIEW_LINES`) when rendered inline in the agent UI; this is a renderer cap, not a tool output cap.\n\n## Errors\n- Input validation:\n - invalid env key -> `ToolError(\"Invalid bash env name: <key>\")`.\n - async requested while disabled -> `ToolError(\"Async bash execution is disabled...\")`.\n - missing async job manager -> `ToolError(\"Async job manager unavailable for this session.\")`.\n - missing/bad `cwd` -> `ToolError(\"Working directory does not exist: ...\")` or `ToolError(\"Working directory is not a directory: ...\")`.\n- Interceptor:\n - matched command -> `ToolError` with `Blocked: <rule.message>` and the original command.\n - invalid interceptor regexes are silently skipped by `compileRules()`.\n- Internal URL expansion:\n - unsupported scheme, unknown skill, path traversal, missing router support, or router resolution failures all throw `ToolError` from `packages/coding-agent/src/tools/bash-skill-urls.ts`.\n- Execution:\n - non-zero exit -> thrown `ToolError` containing captured output plus `Command exited with code <n>`.\n - missing exit code -> thrown `ToolError` with `Command failed: missing exit status`.\n - timeout -> thrown `ToolError`; PTY uses `Command timed out after <n> seconds`, non-PTY executor returns cancelled output that `BashTool` converts to an error.\n - user abort -> `ToolAbortError` when the caller signal is aborted.\n- Artifact allocation / artifact save failures are swallowed in `saveBashOriginalArtifact()` and `OutputSink.#createFileSink()`; execution continues without that artifact.\n\n## Notes\n- `strict = true` and `concurrency = \"exclusive\"` are set on `BashTool`; the tool does not run concurrently with 
another bash tool call in the same session.\n- `command` and `env` URL expansions shell-escape replacements; `cwd` expansion uses `noEscape: true` because it becomes a filesystem path argument, not shell text.\n- `checkBashInterception()` blocks only when the matching rule's `tool` name is present in `ctx.toolNames`; missing tools disable their corresponding rule.\n- Default interceptor rules come from `DEFAULT_BASH_INTERCEPTOR_RULES` in `packages/coding-agent/src/config/settings-schema.ts`:\n - `cat|head|tail|less|more` -> `read`\n - `grep|rg|ripgrep|ag|ack` -> `search`\n - `find|fd|locate` with name/type/glob flags -> `find`\n - `sed -i`, `perl -i`, `awk -i inplace` -> `edit`\n - `echo|printf|cat <<` with redirection -> `write`\n- PTY mode is ignored in non-UI contexts and when `PI_NO_PTY=1`; the tool silently falls back to non-PTY execution.\n- Non-PTY runs merge `NON_INTERACTIVE_ENV` with `env`; PTY runs also prepend `NON_INTERACTIVE_ENV` before custom env values.\n- When the shell minimizer rewrites output inside `executeBash()`, the visible output is replaced with minimized text and a `[raw output: artifact://<id>]` footer may be appended if `onMinimizedSave` persisted the original text.\n- The TUI renderer parses partial JSON to recover `env` assignments early in streaming previews; that behavior is display-only.\n- For executor internals that are not tool-specific — shell session reuse keys, snapshots, prefix handling, and native timeout behavior — see `docs/bash-tool-runtime.md`.\n",
64
+ "tools/bash.md": "# bash\n\n> Execute a shell command in the session workspace, with optional PTY or background-job handling.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/bash.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/bash.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/bash-interactive.ts` — PTY/TUI execution path.\n - `packages/coding-agent/src/tools/bash-interceptor.ts` — blocks tool-better shell patterns.\n - `packages/coding-agent/src/tools/bash-skill-urls.ts` — expands internal URLs to paths.\n - `packages/coding-agent/src/exec/bash-executor.ts` — non-PTY shell execution.\n - `packages/coding-agent/src/session/streaming-output.ts` — tail buffer, truncation, artifact spill.\n - `packages/coding-agent/src/tools/tool-timeouts.ts` — timeout clamp bounds.\n - `packages/coding-agent/src/config/settings-schema.ts` — default interceptor rules.\n - `docs/bash-tool-runtime.md` — deeper executor/runtime notes; use as the companion doc for shell-session internals.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `command` | `string` | Yes | Shell command text to execute. A leading `cd <path> && ...` is rewritten into `cwd` only when `cwd` was omitted. |\n| `env` | `Record<string, string>` | No | Extra environment variables. Keys must match `^[A-Za-z_][A-Za-z0-9_]*$` or the tool throws. Values also go through internal-URL expansion. |\n| `timeout` | `number` | No | Timeout in seconds. Default `300`; clamped to `1..3600` by `clampTimeout(\"bash\", ...)`. |\n| `cwd` | `string` | No | Working directory, resolved against `session.cwd` via `resolveToCwd`. Must exist and be a directory. |\n| `pty` | `boolean` | No | Request PTY mode. Default `false`. PTY is used only when `pty: true`, `PI_NO_PTY !== \"1\"`, and the tool context has a UI. |\n| `async` | `boolean` | No | Background execution request. Present only when `async.enabled` is true for the session. 
Returns immediately with a job id instead of waiting. |\n\n## Outputs\nThe tool returns a single `text` content block plus optional `details`.\n\n- Success, foreground:\n - `content[0].text`: command output, or `(no output)` when the command produced nothing.\n - `details.timeoutSeconds`: effective timeout after clamping.\n - `details.requestedTimeoutSeconds`: only present when the requested timeout was clamped.\n - `details.meta.truncation`: present when output was truncated in memory; includes `artifactId` when full output spilled to an artifact.\n- Success, background start (`async: true` or auto-background):\n - `content[0].text`: optional preview tail, timeout notice if any, then `Background job <id> started: <label>` with follow-up instructions.\n - `details.async`: `{ state: \"running\", jobId, type: \"bash\" }`.\n- Background progress / completion:\n - delivered through `onUpdate` / async job manager, not the initial return.\n - running updates contain tail text and `details.async.state: \"running\"` only after the job is considered backgrounded.\n - completion/failure updates carry final text and `details.async.state: \"completed\" | \"failed\"`.\n- Failure:\n - the tool throws `ToolError` / `ToolAbortError`; non-zero exits are surfaced as errors, not success results.\n\nStdout and stderr are merged before the model sees them. Non-zero exit codes are appended to the thrown error text as `Command exited with code <n>`.\n\n## Flow\n1. `BashTool.execute()` in `packages/coding-agent/src/tools/bash.ts` reads `command`, normalizes `env`, and defaults `timeout` to `300`.\n2. If `cwd` is absent, it rewrites a leading `cd <path> && ...` into the structured `cwd` field and strips that prefix from `command`.\n3. If `async: true` is requested while `async.enabled` is off, it throws `ToolError` before any execution.\n4. If `bashInterceptor.enabled` is on, `checkBashInterception()` runs against both the original command and the `cd`-stripped command. 
A matching enabled rule throws before URL expansion or execution.\n5. `expandInternalUrls()` rewrites supported internal URLs inside `command`, each `env` value, and protocol-looking `cwd` values. Command/env replacements are shell-escaped unless `noEscape` is requested by the caller path.\n6. `resolveToCwd()` resolves `cwd` against `session.cwd`; `fs.stat()` verifies that the target exists and is a directory.\n7. `clampTimeout(\"bash\", requestedTimeoutSec)` enforces `TOOL_TIMEOUTS.bash` (`default: 300`, `min: 1`, `max: 3600`). When clamped, `#buildCompletedResult()` / `#buildBackgroundStartResult()` append a notice line.\n8. Execution path splits:\n 1. `async: true` -> `#startManagedBashJob()` registers a session async job and returns immediately.\n 2. Non-PTY with `bash.autoBackground.enabled` and an async job manager -> starts a managed job, waits up to `min(thresholdMs, timeoutMs - 1000)`, and either returns the completed result or converts the run into a background job.\n 3. Otherwise runs foreground execution.\n9. Foreground non-PTY calls `executeBash()` from `packages/coding-agent/src/exec/bash-executor.ts`.\n10. Foreground PTY calls `runInteractiveBashPty()` from `packages/coding-agent/src/tools/bash-interactive.ts`.\n11. Both paths allocate an output artifact first when `session.allocateOutputArtifact` is available. The artifact path/id are passed into the sink so large output can spill to disk.\n12. `executeBash()` loads shell settings, optional shell snapshot, and shell minimizer settings, then runs via a persistent native `Shell` session or one-shot `executeShell()`. `docs/bash-tool-runtime.md` covers that path in detail.\n13. `runInteractiveBashPty()` creates a `PtySession`, overlays an xterm-backed console UI, forwards user key input into the PTY, captures output through `OutputSink`, and kills the PTY on dismiss/dispose.\n14. 
On completion, `#buildCompletedResult()` formats `(no output)` when needed, attaches truncation metadata from the `OutputSink` summary, and re-checks exit status / timeout / cancellation before returning.\n15. On non-zero exit, timeout, missing exit status, or cancellation, `#buildResultText()` throws with the captured output included in the error message.\n\n## Modes / Variants\n1. Foreground non-PTY\n - Default path.\n - Uses `executeBash()`.\n - Streams tail-only updates through `streamTailUpdates()` and `TailBuffer(DEFAULT_MAX_BYTES)`.\n2. Foreground PTY\n - Requires `pty: true`, UI context, and `PI_NO_PTY !== \"1\"`.\n - Uses `runInteractiveBashPty()` and a `PtySession` overlay.\n - Supports interactive input; `Esc` kills the session from the overlay.\n3. Explicit background job\n - Requires `async: true` and `async.enabled`.\n - Registers a job with `session.asyncJobManager` and returns `{ state: \"running\", jobId }` immediately.\n4. Auto-backgrounded non-PTY job\n - Requires `bash.autoBackground.enabled`, no PTY, and an async job manager.\n - Starts like a foreground managed job, then backgrounds it when it outlives the wait window.\n5. 
Intercepted command\n - No subprocess created.\n - Throws a `ToolError` pointing the model at `read`, `search`, `find`, `edit`, or `write`.\n\n## Side Effects\n- Filesystem\n - Validates `cwd` with `fs.stat()`.\n - May allocate and write artifact files for full output (`bash`) and minimizer-preserved raw output (`bash-original`).\n - `expandInternalUrls(..., { ensureLocalParentDirs: true })` creates parent directories for `local://` paths before execution.\n- Subprocesses / native bindings\n - Non-PTY uses native shell execution via `@oh-my-pi/pi-natives` (`Shell.run()` or `executeShell()`).\n - PTY uses native `PtySession.start()`.\n- Session state\n - Reads session settings for async, auto-background, interceptor, tool availability, and shell configuration.\n - Registers jobs with `session.asyncJobManager` for explicit/auto background runs.\n - Uses `session.getSessionId()` to isolate shell reuse and async session keys.\n - Uses `session.allocateOutputArtifact()` for spill files.\n- User-visible prompts / interactive UI\n - PTY mode opens a TUI overlay titled `Console` and forwards input to the PTY.\n - Background start messages direct the agent to the `job` tool (use `list: true` for a snapshot, or pass `poll: [id]` to wait).\n- Background work / cancellation\n - Async and auto-background jobs continue after the initial tool return.\n - Cancellation aborts the native run; PTY overlay dismissal also kills the PTY.\n\n## Limits & Caps\n- Default timeout: `300s` (`TOOL_TIMEOUTS.bash.default` in `packages/coding-agent/src/tools/tool-timeouts.ts`).\n- Timeout clamp: `1..3600s` (`TOOL_TIMEOUTS.bash.min/max`).\n- Auto-background default threshold: `60_000ms` (`DEFAULT_AUTO_BACKGROUND_THRESHOLD_MS` in `packages/coding-agent/src/tools/bash.ts`), further capped to `timeoutMs - 1000` by `#resolveAutoBackgroundWaitMs()`.\n- Hard kill grace beyond requested timeout in non-PTY executor: `5_000ms` (`HARD_TIMEOUT_GRACE_MS` in 
`packages/coding-agent/src/exec/bash-executor.ts`).\n- In-memory output tail cap: `50 * 1024` bytes (`DEFAULT_MAX_BYTES` in `packages/coding-agent/src/session/streaming-output.ts`). Once exceeded, the sink keeps only the tail window in memory.\n- Streaming callback throttle in `executeBash()`: `50ms` between `onChunk` calls when streaming is enabled.\n- TUI collapsed preview: `10` visual lines (`BASH_DEFAULT_PREVIEW_LINES`) when rendered inline in the agent UI; this is a renderer cap, not a tool output cap.\n\n## Errors\n- Input validation:\n - invalid env key -> `ToolError(\"Invalid bash env name: <key>\")`.\n - async requested while disabled -> `ToolError(\"Async bash execution is disabled...\")`.\n - missing async job manager -> `ToolError(\"Async job manager unavailable for this session.\")`.\n - missing/bad `cwd` -> `ToolError(\"Working directory does not exist: ...\")` or `ToolError(\"Working directory is not a directory: ...\")`.\n- Interceptor:\n - matched command -> `ToolError` with `Blocked: <rule.message>` and the original command.\n - invalid interceptor regexes are silently skipped by `compileRules()`.\n- Internal URL expansion:\n - unsupported scheme, unknown skill, path traversal, missing router support, or router resolution failures all throw `ToolError` from `packages/coding-agent/src/tools/bash-skill-urls.ts`.\n- Execution:\n - non-zero exit -> thrown `ToolError` containing captured output plus `Command exited with code <n>`.\n - missing exit code -> thrown `ToolError` with `Command failed: missing exit status`.\n - timeout -> thrown `ToolError`; PTY uses `Command timed out after <n> seconds`, non-PTY executor returns cancelled output that `BashTool` converts to an error.\n - user abort -> `ToolAbortError` when the caller signal is aborted.\n- Artifact allocation / artifact save failures are swallowed in `saveBashOriginalArtifact()` and `OutputSink.#createFileSink()`; execution continues without that artifact.\n\n## Notes\n- `strict = true` and 
`concurrency = \"exclusive\"` are set on `BashTool`; the tool does not run concurrently with another bash tool call in the same session.\n- `command` and `env` URL expansions shell-escape replacements; `cwd` expansion uses `noEscape: true` because it becomes a filesystem path argument, not shell text.\n- `checkBashInterception()` blocks only when the matching rule's `tool` name is present in `ctx.toolNames`; missing tools disable their corresponding rule.\n- Default interceptor rules come from `DEFAULT_BASH_INTERCEPTOR_RULES` in `packages/coding-agent/src/config/settings-schema.ts`:\n - `cat|head|tail|less|more` -> `read`\n - `grep|rg|ripgrep|ag|ack` -> `search`\n - `find|fd|locate` with name/type/glob flags -> `find`\n - `sed -i`, `perl -i`, `awk -i inplace` -> `edit`\n - `echo|printf|cat <<` with redirection -> `write`\n- PTY mode is ignored in non-UI contexts and when `PI_NO_PTY=1`; the tool silently falls back to non-PTY execution.\n- Non-PTY runs merge `NON_INTERACTIVE_ENV` with `env`; PTY runs also prepend `NON_INTERACTIVE_ENV` before custom env values.\n- When the shell minimizer rewrites output inside `executeBash()`, the visible output is replaced with minimized text and a `[raw output: artifact://<id>]` footer may be appended if `onMinimizedSave` persisted the original text.\n- The TUI renderer parses partial JSON to recover `env` assignments early in streaming previews; that behavior is display-only.\n- For executor internals that are not tool-specific — shell session reuse keys, snapshots, prefix handling, and native timeout behavior — see `docs/bash-tool-runtime.md`.\n",
65
65
  "tools/browser.md": "# browser\n\n> Open, reuse, close, and script Puppeteer tabs against headless Chromium or CDP-attached apps.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/browser.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/browser.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/browser/tab-supervisor.ts` — global tab registry; worker lifecycle; run/close coordination.\n - `packages/coding-agent/src/tools/browser/tab-worker.ts` — executes `run` code; implements the `tab` helper API.\n - `packages/coding-agent/src/tools/browser/tab-worker-entry.ts` — worker-thread transport bootstrap.\n - `packages/coding-agent/src/tools/browser/registry.ts` — browser-handle registry keyed by browser kind.\n - `packages/coding-agent/src/tools/browser/launch.ts` — Puppeteer loading, Chromium resolution/download, headless launch, stealth injection.\n - `packages/coding-agent/src/tools/browser/attach.ts` — CDP attach/reuse, target picking, spawned-app process handling.\n - `packages/coding-agent/src/tools/browser/tab-protocol.ts` — worker init/run/result message schema.\n - `packages/coding-agent/src/tools/browser/readable.ts` — `tab.extract()` readability extraction.\n - `packages/coding-agent/src/tools/browser/render.ts` — TUI rendering for `open`/`close` status lines and `run` JS cells.\n - `packages/coding-agent/src/tools/puppeteer/00_stealth_tampering.txt` — mask patched functions/descriptors as native.\n - `packages/coding-agent/src/tools/puppeteer/01_stealth_activity.txt` — synthesize visibility/focus/scroll activity.\n - `packages/coding-agent/src/tools/puppeteer/02_stealth_hairline.txt` — fix Modernizr hairline detection.\n - `packages/coding-agent/src/tools/puppeteer/03_stealth_botd.txt` — spoof `navigator.webdriver`, `window.chrome`, and Chrome fingerprint surfaces.\n - `packages/coding-agent/src/tools/puppeteer/04_stealth_iframe.txt` — patch iframe `contentWindow`/`srcdoc` behavior.\n - 
`packages/coding-agent/src/tools/puppeteer/05_stealth_webgl.txt` — spoof WebGL vendor/renderer/precision.\n - `packages/coding-agent/src/tools/puppeteer/06_stealth_screen.txt` — normalize screen/viewport/device-pixel-ratio values.\n - `packages/coding-agent/src/tools/puppeteer/07_stealth_fonts.txt` — spoof local fonts and perturb canvas text rendering.\n - `packages/coding-agent/src/tools/puppeteer/08_stealth_audio.txt` — spoof audio latency/sample-rate and perturb offline rendering.\n - `packages/coding-agent/src/tools/puppeteer/09_stealth_locale.txt` — force locale/languages/timezone/date strings.\n - `packages/coding-agent/src/tools/puppeteer/10_stealth_plugins.txt` — synthesize `navigator.plugins`/`navigator.mimeTypes`.\n - `packages/coding-agent/src/tools/puppeteer/11_stealth_hardware.txt` — spoof `navigator.hardwareConcurrency`.\n - `packages/coding-agent/src/tools/puppeteer/12_stealth_codecs.txt` — spoof media codec support.\n - `packages/coding-agent/src/tools/puppeteer/13_stealth_worker.txt` — carry UA/platform spoofing into `Worker`/`SharedWorker`.\n\n## Inputs\n\n### Shared fields\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `action` | `\"open\" \\| \"close\" \\| \"run\"` | Yes | Dispatches to the open/close/run path. |\n| `name` | `string` | No | Tab id. Defaults to `\"main\"`. Tabs live in a process-global map, so the same name is reused across later calls and in-process subagents until closed. |\n| `timeout` | `number` | No | Tool wall-clock timeout in seconds. Defaults to `30`; clamped to the browser tool range before execution. |\n\n### `action: \"open\"`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `url` | `string` | No | Navigate after the tab is ready. Existing reusable tabs also navigate when `url` is supplied. |\n| `viewport` | `{ width: number; height: number; scale?: number }` | No | Requested viewport. 
For headless launch this becomes the initial viewport; for a page it is applied with `page.setViewport()`. `scale` maps to Puppeteer `deviceScaleFactor`. |\n| `wait_until` | `\"load\" \\| \"domcontentloaded\" \\| \"networkidle0\" \\| \"networkidle2\"` | No | Navigation wait condition. Defaults to `\"networkidle2\"` where omitted. |\n| `dialogs` | `\"accept\" \\| \"dismiss\"` | No | Installs a page `dialog` handler that auto-accepts or auto-dismisses dialogs. Omitted means no handler. |\n| `app` | `{ path?: string; cdp_url?: string; args?: string[]; target?: string }` | No | Selects browser kind. No `app` uses the session `browser.headless` setting. `app.path` is resolved against the session cwd and used as the executable path for spawn/attach reuse. `app.cdp_url` connects to an existing CDP endpoint. `args` are appended only when spawning `app.path`. `target` is only used for attached/spawned-app page selection. |\n\n### `action: \"close\"`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `all` | `boolean` | No | Close every known tab. Omitted closes only `name`. |\n| `kill` | `boolean` | No | When a tab release drops a spawned-app browser handle to refcount 0, also terminate its process tree. Has no effect on headless shutdown and only disconnects connected CDP browsers. |\n\n### `action: \"run\"`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `code` | `string` | Yes | Async-function body executed in a VM context with `page`, `browser`, `tab`, `display`, `assert`, `wait`, `console`, timers, `URL`, `TextEncoder`, `TextDecoder`, and `Buffer` in scope. |\n\n## Outputs\nThe tool returns one result per call; no streaming partial output is emitted from the browser implementation itself.\n\n- `open`: text content with `Opened` or `Reused`, browser description, URL, and optional title. 
`details` includes `action`, `name`, `browser`, `url`, `viewport`, and the same text in `details.result`.\n- `close`: text content with either `Closed ...` or `No tab named ...`. `details` includes `action`, `name`, and `details.result`.\n- `run`: ordered `content` array built as:\n 1. every `display(value)` call in execution order,\n 2. final return value, JSON-stringified unless already a string,\n 3. or `Ran code on tab \"...\"` if nothing else was produced.\n- `display(value)` coercion in `packages/coding-agent/src/tools/browser/tab-worker.ts`:\n - `{ type: \"image\", data: string, mimeType: string }` becomes image content,\n - `string` becomes text content,\n - other values become pretty JSON text when serializable, else `String(value)`.\n- `tab.screenshot()` also appends text plus an image content item unless `silent: true`; `details.screenshots` records persisted screenshot metadata `{ dest, mimeType, bytes, width, height }`.\n- `run` `details` includes `action`, `name`, current `browser`/`url` when the tab exists, optional `screenshots`, and `details.result` containing only the concatenated text outputs.\n\n## Flow\n1. `BrowserTool.execute()` (`packages/coding-agent/src/tools/browser.ts`) abort-checks, clamps `timeout` via `clampTimeout(\"browser\", ...)`, defaults `name` to `\"main\"`, and dispatches on `action`.\n2. `open` resolves browser kind with `resolveBrowserKind()`:\n - `app.cdp_url` → `{ kind: \"connected\" }` after trimming trailing slashes.\n - `app.path` → `{ kind: \"spawned\" }` after resolving against session cwd.\n - otherwise → `{ kind: \"headless\", headless: session.settings.get(\"browser.headless\") }`.\n3. `open` rejects reusing the same tab name across different browser kinds (`sameBrowserKind()`); callers must close first.\n4. 
`open` acquires a browser handle through `acquireBrowser()` (`packages/coding-agent/src/tools/browser/registry.ts`):\n - existing connected handle is reused by browser-kind key;\n - stale disconnected handles are disposed and recreated;\n - headless launches via `launchHeadlessBrowser()`;\n - `connected` waits for `${cdpUrl}/json/version`, then `puppeteer.connect()`;\n - `spawned` first tries `findReusableCdp()`, else kills same-path processes, allocates a free loopback port, spawns the executable with `--remote-debugging-port=<port>`, waits for CDP, then connects.\n5. `open` acquires a tab through `acquireTab()` (`packages/coding-agent/src/tools/browser/tab-supervisor.ts`):\n - same-name + same-browser + alive tab is reused unless `dialogs` changed;\n - same-name but different browser handle, dead state, or changed dialog policy forces release and recreation;\n - reusing with a new `url` navigates by issuing `await tab.goto(...)` through the worker.\n6. New tabs build a `WorkerInitPayload` in `buildInitPayload()`:\n - headless mode sends `url`, `waitUntil`, `viewport`, `dialogs`, and timeout;\n - attach mode resolves a page with `pickElectronTarget()`, gets its target id, and sends `targetId` plus `dialogs`.\n7. `acquireTab()` spawns a dedicated Bun `Worker` from `tab-worker-entry.ts`; if that fails it falls back to inline execution in the main thread (`spawnInlineWorker()`), preserving behavior but losing protection against synchronous infinite loops.\n8. `WorkerCore.#init()` (`packages/coding-agent/src/tools/browser/tab-worker.ts`) connects back to the browser websocket endpoint. Headless mode opens a new page, applies stealth patches, applies viewport, installs dialog handling if requested, and optionally navigates. Attach mode resolves the requested target page and optionally installs dialog handling.\n9. 
On success the worker sends `ready` with `{ url, title, viewport, targetId }`; the supervisor stores a `TabSession`, increments browser-handle refcount with `holdBrowser()`, and keeps the tab in a process-global `Map<string, TabSession>`.\n10. `run` requires non-empty `code`, looks up the tab with `getTab()`, then delegates to `runInTab()`.\n11. `runInTabWithSnapshot()` rejects dead tabs and concurrent runs (`Tab ... is busy`), captures session cwd plus optional `browser.screenshotDir`, registers an abort hook, sends a `run` message to the worker, and races the result against `timeoutMs + 750` ms. Timeouts force-kill the tab worker and, for headless tabs, close the orphaned page target.\n12. `WorkerCore.#run()` creates a VM context, exposes the raw Puppeteer `page`/`browser` plus a synthetic `tab` API, and executes `(async () => { ...code... })()` via `vm.runInContext()`.\n13. The `tab` helper API implemented in `#createTabApi()` is:\n - `tab.name: string`\n - `tab.page: Page`\n - `tab.signal?: AbortSignal`\n - `tab.url(): string`\n - `tab.title(): Promise<string>`\n - `tab.goto(url, { waitUntil? })`\n - `tab.observe({ includeAll?, viewportOnly? })`\n - `tab.screenshot({ selector?, fullPage?, save?, silent? })`\n - `tab.extract(format = \"markdown\")`\n - `tab.click(selector)`\n - `tab.type(selector, text)`\n - `tab.fill(selector, value)`\n - `tab.press(key, { selector? })`\n - `tab.scroll(deltaX, deltaY)`\n - `tab.drag(from, to)`\n - `tab.waitFor(selector)`\n - `tab.evaluate(fn, ...args)`\n - `tab.scrollIntoView(selector)`\n - `tab.select(selector, ...values)`\n - `tab.uploadFile(selector, ...filePaths)`\n - `tab.waitForUrl(pattern, { timeout? })`\n - `tab.waitForResponse(pattern, { timeout? })`\n - `tab.id(n)`\n14. Selector handling in `normalizeSelector()` accepts plain CSS and Puppeteer query handlers, and rewrites legacy Playwright-style prefixes `p-text/`, `p-xpath/`, `p-pierce/`, `p-aria/`; other `p-*` prefixes throw a `ToolError`.\n15. 
`tab.observe()` clears the element cache, takes a Puppeteer accessibility snapshot, filters to interactive nodes unless `includeAll`, optionally filters to viewport-visible nodes, assigns numeric ids, caches `ElementHandle`s, and returns URL/title/viewport/scroll metadata plus `elements`.\n16. `tab.id(n)` resolves the cached `ElementHandle`, verifies `el.isConnected`, and throws a stale-id error after cache invalidation if the DOM changed or the cache was cleared.\n17. `tab.goto()` clears the cached element ids before navigating. Any new `tab.observe()` also clears and rebuilds the cache.\n18. `tab.click()` uses a custom retry loop for `text/...` selectors to find an actionable visible match; other selectors use `page.locator(...).click()` with the run timeout.\n19. `tab.screenshot()` captures either the whole page or a selector PNG, downsizes a copy for model output, chooses a persistence path, writes the image to disk, records metadata, and optionally emits text + image display entries.\n20. `display()` calls accumulate in an array. After code finishes, the worker posts `{ displays, returnValue, screenshots }`; `BrowserTool.#run()` appends the return value as trailing text content when not `undefined`.\n21. `close` releases one tab or all tabs via `releaseTab()` / `releaseAllTabs()`. 
Each tab aborts pending runs, asks the worker to close, waits up to `750` ms for a `closed` ack, terminates the worker, decrements browser refcount, and disposes the browser handle when refcount reaches zero.\n\n## Modes / Variants\n- **Action dispatch**\n - `open` — acquire/reuse browser + tab.\n - `close` — release one tab or all tabs.\n - `run` — execute JS inside the tab worker.\n- **Browser kind**\n - **Headless**: launches local Chromium with Puppeteer, applies stealth patches, and creates a fresh page per tab.\n - **Spawned app (`app.path`)**: reuses an existing CDP-enabled process for that executable when possible; otherwise kills same-path processes, spawns the executable with remote debugging enabled, then attaches. No stealth patches are injected.\n - **Connected browser (`app.cdp_url`)**: attaches to an already-running CDP endpoint. No process ownership; close only disconnects.\n- **Target selection for attached/spawned browsers**\n - With `app.target`, `pickElectronTarget()` returns the first page whose URL or title contains the case-insensitive substring.\n - Without `app.target`, it skips titles/URLs matching `request handler|devtools|background page|background host|service worker` and otherwise falls back to the first page.\n- **Worker mode**\n - **Dedicated worker**: normal path; user code runs off the main thread and can be aborted even when it blocks synchronously.\n - **Inline fallback**: activated when Bun worker spawn fails; behavior matches, but synchronous infinite loops in user code cannot be interrupted.\n- **Dialog policy**\n - No `dialogs` field: no auto-handler.\n - `accept`/`dismiss`: page `dialog` events are handled automatically.\n - Changing dialog policy on an existing live tab forces tab recreation instead of mutating the worker in place.\n- **Screenshot persistence**\n - `save` provided: persist full-resolution PNG at the resolved cwd-relative or absolute path.\n - `browser.screenshotDir` session setting set: persist 
full-resolution PNG under that directory with a timestamped filename.\n - Neither set: persist the resized image to a temp-file path under the OS temp dir.\n\n## Side Effects\n- Filesystem\n - `loadPuppeteer()` writes `{}` to `<puppeteer-safe-dir>/package.json` before importing `puppeteer-core`.\n - First headless launch may download Chromium into the Puppeteer cache directory returned by `getPuppeteerDir()`.\n - `tab.screenshot()` creates parent directories and writes image files.\n - `tab.uploadFile()` resolves supplied paths against the session cwd.\n- Network\n - CDP attach paths poll `http://127.0.0.1:<port>/json/version` or the supplied `cdp_url` `/json/version`.\n - Headless/browser-attach sessions create CDP websocket connections.\n - Headless first-use Chromium download uses `@puppeteer/browsers`.\n - User `page` / `tab` operations perform normal browser network traffic.\n- Subprocesses / native bindings\n - Headless mode launches Chromium through Puppeteer.\n - `app.path` mode may spawn the target executable via `Bun.spawn()`.\n - `killExistingByPath()` / `gracefulKillTreeOnce()` use `@oh-my-pi/pi-natives` process inspection/termination.\n - Worker mode uses Bun `Worker`; fallback mode does not.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Browser handles are cached in a process-global `Map` keyed by browser kind in `packages/coding-agent/src/tools/browser/registry.ts`.\n - Tabs are cached in a process-global `Map` keyed by `name` in `packages/coding-agent/src/tools/browser/tab-supervisor.ts`.\n - `run` captures session cwd and optional `browser.screenshotDir` for screenshot/save path resolution.\n - `restartForModeChange()` drops only headless tabs.\n- User-visible prompts / interactive UI\n - None beyond normal tool output. 
Dialog auto-handling is invisible unless it fails and emits debug logs.\n- Background work / cancellation\n - `open`, `run`, CDP waits, and browser actions thread through abort signals.\n - A timed-out `run` aborts the worker execution path and can tear down the tab.\n\n## Limits & Caps\n- Tool timeout clamp: default `30` s, min `1` s, max `30` s (`TOOL_TIMEOUTS.browser` in `packages/coding-agent/src/tools/tool-timeouts.ts`).\n- Supervisor grace period around init/run/close: `750` ms (`GRACE_MS` in `packages/coding-agent/src/tools/browser/tab-supervisor.ts`).\n- Puppeteer protocol timeout for launch/connect operations: `60_000` ms (`BROWSER_PROTOCOL_TIMEOUT_MS` in `packages/coding-agent/src/tools/browser/launch.ts`).\n- Connected-browser CDP readiness wait: `5_000` ms before `puppeteer.connect()` (`packages/coding-agent/src/tools/browser/registry.ts`).\n- Spawned-app CDP readiness wait after spawn: `30_000` ms (`packages/coding-agent/src/tools/browser/registry.ts`).\n- CDP polling cadence: `150` ms in `waitForCdp()` (`packages/coding-agent/src/tools/browser/attach.ts`).\n- Headless default viewport: `1365x768` at `deviceScaleFactor: 1.25` (`DEFAULT_VIEWPORT` in `packages/coding-agent/src/tools/browser/launch.ts`).\n- Screenshot model-attachment resize cap: `maxWidth 1024`, `maxHeight 1024`, `maxBytes 150 * 1024`, `jpegQuality 70` (`packages/coding-agent/src/tools/browser/tab-worker.ts`).\n- `tab.waitForUrl()` polling interval: `200` ms (`packages/coding-agent/src/tools/browser/tab-worker.ts`).\n- Drag simulation uses `12` mouse-move steps (`packages/coding-agent/src/tools/browser/tab-worker.ts`).\n\n## Errors\n- `BrowserTool.execute()` converts DOM-style `AbortError` into `ToolAbortError`; other errors propagate.\n- `run` hard-fails on missing code: `Missing required parameter 'code' for action 'run'.`\n- `open` fails when reusing a name across browser kinds: `Tab \"...\" is bound to a different browser (...). 
Close it first.`\n- `runInTabWithSnapshot()` fails when the tab is absent/dead (`Tab \"...\" is not alive. Reopen it.`) or already running (`Tab \"...\" is busy`).\n- Worker init failures and run failures are serialized through `RunErrorPayload`; `ToolError` and abort state are reconstructed on the host side by `errorFromPayload()`.\n- Attached-target mismatches surface as:\n - `No page targets available on the attached browser`\n - `No page target matched \"...\". Available pages:\\n...`\n - `Target ... is no longer available on the attached browser`\n- Spawned-app path validation requires an absolute executable path, not an app bundle path.\n- Spawn/attach failures are wrapped into `ToolError`s such as `Timed out waiting for CDP endpoint ...`, `Failed to attach to ...`, or `Connected to ... but puppeteer.connect failed: ...`.\n- `tab` helper errors are user-visible `ToolError`s, including unsupported selector prefix, stale/unknown element id, invalid drag target, missing upload files, non-`<select>` for `tab.select()`, non-file-input for `tab.uploadFile()`, and screenshot selector misses.\n- On run timeout, the worker reports `Browser code execution timed out after <ms>ms`; the supervisor may escalate to `Browser code execution hung past grace; tab killed` if the worker does not respond after the grace window.\n\n## Notes\n- `loadPuppeteer()` and `loadPuppeteerInWorker()` temporarily redirect `cwd` to a safe Puppeteer directory before importing `puppeteer-core`, because Puppeteer probes the current working directory during module load.\n- Headless launch prefers a detected system Chrome/Chromium, then `PUPPETEER_EXECUTABLE_PATH`, and only then downloads Chromium.\n- Headless launch always passes `--no-sandbox`, `--disable-setuid-sandbox`, `--disable-blink-features=AutomationControlled`, and a `--window-size=...` matching the initial viewport. 
It also ignores Puppeteer default args `--disable-extensions`, `--disable-default-apps`, and `--disable-component-extensions-with-background-pages`.\n- Proxy-related env vars only affect headless launch: `PUPPETEER_PROXY`, `PUPPETEER_PROXY_BYPASS_LOOPBACK`, and `PUPPETEER_PROXY_IGNORE_CERT_ERRORS`.\n- Stealth patches are applied only in headless mode. Spawned or externally connected browsers are intentionally left untouched.\n- `applyStealthPatches()` also strips Puppeteer's `//# sourceURL=__puppeteer_evaluation_script__` suffix from CDP `Runtime.evaluate` / `Runtime.callFunctionOn` payloads.\n- `tab.extract()` reads `page.content()`, runs Readability first, then falls back to `main article`/`article`/`main`/`[role='main']`/`body`, and returns `null` if neither extraction path yields content.\n- `close(all: true, kill: false)` disconnects from spawned/connected browsers when the last tab closes but leaves spawned app processes running.\n- Headless orphan cleanup is best-effort: if a worker dies before closing its page, the supervisor searches browser targets by `targetId` and closes that page.\n- Console methods inside `run` do not appear in tool output; they are forwarded as debug/warn/error logs through the worker transport.",
66
66
  "tools/calc.md": "# calc\n\n> Evaluates one or more arithmetic expressions and returns formatted numeric results.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/calculator.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/calculator.md`\n- Key collaborators:\n - `packages/coding-agent/src/tui.ts` — status lines and tree-list rendering\n - `packages/coding-agent/src/tools/render-utils.ts` — preview limits and formatting helpers\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `calculations` | `Calculation[]` | Yes | Batch of expressions to evaluate in order. |\n\n### `Calculation`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `expression` | `string` | Yes | Arithmetic expression string. |\n| `prefix` | `string` | Yes | Prepended verbatim to the rendered numeric result. |\n| `suffix` | `string` | Yes | Appended verbatim to the rendered numeric result. |\n\n## Outputs\n- Single-shot result.\n- `content[0].text` is the newline-joined `prefix + value + suffix` string for each calculation.\n- `details.results` is an array of `{ expression, value, output }`.\n- On renderer fallback, if `details` is missing but `content[0].text` exists, the TUI tries to pair each output line with the original expressions from call args.\n\n## Flow\n1. `execute()` wraps evaluation in `untilAborted(...)`.\n2. For each entry, `evaluateExpression(...)` tokenizes the expression, parses it with a recursive-descent parser, rejects non-finite outputs, and normalizes `-0` to `0`.\n3. `tokenizeExpression(...)` accepts whitespace, parentheses, operators, and number literals; any other character throws immediately.\n4. `ExpressionParser` applies precedence in this order: `+ -`, `* / %`, unary `+ -`, exponentiation `**`, parentheses/literals.\n5. Exponentiation is right-associative (`2 ** 3 ** 2` parses as `2 ** (3 ** 2)`).\n6. 
Each numeric result is formatted with `String(value)` and wrapped with the provided `prefix` and `suffix`.\n7. The tool returns text output plus structured `details`.\n\n## Side Effects\n- Background work / cancellation\n - Supports abort via `untilAborted(...)`.\n- Session state\n - None.\n- Filesystem / Network / Subprocesses\n - None.\n\n## Limits & Caps\n- Supported operators: `+`, `-`, `*`, `/`, `%`, `**` (`packages/coding-agent/src/tools/calculator.ts`).\n- Supported numeric literals:\n - decimal integers/floats, including leading-dot forms like `.5`\n - scientific notation like `1e10`, `2.5E-3`\n - hexadecimal `0x...`\n - binary `0b...`\n - octal `0o...`\n- Results must be finite; `Infinity` and `NaN` are rejected.\n- The renderer collapses long result lists using `PREVIEW_LIMITS.COLLAPSED_ITEMS` from `packages/coding-agent/src/tools/render-utils.ts`.\n\n## Errors\n- Invalid characters: e.g. `Invalid character \"x\" in expression`.\n- Malformed numbers: invalid prefixed literal, invalid exponent, invalid number.\n- Syntax errors: `Unexpected token in expression`, `Unexpected end of expression`, `Missing closing parenthesis`, `Expression is empty`.\n- Non-finite arithmetic: `Expression result is not a finite number`.\n- Any evaluation error aborts the whole batch; the tool does not return partial successes.\n\n## Notes\n- Despite the schema example showing `sqrt(16)`, the parser does not support functions, identifiers, units, or constants; only numeric literals, operators, and parentheses are accepted.\n- Precision is plain JavaScript `number` semantics throughout, including floating-point rounding behavior.\n- `/` and `%` use JavaScript numeric operators directly; there is no integer-only mode or unit handling.\n- Unary operators bind tighter than `*`/`/`/`%` but looser than exponentiation because unary parsing delegates to `#parsePower()`.\n",
67
67
  "tools/checkpoint.md": "# checkpoint\n\n> Mark the current top-level conversation state so later `rewind` can collapse exploratory context into a report.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/checkpoint.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/checkpoint.md`\n- Key collaborators:\n - `packages/coding-agent/src/session/agent-session.ts` — captures the active checkpoint after tool success.\n - `packages/coding-agent/src/session/session-manager.ts` — persists the normal session entry stream; not the active checkpoint marker.\n - `packages/coding-agent/src/tools/index.ts` — registers the tool and gates it behind `checkpoint.enabled`.\n - `packages/coding-agent/src/config/settings-schema.ts` — defines the disabled-by-default feature flag.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `goal` | `string` | Yes | Investigation goal. Required by the TypeBox schema and echoed in the tool result. |\n\n## Outputs\nThe tool returns a single text result plus structured details:\n\n- text body:\n - `Checkpoint created.`\n - `Goal: <goal>`\n - `Run your investigation, then call rewind with a concise report.`\n- `details`:\n - `goal: string`\n - `startedAt: string` — ISO timestamp created inside `CheckpointTool.execute()`\n\nNo checkpoint ID, artifact URI, job handle, file path, or restore token is returned.\n\n## Flow\n1. `CheckpointTool.createIf()` in `packages/coding-agent/src/tools/checkpoint.ts` returns `null` for subagents by checking `session.taskDepth`; only top-level sessions can see the tool.\n2. `CheckpointTool.execute()` rejects subagent calls again with `ToolError(\"Checkpoint not available in subagents.\")`.\n3. It rejects nested checkpoints with `ToolError(\"Checkpoint already active.\")` when `session.getCheckpointState?.()` is already set.\n4. It creates `startedAt = new Date().toISOString()` and returns a normal `toolResult()` payload. 
The tool itself does not persist anything.\n5. On the later `tool_execution_end` event, `AgentSession` in `packages/coding-agent/src/session/agent-session.ts` detects successful `checkpoint` execution and captures three in-memory fields:\n - `checkpointMessageCount` — current `agent.state.messages.length`, after the checkpoint tool result has already been appended\n - `checkpointEntryId` — `sessionManager.getEntries().at(-1)?.id ?? null`, i.e. the last persisted session entry ID at checkpoint time\n - `startedAt` — copied from tool details or regenerated\n6. `AgentSession` stores that object in its private `#checkpointState` field and clears `#pendingRewindReport`.\n\n## Side Effects\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Sets `AgentSession.#checkpointState` in memory.\n - Records the checkpoint boundary as a message count plus a session entry ID.\n - Enables the later yield guard: if a checkpoint is active and no rewind report is pending, `#enforceRewindBeforeYield()` injects a developer-role warning and schedules another turn.\n- User-visible prompts / interactive UI\n - The tool result tells the model to call `rewind` after the investigation.\n - If the agent tries to `yield` first, `AgentSession` injects:\n\n```text\n<system-warning>\nYou are in an active checkpoint. You MUST call rewind with your investigation findings before yielding. Do NOT yield without completing the checkpoint.\n</system-warning>\n```\n\n## Limits & Caps\n- Availability is gated by `checkpoint.enabled`, default `false`, in `packages/coding-agent/src/config/settings-schema.ts`.\n- The tool is registered as discoverable in `packages/coding-agent/src/tools/index.ts`.\n- Only one active checkpoint is allowed per top-level session.\n- Checkpoint state is not persisted as a dedicated session entry. 
If the process exits, a resumed session can reload the conversation history, but not the live `#checkpointState` guard.\n- Session persistence still applies to the ordinary checkpoint tool call message. Global session persistence truncation is `MAX_PERSIST_CHARS = 500_000` in `packages/coding-agent/src/session/session-manager.ts`.\n\n## Errors\n- `ToolError(\"Checkpoint not available in subagents.\")` — thrown for subagent sessions.\n- `ToolError(\"Checkpoint already active.\")` — thrown when a prior checkpoint has not been rewound or cleared.\n- The tool body has no local `try/catch`; unexpected exceptions propagate.\n\n## Notes\n- Despite the summary string `Create a git-based checkpoint to save and restore session state`, the implementation does not call git and does not snapshot filesystem state.\n- Captured state is conversation/session metadata only:\n - in-memory message count\n - session entry ID in the session tree\n - timestamp\n- Not captured:\n - working tree contents\n - staged changes\n - artifacts\n - blob-store contents\n - SQLite history rows from `packages/coding-agent/src/session/history-storage.ts`\n - auth or agent records from `packages/coding-agent/src/session/agent-storage.ts`\n- If the turn ends with `stopReason === \"aborted\"` while a checkpoint is active, `AgentSession` clears `#checkpointState` and `#pendingRewindReport` instead of preserving a half-finished checkpoint.\n",
68
68
  "tools/debug.md": "# debug\n\n> Drive one DAP debug session; adjacent debug UI code reuses the same subsystem for logs, raw SSE capture, reports, profiling, and system diagnostics.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/debug.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/debug.md`\n- Key collaborators:\n - `packages/coding-agent/src/dap/session.ts` — session lifecycle, breakpoint/state cache\n - `packages/coding-agent/src/dap/client.ts` — adapter process/socket transport, DAP message loop\n - `packages/coding-agent/src/dap/config.ts` — adapter resolution and auto-selection\n - `packages/coding-agent/src/dap/defaults.json` — built-in adapter definitions\n - `packages/coding-agent/src/dap/types.ts` — request/response/capability shapes\n - `packages/coding-agent/src/tools/tool-timeouts.ts` — per-tool timeout clamp\n - `packages/coding-agent/src/debug/index.ts` — interactive debug selector menu\n - `packages/coding-agent/src/debug/log-viewer.ts` — recent-log TUI viewer\n - `packages/coding-agent/src/debug/raw-sse.ts` — raw SSE TUI viewer\n - `packages/coding-agent/src/debug/raw-sse-buffer.ts` — bounded SSE capture buffer\n - `packages/coding-agent/src/debug/profiler.ts` — CPU/heap profiling helpers\n - `packages/coding-agent/src/debug/report-bundle.ts` — `.tar.gz` report bundling, log source, cache cleanup\n - `packages/coding-agent/src/debug/system-info.ts` — system snapshot collection and env redaction\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `action` | `\"launch\" \\| \"attach\" \\| \"set_breakpoint\" \\| \"remove_breakpoint\" \\| \"set_instruction_breakpoint\" \\| \"remove_instruction_breakpoint\" \\| \"data_breakpoint_info\" \\| \"set_data_breakpoint\" \\| \"remove_data_breakpoint\" \\| \"continue\" \\| \"step_over\" \\| \"step_in\" \\| \"step_out\" \\| \"pause\" \\| \"evaluate\" \\| \"stack_trace\" \\| \"threads\" \\| \"scopes\" \\| \"variables\" \\| \"disassemble\" \\| 
\"read_memory\" \\| \"write_memory\" \\| \"modules\" \\| \"loaded_sources\" \\| \"custom_request\" \\| \"output\" \\| \"terminate\" \\| \"sessions\"` | Yes | Dispatch key for the tool switch in `packages/coding-agent/src/tools/debug.ts`. |\n| `program` | `string` | No | Launch target path. Required for `launch`. Resolved relative to `cwd` if provided, otherwise session cwd. |\n| `args` | `string[]` | No | Program argv for `launch`. |\n| `adapter` | `string` | No | Explicit adapter name. Otherwise `selectLaunchAdapter()` / `selectAttachAdapter()` auto-pick from `packages/coding-agent/src/dap/config.ts`. |\n| `cwd` | `string` | No | Launch/attach working directory. Defaults to session cwd. |\n| `file` | `string` | No | Source file path for source breakpoints. |\n| `line` | `number` | No | Source line for source breakpoints. |\n| `function` | `string` | No | Function breakpoint name. Mutually exclusive with `file`+`line` in breakpoint actions. |\n| `name` | `string` | No | Data breakpoint info target name. Required for `data_breakpoint_info`. |\n| `condition` | `string` | No | Conditional expression for source/function/instruction/data breakpoints. |\n| `hit_condition` | `string` | No | Hit-count condition for instruction/data breakpoints. |\n| `expression` | `string` | No | Expression or raw debugger command. Required for `evaluate`. |\n| `context` | `string` | No | Evaluate context. Defaults to `\"repl\"`. Passed through as DAP evaluate context. |\n| `frame_id` | `number` | No | Frame selector for `evaluate`, `scopes`, `data_breakpoint_info`. `scopes` and `evaluate` default to the current stopped frame when omitted. |\n| `scope_id` | `number` | No | Variables reference from a scope. Accepted by `variables`; also used as a fallback variables reference for `data_breakpoint_info`. |\n| `variable_ref` | `number` | No | Variables reference for `variables`; preferred over `scope_id` when both are present. |\n| `pid` | `number` | No | Local process id for `attach`. 
`attach` requires `pid` or `port`. |\n| `port` | `number` | No | Remote attach port. If no adapter is forced, attach prefers `debugpy` when `port` is present. |\n| `host` | `string` | No | Remote attach host for `attach`. |\n| `levels` | `number` | No | Max stack frames for `stack_trace`. |\n| `memory_reference` | `string` | No | Memory reference/address for `disassemble`, `read_memory`, `write_memory`. `disassemble` also accepts it via `instruction_reference` fallback logic in `resolveDisassemblyReference()`. |\n| `instruction_reference` | `string` | No | Instruction breakpoint reference; required for instruction breakpoint actions. |\n| `instruction_count` | `number` | No | Required for `disassemble`. |\n| `instruction_offset` | `number` | No | Instruction offset for `disassemble`. |\n| `count` | `number` | No | Byte count for `read_memory`. Required there. |\n| `data` | `string` | No | Base64 payload for `write_memory`. Required there. |\n| `data_id` | `string` | No | Data breakpoint id. Required for `set_data_breakpoint` / `remove_data_breakpoint`. |\n| `access_type` | `\"read\" \\| \"write\" \\| \"readWrite\"` | No | Access filter for `set_data_breakpoint`. |\n| `command` | `string` | No | Custom DAP request command. Required for `custom_request`. |\n| `arguments` | `Record<string, unknown>` | No | Custom DAP request body for `custom_request`. |\n| `offset` | `number` | No | Offset for instruction breakpoints, disassembly, memory read, memory write. |\n| `resolve_symbols` | `boolean` | No | `disassemble` symbol-resolution flag. |\n| `allow_partial` | `boolean` | No | `write_memory` partial-write allowance. |\n| `start_module` | `number` | No | Modules pagination start index for `modules`. |\n| `module_count` | `number` | No | Modules pagination count for `modules`. |\n| `timeout` | `number` | No | Per-request timeout in seconds. Default `30`, clamped to `5..300`. 
|\n\n### Action-specific requirements\n- `launch`: `program`\n- `attach`: `pid` or `port`\n- `set_breakpoint` / `remove_breakpoint`: `function`, or `file` + `line`\n- `set_instruction_breakpoint` / `remove_instruction_breakpoint`: `instruction_reference`\n- `data_breakpoint_info`: `name`\n- `set_data_breakpoint` / `remove_data_breakpoint`: `data_id`\n- `evaluate`: `expression`\n- `variables`: `variable_ref` or `scope_id`\n- `disassemble`: capability `supportsDisassembleRequest`, plus `instruction_count`\n- `read_memory`: capability `supportsReadMemoryRequest`, plus `memory_reference` and `count`\n- `write_memory`: capability `supportsWriteMemoryRequest`, plus `memory_reference` and `data`\n- `modules`: capability `supportsModulesRequest`\n- `loaded_sources`: capability `supportsLoadedSourcesRequest`\n- `custom_request`: `command`\n\n### Interactive selector values\n`packages/coding-agent/src/debug/index.ts` also exposes a fixed UI-only selector with values `open-artifacts`, `performance`, `work`, `dump`, `memory`, `logs`, `system`, `raw-sse`, `transcript`, `clear-cache`. These are not model-callable through `debugSchema`; they are local TUI menu routes.\n\n## Outputs\nThe agent tool returns a standard `toolResult()` payload from `packages/coding-agent/src/tools/debug.ts`:\n- `content`: one text block. 
Every action renders human-readable text; there is no structured JSON block in `content`.\n- `details.action`: echoed action.\n- `details.success`: always initialized `true`; failures surface by throwing before a result is returned.\n- `details.snapshot`: present for actions that operate on or create a session, using `DapSessionSummary` from `packages/coding-agent/src/dap/types.ts`.\n- Action-specific `details` fields:\n - `launch` / `attach`: `adapter`\n - breakpoint actions: `breakpoints`, `functionBreakpoints`, `instructionBreakpoints`, `dataBreakpoints`\n - `data_breakpoint_info`: `dataBreakpointInfo`\n - `continue` / `step_*`: `state`, `timedOut`\n - `threads`: `threads`\n - `stack_trace`: `stackFrames`\n - `scopes`: `scopes`\n - `variables`: `variables`\n - `evaluate`: `evaluation`\n - `disassemble`: `disassembly`\n - `read_memory`: `memoryAddress`, `memoryData`, `unreadableBytes`\n - `write_memory`: `bytesWritten`\n - `modules`: `modules`\n - `loaded_sources`: `sources`\n - `custom_request`: `customBody`\n - `output`: `output`\n - `sessions`: `sessions`\n\nStreaming/UI behavior:\n- The tool renderer merges call and result (`mergeCallAndResult: true`) and renders inline.\n- `debug.ts` itself does not emit progress updates through `_onUpdate`; result delivery is single-shot.\n- The interactive selector is UI-driven instead of model-driven. It swaps TUI components, appends status lines to the chat pane, opens files in external viewers, or writes archives/temp files.\n\nSide-channel artifacts outside the model tool result:\n- `createReportBundle()` writes `omp-report-<timestamp>.tar.gz` under the reports dir and returns the filesystem path to the UI handler.\n- `#handleWorkReport()` writes `/tmp/work-profile-<Date.now()>.svg` before opening it.\n- `RawSseViewerComponent` and `DebugLogViewerComponent` can copy captured text to the clipboard.\n\n## Flow\n1. 
Tool registration is conditional: `DebugTool.createIf()` in `packages/coding-agent/src/tools/debug.ts` returns `null` unless `session.settings.get(\"debug.enabled\")` is true. `packages/coding-agent/src/tools/index.ts` wires the factory and rechecks the same setting in tool filtering.\n2. `DebugTool.execute()` clamps `params.timeout` through `clampTimeout(\"debug\", params.timeout)` and composes the caller `AbortSignal` with `AbortSignal.timeout(...)`.\n3. `launch` and `attach` resolve cwd/program paths, select an adapter in `packages/coding-agent/src/dap/config.ts`, then delegate to `dapSessionManager.launch()` / `.attach()`.\n4. `DapSessionManager.launch()` / `.attach()` enforce the single-session rule with `#ensureLaunchSlot()`, spawn the adapter through `DapClient.spawn()`, register listeners, send `initialize`, cache capabilities, start listening for an initial stop event before sending `launch`/`attach`, then complete the `initialized` → `configurationDone` handshake in `#completeConfigurationHandshake()`.\n5. `DapClient.spawn()` starts the adapter detached with `NON_INTERACTIVE_ENV`. Most adapters use stdio; socket-mode adapters (`dlv`) use `#spawnSocketUnix()` on Linux or `#spawnSocketClientAddr()` on macOS/other.\n6. `#registerSession()` in `packages/coding-agent/src/dap/session.ts` installs reverse-request handlers:\n - `runInTerminal`: spawns the requested debuggee command detached via `ptree.spawn()` and returns `{ processId }`\n - `startDebugging`: logs the child-session request and returns `{}`; it does not create nested sessions\n - events: `output`, `initialized`, `stopped`, `continued`, `exited`, `terminated` update cached session state\n7. Operational actions (`set_breakpoint`, `evaluate`, `threads`, `read_memory`, `custom_request`, and similar) call `dapSessionManager` methods. Most flow through `#sendRequestWithConfig()`, which first sends `configurationDone` when required, then sends the DAP request, then updates `lastUsedAt`.\n8. 
Breakpoint actions maintain local cached breakpoint sets in `DapSessionManager` and remap adapter responses back onto those cached records.\n9. `continue` and the three step actions clear cached stop state, subscribe for `stopped`/`terminated`/`exited` before sending the DAP request, then `#awaitStopOutcome()` either returns the new stopped location or reports that the program is still running after timeout.\n10. `pause` sends DAP `pause`, waits for a stopped event if needed, and reuses cached stop state if the program was already stopped.\n11. `stack_trace`, `scopes`, `variables`, and `evaluate` default to the current stopped thread/frame when the caller omits ids and cached state is available.\n12. `output` reads the in-memory output ring from `DapSessionManager.getOutput()`. `terminate` sends `terminate` when supported, always attempts `disconnect`, marks the session terminated, and disposes the client.\n13. `sessions` reads the manager’s current map and formats all summaries. Although the manager stores a map, only one active session can exist because new launch/attach calls are blocked until the active one is terminated or cleaned up.\n14. 
The interactive selector in `packages/coding-agent/src/debug/index.ts` builds a `SelectList` of fixed values and dispatches each to a handler:\n - `performance`: `startCpuProfile()`, wait for Enter/Escape, stop profiling, read a 30-second work profile with `getWorkProfile(30)`, then bundle via `createReportBundle()`\n - `work`: read `getWorkProfile(30)`, write a temp SVG, open it externally\n - `dump`: create a report bundle immediately\n - `memory`: force GC, call `Bun.generateHeapSnapshot(\"v8\")`, then bundle\n - `logs`: build a `DebugLogSource` and mount `DebugLogViewerComponent`\n - `raw-sse`: resolve a `RawSseDebugBuffer` from the session and mount `RawSseViewerComponent`\n - `system`: call `collectSystemInfo()` and render `formatSystemInfo()` into the chat pane\n - `open-artifacts`: open the current session artifact directory if it exists\n - `transcript`: delegates to `ctx.handleDebugTranscriptCommand()`\n - `clear-cache`: show confirmation, then remove artifact directories older than 30 days with `clearArtifactCache()`\n\n## Modes / Variants\n- **Availability gate**\n - Tool hidden when `debug.enabled` is false.\n- **Adapter selection**\n - `launch`: explicit `adapter` wins; otherwise `selectLaunchAdapter()` ranks available adapters by extension match, root-marker match, then native-debugger preference (`gdb`, `lldb-dap`) for extensionless binaries.\n - `attach`: explicit `adapter` wins; otherwise remote `port` prefers `debugpy`, then native debuggers, then first available adapter.\n- **Transport**\n - stdio adapters: direct `stdin`/`stdout` framing.\n - socket adapters: Unix domain socket on Linux; TCP callback on macOS/other.\n- **DAP agent-tool actions**\n - `launch` — spawn adapter, initialize session, maybe stop on entry; returns formatted session snapshot and `details.adapter`.\n - `attach` — connect to a live process or remote port; same output shape as `launch`.\n - `set_breakpoint` — source or function breakpoint add/update; returns the current 
breakpoint list for that target.\n - `remove_breakpoint` — source or function breakpoint removal; returns the remaining breakpoint list.\n - `set_instruction_breakpoint` / `remove_instruction_breakpoint` — require `supportsInstructionBreakpoints`; return current instruction breakpoint list.\n - `data_breakpoint_info` — require `supportsDataBreakpoints`; asks the adapter for a `dataId`, access types, and description for `name`.\n - `set_data_breakpoint` / `remove_data_breakpoint` — require `supportsDataBreakpoints`; return the cached data-breakpoint list.\n - `continue` / `step_over` / `step_in` / `step_out` — return text describing whether execution stopped, terminated, or kept running, plus `details.state` and `details.timedOut`.\n - `pause` — interrupts a running target and returns a stopped snapshot.\n - `evaluate` — adapter expression evaluation; defaults context to `repl`.\n - `stack_trace` — fetches frames for the resolved thread.\n - `threads` — fetches current threads.\n - `scopes` — frame scopes for an explicit `frame_id` or the current stopped frame.\n - `variables` — variables for `variable_ref` or `scope_id`.\n - `disassemble` — require `supportsDisassembleRequest`; disassembles around a memory reference.\n - `read_memory` — require `supportsReadMemoryRequest`; returns address, base64 data, unreadable-byte count.\n - `write_memory` — require `supportsWriteMemoryRequest`; writes base64 data and reports bytes written.\n - `modules` — require `supportsModulesRequest`; optional pagination via `start_module` / `module_count`.\n - `loaded_sources` — require `supportsLoadedSourcesRequest`; returns loaded source descriptors.\n - `custom_request` — sends any DAP request name with arbitrary arguments.\n - `output` — dumps captured stdout/stderr/console text from the session cache.\n - `terminate` — disconnects and disposes the active session; returns `No debug session to terminate.` when none exists.\n - `sessions` — lists all cached session summaries.\n- 
**Interactive selector routes (UI-only)**\n - `logs` — loads today’s log tail and optional older daily log files into `DebugLogViewerComponent`; supports copy, range selection, pid filtering, load-older.\n - `raw-sse` — live view over the session’s `RawSseDebugBuffer`; supports tail-follow, scrolling, copy-all.\n - `performance` — CPU profile + 30-second work profile + report bundle.\n - `memory` — heap snapshot + report bundle.\n - `dump` — report bundle without profiler artifacts.\n - `work` — standalone work-profile flamegraph export/open.\n - `system` — formatted OS/arch/CPU/memory/version/cwd/shell/terminal dump.\n - `open-artifacts` / `transcript` / `clear-cache` — artifact directory open, transcript export, artifact-cache pruning.\n\n## Side Effects\n- Filesystem\n - Resolves program/file/cwd paths against the session cwd.\n - Report creation writes `.tar.gz` bundles and may read the session JSONL, artifact files, subagent session JSONLs, and log files.\n - Work-profile export writes `/tmp/work-profile-<timestamp>.svg`.\n - Log source reads daily log files from the logs dir.\n - Artifact-cache cleanup removes session artifact directories older than the cutoff.\n - `resolveRawSseDebugBuffer()` may attach a non-enumerable `rawSseDebugBuffer` property to the owner object.\n- Network\n - Socket-mode adapters bind/connect local sockets.\n - Remote attach may connect through the adapter to a remote debug port.\n- Subprocesses / native bindings\n - Spawns debugger adapters (`gdb`, `lldb-dap`, `python -m debugpy.adapter`, `dlv`, and others from `defaults.json`) detached.\n - Reverse DAP `runInTerminal` requests spawn the debuggee detached via `ptree.spawn()`.\n - `getWorkProfile(30)` comes from `@oh-my-pi/pi-natives`.\n - CPU profiling uses `node:inspector/promises`; heap snapshots use `Bun.generateHeapSnapshot(\"v8\")`; raw/log viewers sanitize text via `@oh-my-pi/pi-natives`.\n - `openPath()` launches the OS default file/browser handler for artifact dirs and 
SVGs.\n - Log/raw-SSE viewers can call `copyToClipboard()`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - `DapSessionManager` keeps session summaries, breakpoints, threads, stack frames, stop location, output capture, capabilities, and last-used timestamps in memory.\n - Active-session id is global to the singleton `dapSessionManager`.\n - `RawSseDebugBuffer` stores recent SSE events per owner/session.\n - The tool is `exclusive`; concurrent debug tool calls are blocked by the scheduler.\n- User-visible prompts / interactive UI\n - Debug selector shows confirmation before cache deletion.\n - Performance profiling temporarily hijacks editor Enter/Escape handlers until profiling stops.\n - Log/raw-SSE viewers replace the editor pane with custom components.\n- Background work / cancellation\n - Every DAP request accepts an `AbortSignal`; timeouts and caller cancellation abort the active request, not the whole session lifetime.\n - `DapSessionManager` runs a background cleanup loop every 30 seconds.\n - Raw SSE viewers subscribe to buffer updates until closed.\n\n## Limits & Caps\n- Tool timeout clamp: `default=30`, `min=5`, `max=300` in `packages/coding-agent/src/tools/tool-timeouts.ts`.\n- Per-request DAP default timeout: `DEFAULT_REQUEST_TIMEOUT_MS = 30_000` in `packages/coding-agent/src/dap/client.ts`.\n- Single active session: enforced by `#ensureLaunchSlot()` in `packages/coding-agent/src/dap/session.ts`.\n- Idle session cleanup: `IDLE_TIMEOUT_MS = 10 * 60 * 1000`, checked every `CLEANUP_INTERVAL_MS = 30 * 1000`.\n- Adapter liveness heartbeat: `HEARTBEAT_INTERVAL_MS = 5 * 1000`.\n- Output capture cap: `MAX_OUTPUT_BYTES = 128 * 1024`; older text is trimmed in ~1 KiB slices and `outputTruncated` is recorded.\n- Initial stop capture timeout after launch/attach: `STOP_CAPTURE_TIMEOUT_MS = 5_000`.\n- Socket-mode adapter readiness timeout: `10_000` ms in `waitForCondition()` and TCP connect timeout logic in 
`packages/coding-agent/src/dap/client.ts`.\n- Raw SSE buffer caps in `packages/coding-agent/src/debug/raw-sse-buffer.ts`:\n - `MAX_RAW_SSE_EVENTS = 1_000`\n - `MAX_RAW_SSE_CHARS = 512_000`\n - `MAX_RAW_SSE_EVENT_CHARS = 64_000` per event, with `: omp-debug-truncated ...` marker appended on trim\n- Log viewer window in `packages/coding-agent/src/debug/log-viewer.ts`:\n - `INITIAL_LOG_CHUNK = 50`\n - `LOAD_OLDER_CHUNK = 50`\n- Report/log ingestion caps in `packages/coding-agent/src/debug/report-bundle.ts`:\n - `MAX_LOG_LINES = 5000` for interactive log reading\n - `MAX_LOG_BYTES = 2 * 1024 * 1024` tail-read ceiling\n - report bundles include only the last `1000` log lines\n - subagent session inclusion is capped at the most recent `10` JSONL files\n- Interactive profiling windows in `packages/coding-agent/src/debug/index.ts`: both performance and work reports request `getWorkProfile(30)`.\n- Artifact cache pruning default: `30` days in `clearArtifactCache()` and the selector confirmation text.\n\n## Errors\n- Parameter validation in `packages/coding-agent/src/tools/debug.ts` throws `ToolError` with explicit messages such as:\n - `program is required for launch`\n - `attach requires pid or port`\n - `set_breakpoint requires file+line or function`\n - `variables requires variable_ref or scope_id`\n - `memory_reference is required for read_memory`\n - `count is required for read_memory`\n - `data is required for write_memory`\n - `command is required for custom_request`\n- Adapter selection failure throws `No debugger adapter available. Installed adapters: ...`.\n- Capability-gated actions throw from `requireCapability(...)`, e.g. `Active adapter does not support memory reads.`\n- No-session and state errors come from `DapSessionManager`, e.g. `No active debug session. Launch or attach first.`, `No active stack frame. Run stack_trace first or supply frame_id.`, `Debugger reported no threads.`\n- Launching a second live session throws `Debug session <id> is still active. 
Terminate it before launching another.`\n- DAP transport/request failures surface as thrown errors from `DapClient`:\n - `DAP request <command> timed out after <ms>ms`\n - `DAP event <event> timed out after <ms>ms`\n - `DAP adapter <name> is not running`\n - `DAP adapter exited (code N): <stderr>` or `DAP adapter exited unexpectedly (code N)`\n - adapter response `message` when a DAP request fails\n- `continue` / `step_*` are intentionally non-fatal when the target stays running past the timeout: they return `details.timedOut = true` and `state: \"running\"` instead of throwing.\n- `terminate` suppresses adapter errors while sending `terminate`/`disconnect`; it still disposes the client and returns the last summary when possible.\n- Interactive selector handlers report UI errors instead of throwing:\n - profiler start/stop, report bundling, log reading, system-info collection, cache clearing, and artifact opening use `ctx.showError(...)` / `ctx.showWarning(...)`\n - empty logs and empty artifact caches are warnings/status messages, not failures\n - copy failures in log/raw-SSE viewers become status/error text in the UI\n- Report-bundle helpers are intentionally best-effort for many file reads: missing session files, missing artifact dirs, unreadable artifact files, missing log dirs, inaccessible cache dirs, and missing subagent files are skipped silently.\n- `collectSystemInfo()` is best-effort for CPU probing; failure there falls back to `Unknown CPU`.\n\n## Notes\n- `packages/coding-agent/src/prompts/tools/debug.md` tells the model only one active session is supported; that is not advisory, it is enforced in code.\n- `configurationDone` is sent automatically both during launch/attach handshake and lazily before later requests if the adapter required it and the initial handshake did not complete.\n- `startDebugging` reverse requests are acknowledged but not implemented; child debug sessions are not spawned.\n- `output` exposes the merged `output` event stream 
only; the tool does not distinguish stdout, stderr, and console categories.\n- Session summaries expose `needsConfigurationDone`; this is derived from adapter capabilities and whether `configurationDone` has been sent.\n- Source breakpoint file paths are normalized with `path.resolve()` before caching and sending to the adapter.\n- `evaluate` defaults to `repl`, so the tool can forward raw debugger commands when the adapter supports them.\n- `disassemble` resolves its target from `memory_reference` first, then `instruction_reference`; it throws if neither is present.\n- `RawSseDebugBuffer.recordEvent()` increments `totalEvents` before bounded retention. A snapshot can therefore show fewer retained records than total observed events.\n- Raw SSE buffer listener failures are swallowed so viewer bugs do not break capture.\n- `createDebugLogSource()` walks daily log files newest-first, but `loadOlderLogs()` reverses each requested slice before concatenation so older chunks prepend in chronological order.\n- `clearArtifactCache()` deletes directories by directory mtime, not per-file age.\n- `addDirectoryToArchive()` reads artifact files as text with `Bun.file(...).text()`. Binary artifact contents are not preserved byte-for-byte in the report bundle.\n- The tool renderer truncates displayed output for the TUI preview, but the underlying text result still contains the full returned string.\n",
69
69
  "tools/edit.md": "# edit\n\n> Applies source edits; default mode is the hashline patch language consumed from a single `input` string.\n\n## Source\n- Entry: `packages/coding-agent/src/edit/index.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/hashline.md`\n- Key collaborators:\n - `packages/coding-agent/src/utils/edit-mode.ts` — selects active edit mode\n - `packages/coding-agent/src/hashline/grammar.lark` — custom-tool grammar for hashline mode\n - `packages/coding-agent/src/hashline/input.ts` — splits `@@ PATH` sections (legacy single-`@` headers are still accepted)\n - `packages/coding-agent/src/hashline/parser.ts` — parses ops and payload lines\n - `packages/coding-agent/src/hashline/apply.ts` — validates anchors and applies edits\n - `packages/coding-agent/src/hashline/anchors.ts` — stale-anchor mismatch formatting\n - `packages/coding-agent/src/hashline/recovery.ts` — cache-based stale-anchor recovery\n - `packages/coding-agent/src/hashline/hash.ts` — computes `LINEhh|` anchors shared with `read`/`search`\n - `packages/coding-agent/src/edit/file-read-cache.ts` — per-session read snapshot cache\n - `packages/coding-agent/src/tools/read.ts` — emits anchored lines and records read snapshots\n - `packages/coding-agent/src/tools/search.ts` — records sparse snapshots from matches/context\n - `packages/coding-agent/src/tools/fs-cache-invalidation.ts` — invalidates FS scan caches after writes\n - `packages/coding-agent/src/edit/streaming.ts` — computes in-flight diff previews for the TUI\n\n## Inputs\n\n### Hashline mode (default)\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `input` | `string` | Yes | One or more edit sections. First non-blank line must be `@@ PATH` (legacy single-`@` is still accepted) unless the caller supplies the legacy fallback `path` outside the model schema and the body already looks like hashline ops (`packages/coding-agent/src/hashline/input.ts`). 
Optional `*** Begin Patch` / `*** End Patch` envelope is ignored if present. |\n\nPatch language inside `input`:\n\n- Section header: `@@ PATH`\n- Insert after: `+ ANCHOR`\n- Insert before: `< ANCHOR`\n- Delete range: `- A..B`\n- Replace range: `= A..B`\n- Payload line: `~TEXT` by default; separator is `HL_EDIT_SEP` and can be overridden once at process start by `PI_HL_SEP` (`packages/coding-agent/src/hashline/hash.ts`)\n- Special anchors: `BOF`, `EOF`\n- Anchor token: `<line><2-char-hash>`, for example `41th`\n\nAnchors come from `read`/`search` output. `read` formats lines as `LINEhh|TEXT` via `formatHashLine` / `formatHashLines` in `packages/coding-agent/src/hashline/hash.ts`; copy only the token left of `|` into op lines.\n\nOther edit modes exist (`replace`, `patch`, `vim`, `apply_patch`) and are selected outside the tool payload by `resolveEditMode()` in `packages/coding-agent/src/utils/edit-mode.ts`. Their schemas are different; this document covers the default hashline mode.\n\n## Outputs\n- Single-shot tool result; hashline mode does not use a `resolve` preview/apply handshake.\n- `content` contains one text block per call. 
For a successful single-file edit it is either:\n - `<path>:` plus a compact diff preview from `packages/coding-agent/src/hashline/diff-preview.ts`, or\n - `Updated <path>` / `Created <path>` when no compact preview text is emitted.\n- Parse or recovery warnings are appended as:\n\n```text\nWarnings:\n...\n```\n\n- `details` is `EditToolDetails` from `packages/coding-agent/src/edit/renderer.ts`:\n - `diff`: unified diff string\n - `firstChangedLine`: first changed post-edit line\n - `diagnostics`: LSP/format result if available\n - `op`: `\"create\"` or `\"update\"` for hashline mode\n - `meta`: output metadata\n - `perFileResults`: present for multi-section input\n- Multi-section input returns one aggregated result with combined text and per-file details.\n- While the model is still typing arguments, the TUI can compute a diff preview with `packages/coding-agent/src/edit/streaming.ts`; that preview is not a deferred action and does not block execution.\n\n## Flow\n1. `EditTool.execute()` in `packages/coding-agent/src/edit/index.ts` resolves the active mode. Default is `hashline`; `customFormat` exposes `packages/coding-agent/src/hashline/grammar.lark` with `$HFMT$` / `$HSEP$` placeholders filled from `packages/coding-agent/src/hashline/hash.ts`.\n2. `executeHashlineSingle()` in `packages/coding-agent/src/hashline/execute.ts` splits the raw `input` into `@@ PATH` sections with `splitHashlineInputs()`.\n3. If multiple sections target the same path, `mergeSamePathSections()` concatenates them before execution so every op still refers to the original file snapshot.\n4. Multi-section calls run a preflight pass (`preflightHashlineSection()`): parse ops, enforce plan-mode write rules, load the current file, reject anchor-scoped edits against missing files, reject auto-generated files, apply edits in memory, and fail if the result is a no-op. This prevents partial batches.\n5. 
`parseHashlineWithWarnings()` in `packages/coding-agent/src/hashline/parser.ts` tokenizes the diff body:\n - ignores blank lines and optional `*** Begin Patch`\n - stops at `*** End Patch`\n - stops at `*** Abort` and emits `ABORT_WARNING`\n - turns `+` / `<` payload runs into one `insert` edit per payload line\n - turns `- A..B` into one `delete` edit per line in the range\n - turns `= A..B` into inserts before `A`, then deletes for `A..B`; no payload means replace with a single empty line\n6. `applyHashlineEdits()` in `packages/coding-agent/src/hashline/apply.ts` validates every referenced anchor before mutating anything. Each anchor hash is recomputed from current file content with `computeLineHash()`.\n7. If any anchor hash differs, `applyHashlineEdits()` throws `HashlineMismatchError`. `execute.ts` catches only that class and calls `tryRecoverHashlineWithCache()`.\n8. Recovery replays the edits against the most recent cached read/search snapshot for that path (`packages/coding-agent/src/edit/file-read-cache.ts`), then 3-way merges the result onto current disk content using `Diff.applyPatch(..., { fuzzFactor: 3 })` in `packages/coding-agent/src/hashline/recovery.ts`. On success the edit proceeds with a warning; on failure the original mismatch error is re-thrown.\n9. Before splicing lines, `absorbReplacementBoundaryDuplicates()` normalizes some malformed-but-recoverable ranges:\n - duplicate prefix/suffix lines adjacent to a replacement can be absorbed by widening the delete range\n - pure inserts can auto-drop duplicated leading/trailing payload lines when `edit.hashlineAutoDropPureInsertDuplicates` is enabled\n - all such fixes append warnings\n10. `after_anchor` inserts are normalized to `before_anchor` of the next line, or `EOF` if the anchor was the last line.\n11. Anchor-targeted edits are bucketed by target line and applied bottom-up so earlier splices do not invalidate later original line numbers. `BOF` and `EOF` inserts are applied after that.\n12. 
The edited text is restored to the original BOM and line ending style with helpers from `packages/coding-agent/src/edit/normalize.ts` and persisted via `serializeEditFileText()` in `packages/coding-agent/src/edit/read-file.ts`.\n13. The writethrough callback from `createLspWritethrough()` may format the file and fetch diagnostics. Late diagnostics are queued back into session state as a hidden deferred message by `EditTool.#injectLateDiagnostics()` in `packages/coding-agent/src/edit/index.ts`.\n14. `invalidateFsScanAfterWrite()` calls `invalidateFsScanCache(path)` so filesystem-backed tools do not serve stale scan results.\n15. The session file-read cache is refreshed with the post-edit file text via `recordContiguous()`, making the just-written content the new recovery base for subsequent stale-anchor merges.\n16. The final response is built from a unified diff (`generateDiffString()`), a compact preview, and any accumulated warnings.\n\n## Modes / Variants\n- `hashline` — default mode; line-anchored patch language described here (`packages/coding-agent/src/utils/edit-mode.ts`).\n- `replace` — exact/fuzzy old/new text replacement (`packages/coding-agent/src/edit/modes/replace.ts`).\n- `patch` — structured JSON diff-hunk mode (`packages/coding-agent/src/edit/modes/patch.ts`).\n- `apply_patch` — freeform Codex-style `*** Begin Patch` envelope, internally expanded into patch-mode entries (`packages/coding-agent/src/edit/modes/apply-patch.ts`).\n- `vim` — persistent modal editing buffer (`packages/coding-agent/src/tools/vim.ts`).\n\nHashline op examples:\n\n```text\n@@ src/a.ts\n+ 4fb\n~const added = true;\n```\n\n```text\n@@ src/a.ts\n< 4fb\n~const addedBefore = true;\n```\n\n```text\n@@ src/a.ts\n- 4fb..6qx\n```\n\n```text\n@@ src/a.ts\n= 4fb..5dm\n~const clean = (name || DEF).trim();\n~return clean.length === 0 ? 
DEF : clean.toUpperCase();\n```\n\nBOF/EOF examples:\n\n```text\n@@ src/a.ts\n+ BOF\n~const HEADER = true;\n```\n\n```text\n@@ src/a.ts\n+ EOF\n~export const done = true;\n```\n\n## Side Effects\n- Filesystem\n - Reads target files with `readEditFileText()`.\n - Writes full updated file contents with `serializeEditFileText()`.\n - Preserves BOM and original line-ending style.\n- Subprocesses / native bindings\n - `createLspWritethrough()` may trigger formatter / diagnostics work through the LSP subsystem.\n - `invalidateFsScanAfterWrite()` calls native `invalidateFsScanCache()` from `@oh-my-pi/pi-natives`.\n- Session state\n - Reads and updates the per-session `FileReadCache` used for stale-anchor recovery.\n - Stores pending deferred-diagnostics abort controllers per path inside `EditTool`.\n - Queues late diagnostics back into the session transcript as a hidden custom message.\n- Background work / cancellation\n - A new edit to the same path aborts the prior deferred diagnostics fetch for that path (`packages/coding-agent/src/edit/index.ts`).\n - The tool itself is marked `nonAbortable = true` and `concurrency = \"exclusive\"` in `packages/coding-agent/src/edit/index.ts`.\n\n## Limits & Caps\n- Default mode is `hashline` (`DEFAULT_EDIT_MODE`) in `packages/coding-agent/src/utils/edit-mode.ts`.\n- Anchor hashes are always 2 lowercase letters from a stable 647-entry bigram table (`HL_BIGRAMS_COUNT`) in `packages/coding-agent/src/hashline/hash.ts`.\n- The visible mismatch report shows 2 lines of context on each side (`MISMATCH_CONTEXT`) in `packages/coding-agent/src/hashline/constants.ts`.\n- Stale-anchor recovery uses `fuzzFactor: 3` (`HASHLINE_RECOVERY_FUZZ_FACTOR`) in `packages/coding-agent/src/hashline/recovery.ts`.\n- The per-session read cache keeps at most 30 paths (`MAX_PATHS_PER_SESSION`) in `packages/coding-agent/src/edit/file-read-cache.ts`.\n- Hashline streaming chunk defaults are 200 lines or 64 KiB per chunk 
(`packages/coding-agent/src/hashline/types.ts`, consumed by `packages/coding-agent/src/hashline/stream.ts`).\n- `HL_EDIT_SEP` defaults to `~`; `HL_BODY_SEP` is always `|` (`packages/coding-agent/src/hashline/hash.ts`).\n\n## Errors\n- Missing section header:\n - `input must begin with \"@@ PATH\" on the first non-blank line; got: ... Example: \"@@ src/foo.ts\" then edit ops.`\n- Empty header:\n - `Input header \"@\" is empty; provide a file path.`\n- Bad anchor token:\n - `line N: expected a full anchor such as \"119sr\"; got \"...\".`\n- Bad range syntax:\n - `line N: explicit ranges are required for delete/replace...`\n - `line N: range must include exactly two full anchors separated by \"..\".`\n - `line N: range A..B ends before it starts.`\n - `line N: range A..B uses two different hashes for the same line.`\n- Missing payload for `+` / `<`:\n - `line N: + and < operations require at least one ~TEXT payload line.`\n- Stray payload line:\n - `line N: payload line has no preceding +, <, or = operation.`\n- Unknown op:\n - `line N: unrecognized op. Use < ANCHOR..., + ANCHOR..., - A..B..., = A..B...`\n- Missing file for anchor-scoped edits:\n - `File not found: <path>`\n- Out-of-range anchor:\n - `Line N does not exist (file has M lines)`\n- Stale anchors throw `HashlineMismatchError`. The error message contains re-read guidance and reprints nearby current file lines as `LINEhh|TEXT`; mismatched lines are marked `*`. `displayMessage` renders the same information in a code-frame style.\n- No-op edit:\n - `Edits to <path> resulted in no changes being made.`\n- Recovery failure is silent internally: if cache-based merge cannot prove a valid result, the original mismatch error is surfaced unchanged.\n\n## Notes\n- `read` and `search` are the authoritative source of anchors. The edit parser does not want the trailing `|TEXT`; copy only the `LINEhh` token.\n- Multi-op patches are parsed against the original file snapshot. 
Do not renumber later anchors after earlier ops; `applyHashlineEdits()` buckets and applies them bottom-up.\n- `= A..B` is not a primitive replace in the parser. It expands to inserts before `A` plus deletes for `A..B`, which is why stale-anchor checking still happens on the original range lines.\n- Interior lines of a multi-line range use hash `**` (`RANGE_INTERIOR_HASH`) and are not individually verified; only the first and last anchor hashes are checked.\n- `computeLineHash()` trims trailing whitespace before hashing. Anchors survive line-ending changes and trailing-space-only changes, but not substantive line edits.\n- For punctuation-only lines, the hash mixes in the line number; identical `}` lines at different line numbers intentionally get different anchors.\n- `splitHashlineInputs()` normalizes absolute `@@ PATH` headers back to a cwd-relative path when the file is inside the current working tree. Headers with any run of leading `@` chars (e.g. `@ foo.ts`, `@@ foo.ts`, `@@@foo.ts`) are accepted to absorb unified-diff-style drift; the canonical form is `@@ PATH`.\n- Optional `*** Begin Patch` / `*** End Patch` markers are accepted in hashline mode, but the file sections are still `@@ PATH`-based, not Codex `*** Update File:` hunks.\n- `*** Abort` terminates parsing early and returns `ABORT_WARNING`; ops parsed before the marker still apply.\n- File-read cache invalidation is conflict-based, not write-through invalidation. If `read` later records content for a line that disagrees with the cached snapshot, the entire snapshot for that path is replaced with the newly observed lines (`packages/coding-agent/src/edit/file-read-cache.ts`).\n- There is no resolve-style apply/discard phase for hashline edits. The only preview path is the transient TUI diff preview in `packages/coding-agent/src/edit/streaming.ts`.\n",
70
- "tools/eval.md": "# eval\n\n> Execute Python or JavaScript code in persistent cell-based runtimes.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/eval.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/eval.md`\n- Key collaborators:\n - `packages/coding-agent/src/eval/parse.ts` — lenient cell parser\n - `packages/coding-agent/src/eval/sniff.ts` — language sniffing heuristics\n - `packages/coding-agent/src/eval/backend.ts` — backend execution contract\n - `packages/coding-agent/src/eval/js/index.ts` — JS backend adapter\n - `packages/coding-agent/src/eval/js/executor.ts` — JS execution + output sink\n - `packages/coding-agent/src/eval/js/context-manager.ts` — persistent VM contexts, prelude, tool bridge\n - `packages/coding-agent/src/eval/js/prelude.txt` — JS global helpers\n - `packages/coding-agent/src/eval/py/index.ts` — Python backend adapter\n - `packages/coding-agent/src/eval/py/executor.ts` — kernel session retention, reset, cleanup\n - `packages/coding-agent/src/eval/py/kernel.ts` — Jupyter gateway/kernel protocol, display capture\n - `packages/coding-agent/src/eval/py/prelude.py` — Python helper functions and status events\n - `packages/coding-agent/src/session/streaming-output.ts` — truncation, artifacts, streamed chunks\n - `docs/python-repl.md` — Python kernel/gateway internals\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `input` | `string` | Yes | Cell program text. Parsed by `parseEvalInput()` in `packages/coding-agent/src/eval/parse.ts`, not by JSON subfields. |\n\n`input` syntax accepted at runtime:\n\n- Cell header: `*** Cell <attrs...>`. Attributes are space-separated tokens with quoted titles (`\"...\"` or `'...'`).\n- Canonical tokens (advertised in the prompt):\n - `<lang>:\"<title>\"` — language + title shorthand. 
`lang` is `py` or `js` (lenient: also `ts`, plus the long-form aliases `python`, `javascript`, `typescript`, `ipy`, `ipython`).\n - `t:<n>[ms|s|m]` — per-cell timeout (default 30s).\n - `rst` — wipe this cell's language kernel before running.\n- Lenient additional tokens (accepted by the parser, not advertised):\n - bare language token (`py`, `js`)\n - `id:\"...\"` / `title:\"...\"` / `name:\"...\"` / `cell:\"...\"` / `file:\"...\"` / `label:\"...\"` — title aliases\n - `timeout:` / `duration:` / `time:` — `t:` aliases\n - `reset` — `rst` alias\n - `rst:true|false|1|0|yes|no|on|off` — explicit boolean form\n - a bare positional duration token (`30s`, `2m`, `500ms`)\n - any unclassified bare token folds into a positional title fragment\n- Cell body: every following line until the next `*** Cell ...`, the optional `*** End`, or `*** Abort`. `*** End` is a quirk fix for GPT-trained models that emit terminators and is not documented in the prompt.\n\nLeniencies in `packages/coding-agent/src/eval/parse.ts`:\n\n- Markers accept two or more leading `*` and flexible whitespace.\n- `*** End` is optional everywhere; the parser silently consumes trailing tokens (e.g. `*** End py`).\n- Missing terminators between adjacent cells are tolerated; the next `*** Cell` closes the prior cell, and stray non-marker lines between cells fold into the prior cell's body without crashing.\n- Bare code or a single markdown fence such as ```` ```py ```` is treated as one implicit cell.\n- If `*** Abort` appears, the in-progress cell is dropped and the result carries an abort warning. To preserve a completed cell before `*** Abort`, emit `*** End` first.\n\nThe tool also exposes a custom Lark grammar from `packages/coding-agent/src/eval/eval.lark` for constrained sampling. 
That grammar is stricter than the runtime parser: it requires the canonical `*** Cell <lang>:\"title\"` header form with a fixed attribute order, advertises only `py` / `js`, and pins the trailing `*** End` so GPT-trained models' natural terminator habit aligns with the constrained output.\n\n## Outputs\n\nFinal result from `EvalTool.execute()` is single-shot, but `onUpdate` streams partial text and `details` while cells run.\n\nReturned shape:\n\n- `content`: one text block containing combined cell output, or `(no text output)` / `(no output)` when only rich outputs exist.\n- `details` (`EvalToolDetails` from `packages/coding-agent/src/eval/types.ts`):\n - `cells`: per-cell code, status (`pending`/`running`/`complete`/`error`), output, duration, exit code, status events, markdown flag\n - `language`: first backend used\n - `languages`: distinct backends used, in first-use order\n - `jsonOutputs`: structured values emitted via `display(...)`\n - `images`: image payloads emitted by Python rich display or JS `display({ type: \"image\", ... 
})`\n - `statusEvents`: aggregated helper/tool status events\n - `notice`: backend fallback notice\n - `meta`: truncation metadata\n - `isError`: set on cell failure or cancellation\n\nRenderer behavior in `packages/coding-agent/src/tools/eval.ts`:\n\n- call preview renders parsed code cells with syntax highlighting\n- result view renders each cell separately, including status, duration, and output\n- markdown outputs are rendered with the Markdown component instead of plain text\n- `jsonOutputs` render as a tree, collapsed or expanded depending on UI state\n- timeout / fallback / truncation notices render as dim metadata lines\n- images are carried in `details.images`; generic tool UI image handling renders them outside the text block\n\nSide-channel artifacts:\n\n- `session.allocateOutputArtifact?.(\"eval\")` may allocate an `artifact://...` backing store for spilled output.\n- Truncated output metadata points at that artifact when available.\n\n## Flow\n\n1. `EvalTool.execute()` in `packages/coding-agent/src/tools/eval.ts` parses `params.input` with `parseEvalInput()`.\n2. `parseEvalInput()` normalizes newlines, collects cells, parses attributes, and assigns each cell a language from the header, language sniffing, or the default `python`.\n3. Back in `execute()`, each parsed cell is resolved to a backend with `resolveBackend()`:\n - explicit `python`/`js` requests are validated against session settings and backend availability\n - otherwise `sniffEvalLanguage()` in `packages/coding-agent/src/eval/sniff.ts` tries shebangs and language markers\n - if no explicit language was present, later cells prefer the previous runtime language before re-sniffing\n - Python is preferred when available; JS is the fallback when Python is unavailable or disabled\n4. The tool allocates an `OutputSink`, a `TailBuffer`, per-cell result objects, and a `sessionAbortController`. `session.trackEvalExecution?.(...)` can wrap the whole run for external cancellation tracking.\n5. 
Cells execute sequentially. For each cell, `execute()`:\n - clamps the cell timeout through `clampTimeout(\"eval\", ...)`\n - builds a combined abort signal from the tool signal, the timeout, and the session abort controller\n - marks the cell `running` and emits an update\n - calls the backend’s `execute()` with `cwd`, `sessionId`, `sessionFile`, `kernelOwnerId`, `deadlineMs`, `reset`, artifact info, and chunk callback\n6. JS cells dispatch through `packages/coding-agent/src/eval/js/index.ts` into `executeJs()`; Python cells dispatch through `packages/coding-agent/src/eval/py/index.ts` into `executePython()`.\n7. Backend text chunks stream into the shared `OutputSink`; rich outputs are accumulated separately as JSON, images, markdown markers, and status events.\n8. After each cell:\n - text output is trimmed and stored on that cell result\n - multi-cell runs prefix text with `[i/n]` and the optional title\n - cancellations return early with `isError: true` and a cell-specific abort message\n - non-zero exit codes return early with `isError: true` and a message naming the failed cell\n - later cells are skipped after the first error, but earlier cell state persists in the underlying runtime\n9. On success, the tool joins all cell outputs, synthesizes `(no text output)` or `(no output)` when needed, and attaches truncation metadata from `summarizeFinal()`.\n10. The renderer uses `details.cells`, `details.jsonOutputs`, and `details.statusEvents` to build notebook-style output. 
`mergeCallAndResult = true` and `inline = true`, so call and result render together in the transcript.\n\n## Modes / Variants\n\n### Parsing modes\n\n- Explicit multi-cell format with `*** Cell ...` headers\n- Implicit single-cell fallback for bare code or a single fenced block\n- Abort-recovery parse path when `*** Abort` is present\n\n### Backend selection\n\n- Explicit Python backend\n- Explicit JavaScript backend\n- Auto-detected backend via `sniffEvalLanguage()`\n- Fallback from requested/inferred Python to JS when Python is unavailable\n- Fallback notice when JS markers are seen but `eval.js` is disabled and Python is used instead\n\n### JavaScript runtime\n\nImplemented in `packages/coding-agent/src/eval/js/context-manager.ts` and `packages/coding-agent/src/eval/js/prelude.txt`.\n\n- Persistent `vm.Context` instances keyed by `js:${sessionId}` in `vmContexts`\n- `rst` calls `resetVmContext(sessionKey)` before the cell executes\n- Top-level `await` and bare `return` are supported by wrapping code in an async IIFE when `wrapCode()` sees `await` or `return`\n- Top-level static `import ... 
from ...` and dynamic `import(...)` calls are routed through `rewriteImports()`, which sends them via `__omp_import__` so the specifier resolves against the session cwd\n- The prelude installs globals:\n - `display`, `print`\n - `read`, `write`, `append`, `sort`, `uniq`, `counter`, `diff`, `tree`, `env`, `output`\n - `tool.<name>(args)` proxy for arbitrary session tool calls\n- JS helpers are async because they cross the VM/tool boundary\n- `display(value)` behavior:\n - plain objects/arrays become JSON outputs\n - `{ type: \"image\", data, mimeType }` becomes an image output\n - scalars become text\n- The VM exposes a restricted `process` subset plus `Buffer`, `fetch`, `Blob`, `File`, `Headers`, `Request`, `Response`, `fs`, `require`, and browser-style globals\n- Per-session VM runs are serialized with `runQueued()`\n\n### Python runtime\n\nImplemented in `packages/coding-agent/src/eval/py/executor.ts`, `packages/coding-agent/src/eval/py/kernel.ts`, and `packages/coding-agent/src/eval/py/prelude.py`. 
See `docs/python-repl.md` for gateway and kernel details.\n\n- Default mode is retained `session` kernels keyed by `python:${sessionId}`\n- Optional `python.kernelMode = \"per-call\"` creates a fresh kernel for each cell and shuts it down afterward\n- `rst` disposes the retained kernel for that session before the cell runs; later Python cells in the same tool call reuse the fresh kernel\n- Startup path:\n - availability check\n - create/connect kernel\n - initialize cwd / env / `sys.path`\n - execute `PYTHON_PRELUDE`\n- Python cells run inside IPython/Jupyter, so top-level `await` works; the prompt warns not to use `asyncio.run(...)`\n- The Python prelude defines synchronous helpers with the same surface as JS (except `tool.<name>` exists only in JS)\n- `display(value)` wraps dict/list/tuple values in `IPython.display.JSON`; rich display MIME bundles are preserved\n- Kernel `display_data` / `execute_result` messages map to:\n - `application/x-omp-status` → status event\n - `image/png` → image output\n - `application/json` → JSON output\n - `text/markdown` → markdown output\n - `text/plain` → text output\n - `text/html` → HTML converted to markdown with `htmlToBasicMarkdown()`\n- Interactive stdin is rejected: `input_request` sends an empty reply, marks `stdinRequested`, and the executor returns exit code `1`\n\n### Multi-language call behavior\n\nA single tool call can mix Python and JS cells. 
Persistence is per language runtime:\n\n- resetting Python does not touch JS state\n- resetting JS does not touch Python state\n- each backend keeps its own retained session keyed from the same session-derived ID\n\n## Side Effects\n\n- Filesystem\n - JS/Python prelude helpers can read, write, append, diff, and traverse files under the session cwd or absolute paths.\n - Output may spill to an artifact file via `OutputSink`.\n- Network\n - Python backend speaks NDJSON to a local `python3` subprocess over stdin/stdout (no network).\n - JS runtime exposes `fetch` and `tool.<name>()`; those tools may perform additional network I/O.\n- Subprocesses / native bindings\n - Python availability check runs `<python> -c ...`.\n - Python backend spawns one `python -u runner.py` subprocess per kernel; cancellation sends `SIGINT`. Details in `docs/python-repl.md`.\n- Session state\n - `session.assertEvalExecutionAllowed?.()` can block execution.\n - `session.trackEvalExecution?.(...)` can register cancellable eval work.\n - `session.getSessionFile?.()` and `session.getEvalKernelOwnerId?.()` influence kernel reuse and artifact lookup.\n - JS VM contexts persist in `vmContexts` across eval calls until reset/disposal.\n - Python retained kernels persist in `kernelSessions` until reset, eviction, idle cleanup, or owner cleanup.\n- User-visible prompts / interactive UI\n - none; stdin requests are rejected programmatically\n- Background work / cancellation\n - Python retained kernels have heartbeat and idle cleanup timers.\n - Cancellation interrupts a running Python kernel and aborts JS promise waits.\n\n## Limits & Caps\n\n- Per-cell timeout default: 30s (`DEFAULT_TIMEOUT_MS` in `packages/coding-agent/src/eval/parse.ts`; `TOOL_TIMEOUTS.eval.default` in `packages/coding-agent/src/tools/tool-timeouts.ts`)\n- Timeout clamp: 1s minimum, 600s maximum (`TOOL_TIMEOUTS.eval` in `packages/coding-agent/src/tools/tool-timeouts.ts`)\n- Transcript code/output preview: 10 lines by default 
(`EVAL_DEFAULT_PREVIEW_LINES` in `packages/coding-agent/src/tools/eval.ts`)\n- Output truncation window: 50KB default (`DEFAULT_MAX_BYTES` in `packages/coding-agent/src/session/streaming-output.ts`)\n- Output line cap inside truncation helpers: 3000 lines (`DEFAULT_MAX_LINES` in `packages/coding-agent/src/session/streaming-output.ts`)\n- Streaming tail buffer for live updates: `DEFAULT_MAX_BYTES * 2` = 100KB (`packages/coding-agent/src/tools/eval.ts`)\n- Python retained kernel idle timeout: 5 minutes (`IDLE_TIMEOUT_MS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python retained kernel cap: 4 sessions (`MAX_KERNEL_SESSIONS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python retained kernel cleanup sweep: every 30s (`CLEANUP_INTERVAL_MS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python owner-cleanup shutdown wait: 2000ms (`OWNER_CLEANUP_KERNEL_SHUTDOWN_TIMEOUT_MS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python heartbeat interval: 5s (`ensureKernelHeartbeat()` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python external gateway availability check timeout: 5s (`AbortSignal.timeout(5000)` in `packages/coding-agent/src/eval/py/kernel.ts`)\n- Python auto-restart budget: one restart per retained session before hard failure (`restartCount > 1` in `packages/coding-agent/src/eval/py/executor.ts`)\n\n## Errors\n\n- Parse errors from `parseEvalInput()` throw immediately, for example invalid timeout strings.\n- Missing session without proxy executor throws `ToolError(\"Eval tool requires a session when not using proxy executor\")`.\n- Disabled/unavailable backends throw `ToolError` from `resolveBackend()`:\n - `eval.py = false`\n - `eval.js = false`\n - Python kernel unavailable\n - no backend available\n- JS runtime exceptions are converted into text output plus `exitCode: 1`; cancellations return `cancelled: true` and may append `Command timed out`.\n- Python execution errors from the kernel become text output and 
`exitCode: 1`; later cells are skipped.\n- Python stdin requests are treated as errors with the message `Kernel requested stdin; interactive input is not supported.`\n- Cancellation is returned, not thrown, once backend execution has started. The tool formats it as a cell failure and sets `details.isError = true`.\n- If parsing encountered `*** Abort`, the final text appends `ABORT_WARNING`, explicitly telling the model that earlier cells ran and state persists.\n- If output truncates, the tool still succeeds; truncation is surfaced through `details.meta` and artifact-backed full output when available.\n\n## Notes\n\n- The runtime parser is intentionally more permissive than `packages/coding-agent/src/eval/eval.lark`; maintain both when changing syntax.\n- Cell language in `ParsedEvalCell` is not the last word: `EvalTool.execute()` may override backend selection for cells without an explicit header by inheriting the previous runtime language.\n- `tool.<name>()` exists only in JS. Python prelude helpers do not call back into the full tool registry.\n- JS helper paths reject protocol URIs (`://`) in `resolvePath()`; the JS prelude is filesystem-only unless the code calls `tool.read(...)` or another tool explicitly.\n- Python helper `output(...)` depends on `PI_SESSION_FILE`; it fails outside a session-backed run.\n- `display()` can produce text and structured outputs from the same value; the renderer prefers markdown over `text/plain` when both exist.\n- JS static imports are rewritten only at top level. Nested imports stay invalid and surface normal JS syntax/runtime errors.\n- `EvalTool` is `concurrency = \"exclusive\"`, so eval calls do not overlap within a session.\n- The tool description shown to the model is templated by backend availability (`getEvalToolDescription()`); if Python is unavailable, the prompt omits Python-specific instructions.\n",
71
- "tools/exit_plan_mode.md": "# exit_plan_mode\n\n> Submits the current plan-mode plan for user approval.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/exit-plan-mode.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/exit-plan-mode.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/plan-mode-guard.ts` — resolves canonical plan paths during plan mode\n - `packages/coding-agent/src/plan-mode/approved-plan.ts` — renames approved plan artifact after user approval\n - `packages/coding-agent/src/modes/interactive-mode.ts` — approval popup, plan preview, mode exit, tool restoration\n - `packages/coding-agent/src/plan-mode/state.ts` — plan-mode state shape\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `title` | `string` | Yes | Final plan title. `.md` is optional; the runtime normalizes to `local://<title>.md`. Allowed characters: letters, numbers, `_`, `-`. |\n\n## Outputs\n- Single-shot success result with `content[0].text = \"Plan ready for approval.\"`.\n- `details` contains:\n - `planFilePath` — current plan artifact path from plan-mode state, typically `local://PLAN.md`\n - `planExists` — whether that file existed at call time\n - `title` — normalized title without `.md`\n - `finalPlanFilePath` — normalized destination, always `local://<title>.md`\n- The actual rename and mode transition happen later in the interactive controller after the user chooses an approval action.\n\n## Flow\n1. `execute()` reads `session.getPlanModeState()` and rejects the call unless `state.enabled` is true.\n2. `normalizePlanTitle()` trims whitespace, rejects empty values, rejects `/`, `\\\\`, and `..`, appends `.md` if missing, and enforces `^[A-Za-z0-9_-]+\\.md$`.\n3. The tool computes `finalPlanFilePath = local://<normalized>.md` and resolves both source and destination through `resolvePlanPath(...)` to validate them against plan-mode path rules.\n4. 
It `stat`s the current plan file path; if the plan artifact does not exist it throws a `ToolError` telling the caller to write the finalized plan first.\n5. On success it returns the approval-ready payload; it does not mutate files itself.\n6. `packages/coding-agent/src/modes/controllers/event-controller.ts` watches successful `exit_plan_mode` results and forwards `details` to `InteractiveMode.handleExitPlanModeTool(...)`.\n7. The interactive controller aborts the agent, renders the current plan, and shows four choices: `Approve and execute`, `Approve and keep context`, `Refine plan`, `Stay in plan mode`.\n8. If the user approves, `#approvePlan(...)` renames `local://PLAN.md` to `local://<title>.md`, exits plan mode, restores the previous tool set, optionally clears session context, writes the approved plan into the new local root when context is reset, and injects a synthetic system prompt instructing execution from the finalized artifact.\n\n## Side Effects\n- Filesystem\n - Tool itself only `stat`s the current plan file.\n - Approval path later renames the plan artifact via `fs.rename(...)` and may rewrite the approved plan into a fresh local root with `Bun.write(...)`.\n- Session state\n - Requires active plan-mode state.\n - Approval flow aborts the current agent loop, exits plan mode, restores previous active tools, clears or preserves context depending on the user choice, and records the approved plan reference path.\n- User-visible prompts / interactive UI\n - Successful calls trigger a plan preview and an approval/refinement selector in interactive mode.\n- Background work / cancellation\n - The controller aborts the running agent before showing the popup to prevent repeated `exit_plan_mode` calls.\n\n## Limits & Caps\n- `title` accepts only `[A-Za-z0-9_-]` plus optional `.md` (`packages/coding-agent/src/tools/exit-plan-mode.ts`).\n- Destination must be under the `local:` scheme; approval rename rejects non-`local:` source or destination paths 
(`packages/coding-agent/src/plan-mode/approved-plan.ts`).\n- In plan mode, only the plan file may be edited; other writes are blocked by `enforcePlanModeWrite(...)` in `packages/coding-agent/src/tools/plan-mode-guard.ts`.\n\n## Errors\n- Plan mode inactive: throws `ToolError(\"Plan mode is not active.\")`.\n- Empty title: throws `ToolError(\"Title is required and must not be empty.\")`.\n- Path traversal / separators: throws `ToolError(\"Title must not contain path separators or '..'.\")`.\n- Invalid characters: throws `ToolError(\"Title may only contain letters, numbers, underscores, or hyphens.\")`.\n- Missing plan artifact: throws `ToolError(\"Plan file not found at ... Write the finalized plan ... before calling exit_plan_mode.\")`.\n- Approval-time failures surface in the UI from `InteractiveMode.handleExitPlanModeTool(...)`, including destination already exists and rename failures from `renameApprovedPlanFile(...)`.\n\n## Notes\n- This tool is hidden/internal: it is injected when `plan.enabled` is on and is not part of normal discoverable built-ins (`packages/coding-agent/src/tools/index.ts`, `packages/coding-agent/src/session/agent-session.ts`).\n- The tool returning success does not mean plan mode has ended; it only means the request was handed off to the approval UI.\n- `resolvePlanPath(...)` special-cases bare filenames matching the plan basename so `PLAN.md` maps back to the canonical session-scoped `local://PLAN.md` artifact.\n- `Approve and keep context` skips the full conversation reset; `Approve and execute` clears context, then copies the approved plan into the new session-local artifact root before execution resumes.\n",
70
+ "tools/eval.md": "# eval\n\n> Execute Python or JavaScript code in persistent cell-based runtimes.\n\n> **Notice:** Do not shell out to `python -c`, `bun -e`, or `node -e` via the `bash` tool for ad-hoc code execution. Use this tool instead — it gives you persistent state across cells, structured `display()` output, image/JSON capture, and proper cancellation/timeout handling that one-shot `-e`/`-c` invocations cannot provide.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/eval.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/eval.md`\n- Key collaborators:\n - `packages/coding-agent/src/eval/parse.ts` — lenient cell parser\n - `packages/coding-agent/src/eval/sniff.ts` — language sniffing heuristics\n - `packages/coding-agent/src/eval/backend.ts` — backend execution contract\n - `packages/coding-agent/src/eval/js/index.ts` — JS backend adapter\n - `packages/coding-agent/src/eval/js/executor.ts` — JS execution + output sink\n - `packages/coding-agent/src/eval/js/context-manager.ts` — persistent VM contexts, prelude, tool bridge\n - `packages/coding-agent/src/eval/js/prelude.txt` — JS global helpers\n - `packages/coding-agent/src/eval/py/index.ts` — Python backend adapter\n - `packages/coding-agent/src/eval/py/executor.ts` — kernel session retention, reset, cleanup\n - `packages/coding-agent/src/eval/py/kernel.ts` — Jupyter gateway/kernel protocol, display capture\n - `packages/coding-agent/src/eval/py/prelude.py` — Python helper functions and status events\n - `packages/coding-agent/src/session/streaming-output.ts` — truncation, artifacts, streamed chunks\n - `docs/python-repl.md` — Python kernel/gateway internals\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `input` | `string` | Yes | Cell program text. Parsed by `parseEvalInput()` in `packages/coding-agent/src/eval/parse.ts`, not by JSON subfields. |\n\n`input` syntax accepted at runtime:\n\n- Cell header: `*** Cell <attrs...>`. 
Attributes are space-separated tokens with quoted titles (`\"...\"` or `'...'`).\n- Canonical tokens (advertised in the prompt):\n - `<lang>:\"<title>\"` — language + title shorthand. `lang` is `py` or `js` (lenient: also `ts`, plus the long-form aliases `python`, `javascript`, `typescript`, `ipy`, `ipython`).\n - `t:<n>[ms|s|m]` — per-cell timeout (default 30s).\n - `rst` — wipe this cell's language kernel before running.\n- Lenient additional tokens (accepted by the parser, not advertised):\n - bare language token (`py`, `js`)\n - `id:\"...\"` / `title:\"...\"` / `name:\"...\"` / `cell:\"...\"` / `file:\"...\"` / `label:\"...\"` — title aliases\n - `timeout:` / `duration:` / `time:` — `t:` aliases\n - `reset` — `rst` alias\n - `rst:true|false|1|0|yes|no|on|off` — explicit boolean form\n - a bare positional duration token (`30s`, `2m`, `500ms`)\n - any unclassified bare token folds into a positional title fragment\n- Cell body: every following line until the next `*** Cell ...`, the optional `*** End`, or `*** Abort`. `*** End` is a quirk fix for GPT-trained models that emit terminators and is not documented in the prompt.\n\nLeniencies in `packages/coding-agent/src/eval/parse.ts`:\n\n- Markers accept two or more leading `*` and flexible whitespace.\n- `*** End` is optional everywhere; the parser silently consumes trailing tokens (e.g. `*** End py`).\n- Missing terminators between adjacent cells are tolerated; the next `*** Cell` closes the prior cell, and stray non-marker lines between cells fold into the prior cell's body without crashing.\n- Bare code or a single markdown fence such as ```` ```py ```` is treated as one implicit cell.\n- If `*** Abort` appears, the in-progress cell is dropped and the result carries an abort warning. To preserve a completed cell before `*** Abort`, emit `*** End` first.\n\nThe tool also exposes a custom Lark grammar from `packages/coding-agent/src/eval/eval.lark` for constrained sampling. 
That grammar is stricter than the runtime parser: it requires the canonical `*** Cell <lang>:\"title\"` header form with a fixed attribute order, advertises only `py` / `js`, and pins the trailing `*** End` so GPT-trained models' natural terminator habit aligns with the constrained output.\n\n## Outputs\n\nFinal result from `EvalTool.execute()` is single-shot, but `onUpdate` streams partial text and `details` while cells run.\n\nReturned shape:\n\n- `content`: one text block containing combined cell output, or `(no text output)` / `(no output)` when only rich outputs exist.\n- `details` (`EvalToolDetails` from `packages/coding-agent/src/eval/types.ts`):\n - `cells`: per-cell code, status (`pending`/`running`/`complete`/`error`), output, duration, exit code, status events, markdown flag\n - `language`: first backend used\n - `languages`: distinct backends used, in first-use order\n - `jsonOutputs`: structured values emitted via `display(...)`\n - `images`: image payloads emitted by Python rich display or JS `display({ type: \"image\", ... 
})`\n - `statusEvents`: aggregated helper/tool status events\n - `notice`: backend fallback notice\n - `meta`: truncation metadata\n - `isError`: set on cell failure or cancellation\n\nRenderer behavior in `packages/coding-agent/src/tools/eval.ts`:\n\n- call preview renders parsed code cells with syntax highlighting\n- result view renders each cell separately, including status, duration, and output\n- markdown outputs are rendered with the Markdown component instead of plain text\n- `jsonOutputs` render as a tree, collapsed or expanded depending on UI state\n- timeout / fallback / truncation notices render as dim metadata lines\n- images are carried in `details.images`; generic tool UI image handling renders them outside the text block\n\nSide-channel artifacts:\n\n- `session.allocateOutputArtifact?.(\"eval\")` may allocate an `artifact://...` backing store for spilled output.\n- Truncated output metadata points at that artifact when available.\n\n## Flow\n\n1. `EvalTool.execute()` in `packages/coding-agent/src/tools/eval.ts` parses `params.input` with `parseEvalInput()`.\n2. `parseEvalInput()` normalizes newlines, collects cells, parses attributes, and assigns each cell a language from the header, language sniffing, or the default `python`.\n3. Back in `execute()`, each parsed cell is resolved to a backend with `resolveBackend()`:\n - explicit `python`/`js` requests are validated against session settings and backend availability\n - otherwise `sniffEvalLanguage()` in `packages/coding-agent/src/eval/sniff.ts` tries shebangs and language markers\n - if no explicit language was present, later cells prefer the previous runtime language before re-sniffing\n - Python is preferred when available; JS is the fallback when Python is unavailable or disabled\n4. The tool allocates an `OutputSink`, a `TailBuffer`, per-cell result objects, and a `sessionAbortController`. `session.trackEvalExecution?.(...)` can wrap the whole run for external cancellation tracking.\n5. 
Cells execute sequentially. For each cell, `execute()`:\n - clamps the cell timeout through `clampTimeout(\"eval\", ...)`\n - builds a combined abort signal from the tool signal, the timeout, and the session abort controller\n - marks the cell `running` and emits an update\n - calls the backend’s `execute()` with `cwd`, `sessionId`, `sessionFile`, `kernelOwnerId`, `deadlineMs`, `reset`, artifact info, and chunk callback\n6. JS cells dispatch through `packages/coding-agent/src/eval/js/index.ts` into `executeJs()`; Python cells dispatch through `packages/coding-agent/src/eval/py/index.ts` into `executePython()`.\n7. Backend text chunks stream into the shared `OutputSink`; rich outputs are accumulated separately as JSON, images, markdown markers, and status events.\n8. After each cell:\n - text output is trimmed and stored on that cell result\n - multi-cell runs prefix text with `[i/n]` and the optional title\n - cancellations return early with `isError: true` and a cell-specific abort message\n - non-zero exit codes return early with `isError: true` and a message naming the failed cell\n - later cells are skipped after the first error, but earlier cell state persists in the underlying runtime\n9. On success, the tool joins all cell outputs, synthesizes `(no text output)` or `(no output)` when needed, and attaches truncation metadata from `summarizeFinal()`.\n10. The renderer uses `details.cells`, `details.jsonOutputs`, and `details.statusEvents` to build notebook-style output. 
`mergeCallAndResult = true` and `inline = true`, so call and result render together in the transcript.\n\n## Modes / Variants\n\n### Parsing modes\n\n- Explicit multi-cell format with `*** Cell ...` headers\n- Implicit single-cell fallback for bare code or a single fenced block\n- Abort-recovery parse path when `*** Abort` is present\n\n### Backend selection\n\n- Explicit Python backend\n- Explicit JavaScript backend\n- Auto-detected backend via `sniffEvalLanguage()`\n- Fallback from requested/inferred Python to JS when Python is unavailable\n- Fallback notice when JS markers are seen but `eval.js` is disabled and Python is used instead\n\n### JavaScript runtime\n\nImplemented in `packages/coding-agent/src/eval/js/context-manager.ts` and `packages/coding-agent/src/eval/js/prelude.txt`.\n\n- Persistent `vm.Context` instances keyed by `js:${sessionId}` in `vmContexts`\n- `rst` calls `resetVmContext(sessionKey)` before the cell executes\n- Top-level `await` and bare `return` are supported by wrapping code in an async IIFE when `wrapCode()` sees `await` or `return`\n- Top-level static `import ... 
from ...` and dynamic `import(...)` calls are routed through `rewriteImports()`, which sends them via `__omp_import__` so the specifier resolves against the session cwd\n- The prelude installs globals:\n - `display`, `print`\n - `read`, `write`, `append`, `sort`, `uniq`, `counter`, `diff`, `tree`, `env`, `output`\n - `tool.<name>(args)` proxy for arbitrary session tool calls\n- JS helpers are async because they cross the VM/tool boundary\n- `display(value)` behavior:\n - plain objects/arrays become JSON outputs\n - `{ type: \"image\", data, mimeType }` becomes an image output\n - scalars become text\n- The VM exposes a restricted `process` subset plus `Buffer`, `fetch`, `Blob`, `File`, `Headers`, `Request`, `Response`, `fs`, `require`, and browser-style globals\n- Per-session VM runs are serialized with `runQueued()`\n\n### Python runtime\n\nImplemented in `packages/coding-agent/src/eval/py/executor.ts`, `packages/coding-agent/src/eval/py/kernel.ts`, and `packages/coding-agent/src/eval/py/prelude.py`. 
See `docs/python-repl.md` for gateway and kernel details.\n\n- Default mode is retained `session` kernels keyed by `python:${sessionId}`\n- Optional `python.kernelMode = \"per-call\"` creates a fresh kernel for each cell and shuts it down afterward\n- `rst` disposes the retained kernel for that session before the cell runs; later Python cells in the same tool call reuse the fresh kernel\n- Startup path:\n - availability check\n - create/connect kernel\n - initialize cwd / env / `sys.path`\n - execute `PYTHON_PRELUDE`\n- Python cells run inside IPython/Jupyter, so top-level `await` works; the prompt warns not to use `asyncio.run(...)`\n- The Python prelude defines synchronous helpers with the same surface as JS (except `tool.<name>` exists only in JS)\n- `display(value)` wraps dict/list/tuple values in `IPython.display.JSON`; rich display MIME bundles are preserved\n- Kernel `display_data` / `execute_result` messages map to:\n - `application/x-omp-status` → status event\n - `image/png` → image output\n - `application/json` → JSON output\n - `text/markdown` → markdown output\n - `text/plain` → text output\n - `text/html` → HTML converted to markdown with `htmlToBasicMarkdown()`\n- Interactive stdin is rejected: `input_request` sends an empty reply, marks `stdinRequested`, and the executor returns exit code `1`\n\n### Multi-language call behavior\n\nA single tool call can mix Python and JS cells. 
Persistence is per language runtime:\n\n- resetting Python does not touch JS state\n- resetting JS does not touch Python state\n- each backend keeps its own retained session keyed from the same session-derived ID\n\n## Side Effects\n\n- Filesystem\n - JS/Python prelude helpers can read, write, append, diff, and traverse files under the session cwd or absolute paths.\n - Output may spill to an artifact file via `OutputSink`.\n- Network\n - Python backend speaks NDJSON to a local `python3` subprocess over stdin/stdout (no network).\n - JS runtime exposes `fetch` and `tool.<name>()`; those tools may perform additional network I/O.\n- Subprocesses / native bindings\n - Python availability check runs `<python> -c ...`.\n - Python backend spawns one `python -u runner.py` subprocess per kernel; cancellation sends `SIGINT`. Details in `docs/python-repl.md`.\n- Session state\n - `session.assertEvalExecutionAllowed?.()` can block execution.\n - `session.trackEvalExecution?.(...)` can register cancellable eval work.\n - `session.getSessionFile?.()` and `session.getEvalKernelOwnerId?.()` influence kernel reuse and artifact lookup.\n - JS VM contexts persist in `vmContexts` across eval calls until reset/disposal.\n - Python retained kernels persist in `kernelSessions` until reset, eviction, idle cleanup, or owner cleanup.\n- User-visible prompts / interactive UI\n - none; stdin requests are rejected programmatically\n- Background work / cancellation\n - Python retained kernels have heartbeat and idle cleanup timers.\n - Cancellation interrupts a running Python kernel and aborts JS promise waits.\n\n## Limits & Caps\n\n- Per-cell timeout default: 30s (`DEFAULT_TIMEOUT_MS` in `packages/coding-agent/src/eval/parse.ts`; `TOOL_TIMEOUTS.eval.default` in `packages/coding-agent/src/tools/tool-timeouts.ts`)\n- Timeout clamp: 1s minimum, 600s maximum (`TOOL_TIMEOUTS.eval` in `packages/coding-agent/src/tools/tool-timeouts.ts`)\n- Transcript code/output preview: 10 lines by default 
(`EVAL_DEFAULT_PREVIEW_LINES` in `packages/coding-agent/src/tools/eval.ts`)\n- Output truncation window: 50KB default (`DEFAULT_MAX_BYTES` in `packages/coding-agent/src/session/streaming-output.ts`)\n- Output line cap inside truncation helpers: 3000 lines (`DEFAULT_MAX_LINES` in `packages/coding-agent/src/session/streaming-output.ts`)\n- Streaming tail buffer for live updates: `DEFAULT_MAX_BYTES * 2` = 100KB (`packages/coding-agent/src/tools/eval.ts`)\n- Python retained kernel idle timeout: 5 minutes (`IDLE_TIMEOUT_MS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python retained kernel cap: 4 sessions (`MAX_KERNEL_SESSIONS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python retained kernel cleanup sweep: every 30s (`CLEANUP_INTERVAL_MS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python owner-cleanup shutdown wait: 2000ms (`OWNER_CLEANUP_KERNEL_SHUTDOWN_TIMEOUT_MS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python heartbeat interval: 5s (`ensureKernelHeartbeat()` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python external gateway availability check timeout: 5s (`AbortSignal.timeout(5000)` in `packages/coding-agent/src/eval/py/kernel.ts`)\n- Python auto-restart budget: one restart per retained session before hard failure (`restartCount > 1` in `packages/coding-agent/src/eval/py/executor.ts`)\n\n## Errors\n\n- Parse errors from `parseEvalInput()` throw immediately, for example invalid timeout strings.\n- Missing session without proxy executor throws `ToolError(\"Eval tool requires a session when not using proxy executor\")`.\n- Disabled/unavailable backends throw `ToolError` from `resolveBackend()`:\n - `eval.py = false`\n - `eval.js = false`\n - Python kernel unavailable\n - no backend available\n- JS runtime exceptions are converted into text output plus `exitCode: 1`; cancellations return `cancelled: true` and may append `Command timed out`.\n- Python execution errors from the kernel become text output and 
`exitCode: 1`; later cells are skipped.\n- Python stdin requests are treated as errors with the message `Kernel requested stdin; interactive input is not supported.`\n- Cancellation is returned, not thrown, once backend execution has started. The tool formats it as a cell failure and sets `details.isError = true`.\n- If parsing encountered `*** Abort`, the final text appends `ABORT_WARNING`, explicitly telling the model that earlier cells ran and state persists.\n- If output truncates, the tool still succeeds; truncation is surfaced through `details.meta` and artifact-backed full output when available.\n\n## Notes\n\n- The runtime parser is intentionally more permissive than `packages/coding-agent/src/eval/eval.lark`; maintain both when changing syntax.\n- Cell language in `ParsedEvalCell` is not the last word: `EvalTool.execute()` may override backend selection for cells without an explicit header by inheriting the previous runtime language.\n- `tool.<name>()` exists only in JS. Python prelude helpers do not call back into the full tool registry.\n- JS helper paths reject protocol URIs (`://`) in `resolvePath()`; the JS prelude is filesystem-only unless the code calls `tool.read(...)` or another tool explicitly.\n- Python helper `output(...)` depends on `PI_SESSION_FILE`; it fails outside a session-backed run.\n- `display()` can produce text and structured outputs from the same value; the renderer prefers markdown over `text/plain` when both exist.\n- JS static imports are rewritten only at top level. Nested imports stay invalid and surface normal JS syntax/runtime errors.\n- `EvalTool` is `concurrency = \"exclusive\"`, so eval calls do not overlap within a session.\n- The tool description shown to the model is templated by backend availability (`getEvalToolDescription()`); if Python is unavailable, the prompt omits Python-specific instructions.\n",
72
71
  "tools/find.md": "# find\n\n> Find filesystem paths by glob; use `search` when you need content matches instead of path matches.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/find.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/find.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/path-utils.ts` — normalize inputs; split base path vs glob.\n - `packages/coding-agent/src/tools/list-limit.ts` — apply result-count caps.\n - `packages/coding-agent/src/session/streaming-output.ts` — truncate text output at byte cap.\n - `packages/coding-agent/src/tools/tool-result.ts` — build `content` and `details.meta`.\n - `packages/coding-agent/src/tools/output-meta.ts` — encode limit / truncation metadata.\n - `packages/coding-agent/src/tools/tool-errors.ts` — map user-facing tool errors.\n - `packages/coding-agent/src/tools/index.ts` — register the built-in local implementation.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `paths` | `string[]` | Yes | One or more globs, files, or directories. Empty strings are rejected. Multiple entries may be merged into one brace-union search when their base paths can be resolved together. |\n| `hidden` | `boolean` | No | Whether hidden files are included. Defaults to `true` (`hidden ?? true`). |\n| `limit` | `number` | No | Max returned paths. Defaults to `1000`. Must be a finite positive number; non-integers are floored. |\n\n## Outputs\nThe tool returns a single text block plus structured `details`.\n\n- Success text: newline-delimited paths, one per line, relative to the session cwd when possible; absolute when outside cwd. 
Exact file inputs return that file path as one line.\n- Empty result text: `No files found matching pattern`.\n- Multi-path partial miss: appends `Skipped missing paths: ...` after the result block, or after the empty-result line.\n- `details` may include:\n - `scopePath`: display form of the searched root or merged roots.\n - `fileCount`: number of paths returned after result limiting.\n - `files`: returned paths as an array.\n - `truncated`: whether result count or byte truncation occurred.\n - `resultLimitReached`: reached result limit.\n - `missingPaths`: skipped missing inputs in multi-path calls.\n - `truncation` / `meta.limits`: structured truncation and limit metadata for renderers.\n- Streaming: when the runtime supplies `onUpdate`, the local implementation emits incremental newline-delimited text snapshots during globbing, throttled to 200 ms.\n\n## Flow\n1. `FindTool.execute()` normalizes each `paths` entry with `normalizePathLikeInput()` and `/\\\\/g -> \"/\"` (`packages/coding-agent/src/tools/find.ts`). Empty normalized entries fail with `` `paths` must contain non-empty globs or paths ``.\n2. For multi-path local calls, `partitionExistingPaths(..., parseFindPattern)` (`packages/coding-agent/src/tools/path-utils.ts`) stats each base path. Missing entries are skipped; if all are missing, the tool throws `Path not found: ...`. Single missing paths still hard-fail.\n3. The tool tries `resolveExplicitFindPatterns()` to merge multiple inputs into one search rooted at a common base path. If that does not apply, it parses one input with `parseFindPattern()`.\n4. `parseFindPattern()` determines `(basePath, globPattern, hasGlob)`:\n - no glob chars (`*`, `?`, `[`, `{`) => search that path with implicit `**/*`.\n - glob in the first segment => search from `.` and, unless the pattern already starts with `**/`, prefix it with `**/`.\n - glob later in the path => split at the first glob-bearing segment.\n5. 
`resolveToCwd()` converts the base path to an absolute path under the session cwd. A resolved `/` is rejected with `Searching from root directory '/' is not allowed`.\n6. `limit` is defaulted to `DEFAULT_LIMIT` (`1000`) and validated as a positive finite number; non-integer values are floored. `hidden` defaults to `true`. The tool also creates a 5 s timeout via `AbortSignal.timeout(GLOB_TIMEOUT_MS)`.\n7. Execution then branches:\n - **Custom operations branch**: if `FindToolOptions.operations.glob` exists, the tool checks existence with `operations.exists()`, short-circuits exact-file inputs via `operations.stat()` when available, then calls `operations.glob(globPattern, searchPath, { ignore: [\"**/node_modules/**\", \"**/.git/**\"], limit })`.\n - **Built-in local branch**: the tool stats `searchPath`. Exact-file inputs return immediately. Directory inputs call `natives.glob()` with `fileType: File`, `hidden`, `maxResults: limit`, `sortByMtime: true`, `gitignore: true`, and the combined abort signal.\n8. In the local branch, optional `onMatch` callbacks convert each match to a cwd-relative display path and emit throttled progress updates.\n9. After native glob returns, JS sorts `result.matches` by `mtime` descending (`(b.mtime ?? 0) - (a.mtime ?? 0)`) before formatting paths.\n10. `buildResult()` applies `applyListLimit()` to cap the array again at `limit`, joins paths with `\\n`, then runs `truncateHead()` with `maxLines: Number.MAX_SAFE_INTEGER`. In practice this leaves the 50 KB byte cap in place while disabling the default 3000-line cap.\n11. 
`toolResult()` packages text plus `details`, and records result-limit / truncation metadata for renderers.\n\n## Modes / Variants\n- **Exact file path**: if the parsed input has no glob and the resolved path stats as a file, output is that one path.\n- **Directory path**: if the parsed input has no glob and stats as a directory, the tool searches it with implicit `**/*`.\n- **Single glob path**: one input parsed by `parseFindPattern()`.\n- **Merged multi-path search**: multiple inputs resolved by `resolveExplicitFindPatterns()` into one brace-union glob rooted at a common base path.\n- **Partial multi-path search with missing inputs**: local multi-path calls skip missing base paths and surface them as `missingPaths` / `Skipped missing paths: ...`.\n- **Custom delegated search**: uses injected `FindOperations` instead of local fs + native glob.\n\n## Side Effects\n- Filesystem\n - Stats the resolved base path, and in local multi-path mode stats every candidate base path up front.\n - Does not write files.\n- Subprocesses / native bindings\n - Built-in local mode calls the native `@oh-my-pi/pi-natives` glob implementation.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Emits structured progress updates when `onUpdate` is provided.\n - Adds truncation / limit metadata to the tool result.\n- Background work / cancellation\n - Local globbing is cancellable through the caller abort signal plus an internal 5 s timeout.\n\n## Limits & Caps\n- Default result limit: `1000` (`DEFAULT_LIMIT` in `packages/coding-agent/src/tools/find.ts`).\n- Local glob timeout: `5000` ms (`GLOB_TIMEOUT_MS` in `packages/coding-agent/src/tools/find.ts`).\n- Output byte cap: `50 * 1024` bytes (`DEFAULT_MAX_BYTES` in `packages/coding-agent/src/session/streaming-output.ts`).\n- Default generic line cap in `truncateHead()` is `3000`, but `find` overrides `maxLines` to `Number.MAX_SAFE_INTEGER`, so byte size — not line count — is the practical output truncation cap.\n- 
Streaming update throttle: `200` ms between `onUpdate` emissions.\n- Sort order: most recent `mtime` first in the built-in local branch and promised in the prompt. The tool re-sorts in JS even though native glob receives `sortByMtime: true` so native code can still stop early at `maxResults`.\n\n## Errors\n- User-facing `ToolError`s from `FindTool.execute()` include:\n - `` `paths` must contain non-empty globs or paths ``\n - `Path not found: ...`\n - `Searching from root directory '/' is not allowed`\n - `Limit must be a positive number`\n - `Path is not a directory: ...`\n - `find timed out after 5s`\n- If the caller aborts, the local branch converts `AbortError` into `ToolAbortError`.\n- Non-`ENOENT` stat failures and other unexpected errors are rethrown.\n- Empty matches are not errors; they return the no-files text result.\n\n## Notes\n- Reach for `find` for filename / path discovery. Reach for `search` when the selection criterion is file contents or regex matches; `search` takes a `pattern` and returns anchored content matches, while `find` only returns matching paths (`packages/coding-agent/src/prompts/tools/find.md`, `packages/coding-agent/src/prompts/tools/search.md`).\n- Bare top-level globs are made recursive. `*.ts` is parsed as base `.` plus glob `**/*.ts`; `src/*.ts` stays rooted at `src` with a non-recursive `*.ts` segment; `src/**/*.ts` preserves explicit recursion.\n- `.gitignore` is always enabled in the built-in local branch (`gitignore: true`). There is no model-facing flag to disable it.\n- `hidden` defaults to `true`, so hidden files are included by default; excluding them is opt-in via `hidden: false`.\n- Multi-path missing-input tolerance only applies in the built-in local branch. The custom-operations branch hard-fails the first missing `searchPath` it checks.\n- The custom `FindOperations.glob()` hook receives `ignore` and `limit`, but not the `hidden` flag or an explicit `.gitignore` toggle. 
A remote delegate must account for that itself if it wants parity with the local branch.\n- Built-in local globbing asks the native layer for `fileType: File`, so recursive directory searches yield files, not directories. Directory outputs are only possible through exact-path passthrough or custom delegates that return them.\n",
73
- "tools/github.md": "# github\n\n> Dispatch GitHub CLI operations for repositories, issues, pull requests, search, and Actions run watching.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/gh.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/github.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/gh-format.ts` — shorten commit SHAs for summaries.\n - `packages/coding-agent/src/tools/gh-renderer.ts` — TUI rendering, especially `run_watch` live/result views.\n - `packages/coding-agent/src/utils/git.ts` — `gh`/`git` process wrappers, repo locking, branch config writes.\n - `packages/utils/src/dirs.ts` — base directory for dedicated PR worktrees.\n - `packages/coding-agent/src/sdk.ts` — session artifact allocation hook.\n - `packages/coding-agent/src/session/artifacts.ts` — artifact filename format `<id>.<toolType>.log`.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `op` | `\"repo_view\" \\| \"pr_create\" \\| \"pr_checkout\" \\| \"pr_push\" \\| \"search_issues\" \\| \"search_prs\" \\| \"search_code\" \\| \"search_commits\" \\| \"search_repos\" \\| \"run_watch\"` | Yes | Dispatch selector. `GithubTool.execute()` switches only on this field. |\n| `repo` | `string` | No | `owner/repo` override. Ignored when the identifier argument is already a full GitHub URL. Required in practice when `gh` cannot infer repo context from the current checkout. |\n| `branch` | `string` | No | Used by `repo_view`, `pr_push`, and `run_watch`. `run_watch` falls back to current git branch when `run` is omitted; `pr_push` falls back to current branch. |\n| `pr` | `string \\| string[]` | No | Used by `pr_checkout`. Each item may be a PR number, branch name, or GitHub PR URL. Array form enables batching. Omitted means current branch PR. |\n| `force` | `boolean` | No | Used only by `pr_checkout`. Defaults to `false`; allows resetting an existing `pr-<number>` local branch to the PR head commit. 
|\n| `forceWithLease` | `boolean` | No | Used only by `pr_push`; passed through to git push. |\n| `title` | `string` | No | Used only by `pr_create`. Required unless `fill` is `true`. |\n| `body` | `string` | No | Used only by `pr_create`. Mutually exclusive with `fill`. Empty/omitted body becomes `--body \"\"` to suppress the interactive editor. Non-empty body is written to a temp file and passed as `--body-file`. |\n| `base` | `string` | No | Used only by `pr_create`; passed as `--base`. |\n| `head` | `string` | No | Used only by `pr_create`; passed as `--head`. |\n| `draft` | `boolean` | No | Used only by `pr_create`. Defaults to `false`. |\n| `fill` | `boolean` | No | Used only by `pr_create`. Defaults to `false`. Mutually exclusive with `title` and `body`. |\n| `reviewer` | `string[]` | No | Used only by `pr_create`; each entry becomes `--reviewer`. |\n| `assignee` | `string[]` | No | Used only by `pr_create`; each entry becomes `--assignee`. |\n| `label` | `string[]` | No | Used only by `pr_create`; each entry becomes `--label`. |\n| `query` | `string` | No | Used by all `search_*` ops. Required there. |\n| `limit` | `number` | No | Used by all `search_*` ops. Defaults to `10`, floored, clamped to `50`, and must be `> 0`. |\n| `run` | `string` | No | Used only by `run_watch`. Must be a numeric run ID or full GitHub Actions run URL. |\n| `tail` | `number` | No | Used only by `run_watch`. Defaults to `15`, floored, clamped to `200`, and must be `> 0`. |\n\n## Outputs\nThe tool returns a single text result built by `buildTextResult()` in `packages/coding-agent/src/tools/gh.ts`.\n\n- `content`: one text block. 
Multi-item ops join sections with blank lines and `---` separators.\n- `sourceUrl`: set for single repo/PR/run results when a canonical URL is known.\n- `details`: optional structured metadata used by the TUI renderer.\n - Common fields: `artifactId`, `repo`, `branch`, `worktreePath`, `remote`, `remoteBranch`, `headSha`, `runId`, `runIds`, `status`, `conclusion`, `failedJobs`.\n - `pr_checkout` adds `checkouts: GhPrCheckoutSummary[]`.\n - `run_watch` adds `watch: GhRunWatchViewDetails`, which drives the custom live/result renderer in `packages/coding-agent/src/tools/gh-renderer.ts`.\n- Artifact trailer: when `artifactId` is present, the text body gets an appended line like `Full failed-job logs: artifact://<id>`.\n - `run_watch` allocates artifacts with `session.allocateOutputArtifact(\"github\")`; persistent sessions therefore save failed-log bodies as `<artifact-dir>/<id>.github.log`.\n\n`run_watch` is the only streaming op. It emits `onUpdate` snapshots while polling, then returns one final text result.\n\n## Flow\n1. `GithubTool.createIf()` exposes the tool only when `git.github.available()` finds `gh` on `PATH`.\n2. `GithubTool.execute()` wraps dispatch in `untilAborted()` and switches on `params.op`.\n3. Each op normalizes optional strings, arrays, booleans, and numeric caps locally in `packages/coding-agent/src/tools/gh.ts`.\n4. CLI execution goes through `git.github.run/json/text()` in `packages/coding-agent/src/utils/git.ts`:\n - spawns `gh ...` with `Bun.spawn()`;\n - trims stdout/stderr unless `trimOutput: false`;\n - maps common auth/repo-context failures into tool-facing `ToolError` messages;\n - `json()` rejects empty or invalid JSON.\n5. Read-style ops (`repo_view`, `search_*`) fetch JSON and format Markdown-like text summaries. Single-issue and single-PR views were moved out of the tool and now resolve through the `issue://` / `pr://` internal URL schemes, which share the same SQLite cache.\n7. PR diffs moved out of the tool. 
`pr://<N>/diff` lists changed files, `pr://<N>/diff/<i>` slices a single file, and `pr://<N>/diff/all` returns the full unified diff — see `docs/tools/read.md`. All three variants share one `gh pr diff` invocation through the `pr-diff` cache row.\n8. `pr_checkout` resolves PR metadata first, then enters `git.withRepoLock()` before any git mutation so parallel checkout calls for the same primary repo do not race on shared `.git` state.\n9. `pr_push` reads PR head metadata back from git branch config, derives a refspec, then pushes with `git.push()`.\n10. `pr_create` shells out once, then best-effort re-reads the created PR for a richer summary.\n11. `run_watch` chooses either run mode (`run` supplied) or commit mode (`run` omitted), polls GitHub Actions APIs every 3 seconds, emits streaming updates, and may save a full failed-log artifact before returning.\n12. Final text goes through `toolResult().text(...)`; if `session.allocateOutputArtifact()` returns a slot, failed-log text is persisted with `Bun.write()`.\n\n## Modes / Variants\n\n### `repo_view`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `branch` |\n| `gh` command | `gh repo view [<repo>] [--branch <branch>] --json <GH_REPO_FIELDS>` |\n| Batching | None |\n| Output | `# <owner/repo>` header, description, URL, default branch, requested branch, visibility, permission, primary language, stars, forks, archive/fork flags, updated timestamp, homepage, topics. `sourceUrl = data.url`. |\n\nIf `repo` is omitted, `gh` repository resolution is used.\n\nSingle-issue and single-PR reads live in the `issue://<N>` / `pr://<N>` URL schemes (see `docs/tools/read.md`). They share `~/.omp/cache/github-cache.db` (override via `OMP_GITHUB_CACHE_DB`) and the `github.cache.softTtlSec` / `github.cache.hardTtlSec` / `github.cache.enabled` settings. 
The cache retains rendered Markdown plus the raw JSON payload returned by `gh`, including private bodies, comments, reviews, and review comments when comments are enabled; rows are scoped by the local GitHub credential fingerprint. Root and repo-scoped reads (`issue://`, `pr://owner/repo`) issue a live `gh issue list` / `gh pr list` for browsing; query params `state`, `limit`, `author`, `label` pass through to `gh` (`issue://` accepts `state=open|closed|all`; `pr://` also accepts `merged`). PR diffs ride the same cache under `pr://<N>/diff[/…]`: the listing, full diff, and per-file slices all share one `pr-diff` row keyed by repo and PR number.\n\n### `pr_create`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` plus either `fill=true` or `title` |\n| Optional fields | `repo`, `title`, `body`, `base`, `head`, `draft`, `fill`, `reviewer[]`, `assignee[]`, `label[]` |\n| `gh` command | `gh pr create ...` with flags assembled from provided fields |\n| Batching | None |\n| Output | `# Created Pull Request ...` summary with URL, state, draft flag, base/head, author, created time, labels, optional body. `sourceUrl` is the created PR URL. |\n\nBranches:\n- `fill && (title || body !== undefined)` throws.\n- Non-empty `body` is written under a temp dir `gh-pr-body-*` in `os.tmpdir()`, passed as `--body-file`, then removed in `finally`.\n- After creation, the tool parses the returned URL and best-effort runs `gh pr view <number> --repo <repo> --json <GH_PR_FIELDS_NO_COMMENTS>`; failures there are swallowed.\n\n### `pr_checkout`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `pr`, `force` |\n| `gh` command | For each requested PR: `gh pr view [<pr>] [--repo <repo>] --json <GH_PR_CHECKOUT_FIELDS>`; cross-repo PRs may also call `gh repo view <headRepository> --json <GH_REPO_CLONE_FIELDS>`. |\n| Batching | Yes. 
`pr` may be `string[]`; each PR is resolved in parallel, but git mutations are serialized per primary repo by `git.withRepoLock()`. |\n| Output | Single PR: checkout/worktree summary plus `details.repo`, `details.branch`, `details.worktreePath`, `details.remote`, `details.remoteBranch`, `details.checkouts`. Batched: `# <n> Pull Request Worktrees (...)` plus one section per PR and aggregated `details.checkouts`. |\n\nWorktree and metadata behavior:\n- Local branch name is always `pr-<number>`.\n- Worktree path is `path.join(getWorktreesDir(), encodeRepoPathForFilesystem(primaryRepoRoot), localBranch)`, where `getWorktreesDir()` is `~/.omp/wt`; effective path is `~/.omp/wt/<encoded-primary-repo-root>/pr-<number>`.\n- Existing worktree detection is by branch ref `refs/heads/pr-<number>` from `git.worktree.list()`.\n- New worktree creation calls `git.worktree.add(repoRoot, finalWorktreePath, localBranch, { signal })` after verifying the path is neither already registered nor already present on disk.\n- For same-repo PRs, remote is `origin`. 
For cross-repo PRs, the tool resolves a clone URL for the head repo, reuses an existing remote with the same URL when possible, or creates `fork-<owner>` / `fork-<owner>-<n>`.\n- The branch push metadata is persisted with `git config` under the repository's shared `.git/config` as:\n - `branch.pr-<number>.remote`\n - `branch.pr-<number>.merge`\n - `branch.pr-<number>.pushRemote`\n - `branch.pr-<number>.ompPrHeadRef`\n - `branch.pr-<number>.ompPrUrl`\n - `branch.pr-<number>.ompPrIsCrossRepository`\n - `branch.pr-<number>.ompPrMaintainerCanModify`\n- If `refs/heads/pr-<number>` already exists at a different commit, checkout fails unless `force=true`, in which case `git branch --force` resets it to the fetched PR head.\n- If a matching worktree already exists, the tool reuses it and reports `reused: true`.\n\n### `pr_push`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `branch`, `forceWithLease` |\n| `gh` command | None. This path uses git, not `gh`. |\n| Batching | None |\n| Output | `# Pushed Pull Request Branch` summary with local branch, remote, remote branch, remote URL, PR URL, and force-with-lease flag. `sourceUrl = prUrl` when known. |\n\nPush target resolution reads the `branch.<name>.ompPrHeadRef`, `pushRemote`/`remote`, `ompPrUrl`, `ompPrMaintainerCanModify`, and `ompPrIsCrossRepository` git-config keys written by `pr_checkout`. If the current checked-out branch matches the target branch, the source ref is `HEAD`; otherwise it pushes `refs/heads/<branch>`. 
The refspec is `HEAD:refs/heads/<headRef>` or `refs/heads/<branch>:refs/heads/<headRef>`.\n\n### `search_issues`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `repo`, `limit` |\n| `gh` command | `gh api -X GET /search/issues -f q=\"<query> [repo:<repo>] is:issue\" -F per_page=<limit>` |\n| Batching | None |\n| Output | `# GitHub issues search`, echoed query, optional repo, result count, then one bullet per issue with repo/state/author/labels/timestamps/URL. |\n\n### `search_prs`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `repo`, `limit` |\n| `gh` command | `gh api -X GET /search/issues -f q=\"<query> [repo:<repo>] is:pr\" -F per_page=<limit>` |\n| Batching | None |\n| Output | Same shape as `search_issues`, labeled as pull requests. |\n\n### `search_code`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `repo`, `limit` |\n| `gh` command | `gh api -X GET /search/code -f q=\"<query> [repo:<repo>]\" -F per_page=<limit> -H \"Accept: application/vnd.github.text-match+json\"` |\n| Batching | None |\n| Output | `# GitHub code search`, result count, then one bullet per match with path, repo, short commit SHA, URL, and first normalized text-match fragment line when present. |\n\n### `search_commits`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `repo`, `limit` |\n| `gh` command | `gh api -X GET /search/commits -f q=\"<query> [repo:<repo>]\" -F per_page=<limit>` |\n| Batching | None |\n| Output | `# GitHub commits search`, result count, then one bullet per commit: short SHA + first commit-message line, repo, author, date, URL. 
|\n\n### `search_repos`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `limit` |\n| `gh` command | `gh api -X GET /search/repositories -f q=\"<query>\" -F per_page=<limit>` |\n| Batching | None |\n| Output | `# GitHub repositories search`, result count, then one bullet per repo with first description line, language, stars, forks, open issues, visibility, archive/fork flags, updated time, URL. |\n\n`repo` is intentionally not used for this op.\n\n### `run_watch`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `branch`, `run`, `tail` |\n| `gh` command | Repo resolution: `gh repo view --json nameWithOwner -q .nameWithOwner` when `repo` and run URL repo are both absent. Single-run mode uses `gh api --method GET /repos/<repo>/actions/runs/<runId>` and `gh api --method GET /repos/<repo>/actions/runs/<runId>/jobs`. Commit mode uses `gh api --method GET /repos/<repo>/branches/<branch>`, `gh api --method GET /repos/<repo>/actions/runs`, `gh api --method GET /repos/<repo>/actions/runs/<runId>/jobs`, and `gh api /repos/<repo>/actions/jobs/<jobId>/logs` for failed jobs. |\n| Batching | Implicit batching only in commit mode: all workflow runs for one commit are tracked together. |\n| Output | Streaming watch snapshots via `onUpdate`, then a final text report. On failure, appends `Full failed-job logs: artifact://<id>` and sets `details.artifactId`. |\n\nWatch flow:\n- `run` parsing accepts either a decimal run ID or a full run URL. URL repo must match explicit `repo` when both are given.\n- Poll interval is fixed at 3 seconds (`RUN_WATCH_INTERVAL_DEFAULT`).\n- Failure grace period is fixed at 5 seconds (`RUN_WATCH_GRACE_DEFAULT`). 
When any failed job appears before completion, the tool emits a note, waits once, re-fetches state, then collects logs so concurrent failures are included.\n- Failed-job logs are fetched with `gh api /repos/<repo>/actions/jobs/<jobId>/logs` via `git.github.run()`, not `json()`. Non-zero exit leaves `available: false` instead of failing the whole watch.\n- Inline result includes only the last `tail` lines per failed job. The saved artifact contains full logs (`mode: \"full\"`).\n- In commit mode, success is intentionally double-checked: once all known runs are successful, the tool waits one more poll interval and succeeds only if the set of run IDs is unchanged. This avoids returning before late workflow runs appear for the same commit.\n- `details.watch` drives a specialized renderer in `packages/coding-agent/src/tools/gh-renderer.ts`; non-watch results fall back to generic text rendering.\n\n## Side Effects\n- Filesystem\n - `pr_create` may create a temp dir under `os.tmpdir()` named `gh-pr-body-*`, write `body.md`, then remove the dir in `finally`.\n - `pr_checkout` may create directories under `~/.omp/wt/<encoded-primary-repo-root>/` and add git worktrees there.\n - `run_watch` may write a session artifact with full failed-job logs.\n- Network\n - Every op shells out to `gh`, which then talks to GitHub APIs except `pr_push`.\n - `pr_push` uses git network transport to the configured remote.\n- Subprocesses / native bindings\n - All `gh` calls use `Bun.spawn([\"gh\", ...args])`.\n - `pr_checkout` and `pr_push` also invoke git helpers from `packages/coding-agent/src/utils/git.ts`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - `run_watch` consumes `session.allocateOutputArtifact()` when failed-job logs are persisted.\n - Returned `details` objects carry run/checkouts metadata for the renderer/UI.\n- User-visible prompts / interactive UI\n - `gh` interactive editor fallback is suppressed for `pr_create` by forcing either `--body-file` or 
`--body \"\"`.\n - `gh-renderer` provides compact headers for all ops and a custom live watch view for `run_watch`.\n- Background work / cancellation\n - `run_watch` loops until success/failure and uses `abortableSleep()` between polls.\n - `GithubTool.execute()` is wrapped in `untilAborted()`; `git.github.run()` forwards the abort signal into `Bun.spawn()`.\n\n## Limits & Caps\n- Search result default: `10` (`SEARCH_LIMIT_DEFAULT` in `packages/coding-agent/src/tools/gh.ts`).\n- Search result max: `50` (`SEARCH_LIMIT_MAX`).\n- PR file preview inside the `pr://` view: first `50` files only (`FILE_PREVIEW_LIMIT` in `gh.ts`).\n- Run-watch poll interval: `3s` (`RUN_WATCH_INTERVAL_DEFAULT`).\n- Run-watch failure grace period: `5s` (`RUN_WATCH_GRACE_DEFAULT`).\n- Run-watch failed-log tail default: `15` lines (`RUN_WATCH_TAIL_DEFAULT`).\n- Run-watch failed-log tail max: `200` lines (`RUN_WATCH_TAIL_MAX`).\n- PR review comments page size: `100` (`REVIEW_COMMENTS_PAGE_SIZE`).\n- Actions jobs page size: `100` (`RUN_JOBS_PAGE_SIZE`).\n- Search and tail numeric inputs are floored with `Math.floor()`, clamped to the max, and rejected when non-finite or `<= 0`.\n- `pr_checkout` batch fan-out is unbounded in tool code; all requested PRs are launched with `Promise.all()`.\n\n## Errors\n- Tool creation is skipped entirely when `gh` is not installed.\n- `git.github.run()` throws `ToolError(\"GitHub CLI (gh) is not installed...\")` if `gh` is missing at execution time.\n- `git.github.text/json()` map common failures to model-facing messages:\n - not authenticated → `GitHub CLI is not authenticated. Run \\`gh auth login\\`.`\n - missing repo context without explicit `repo` → `GitHub repository context is unavailable. 
Pass \\`repo\\` explicitly or run the tool inside a GitHub checkout.`\n - otherwise stderr/stdout text, or fallback `GitHub CLI command failed: gh ...`\n- `json()` also throws on empty stdout or invalid JSON.\n- Local validation errors throw `ToolError`, including:\n - missing required per-op fields (`query`, `title unless fill=true`)\n - invalid numeric `limit` / `tail`\n - invalid `run` format\n - `fill` combined with `title` or `body`\n - missing git repo / branch / HEAD context for checkout, push, or watch\n - `pr_push` on a branch without `ompPrHeadRef` metadata\n - conflicting existing worktree path or branch without `force`\n- `run_watch` treats failed-job log fetches specially: missing log content does not fail the watch; it marks that log `available: false` and prints `Log tail unavailable.` / `Full log unavailable.`.\n- `pr_create` swallows only the post-create best-effort `gh pr view` refresh; the create step itself still fails normally.\n\n## Notes\n- `appendRepoFlag()` intentionally skips `--repo` when the identifier argument is already a full GitHub URL; that lets `gh` derive repo/number from the URL.\n- `normalizePrIdentifierList()` accepts `reviewer`, `assignee`, and `label` arrays too; the helper name is broader than its callers.\n- `pr_push` depends on `pr_checkout` having run first for that local branch; there is no alternate metadata source.\n- `pr_checkout` stores push metadata in branch config, not in the worktree directory. 
Reusing the same `pr-<number>` branch reuses those config keys.\n- Worktree write serialization is keyed by the primary repo root, not the current worktree path, because git worktrees share `.git/config`, `packed-refs`, commit-graph, and worktree metadata files.\n- `search_repos` is the only search op that never forwards `repo`; repository scoping must be expressed in the query itself.\n- `run_watch` success on commit mode means “all observed runs succeeded and no additional runs appeared one poll later”, not merely “latest poll looked green”.\n- The TUI renderer collapses failed log previews unless the result view is expanded; the underlying text result still contains the same tailed lines plus any artifact reference.\n",
72
+ "tools/github.md": "# github\n\n> Dispatch GitHub CLI operations for repositories, issues, pull requests, search, and Actions run watching.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/gh.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/github.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/gh-format.ts` — shorten commit SHAs for summaries.\n - `packages/coding-agent/src/tools/gh-renderer.ts` — TUI rendering, especially `run_watch` live/result views.\n - `packages/coding-agent/src/utils/git.ts` — `gh`/`git` process wrappers, repo locking, branch config writes.\n - `packages/utils/src/dirs.ts` — base directory for dedicated PR worktrees.\n - `packages/coding-agent/src/sdk.ts` — session artifact allocation hook.\n - `packages/coding-agent/src/session/artifacts.ts` — artifact filename format `<id>.<toolType>.log`.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `op` | `\"repo_view\" \\| \"pr_create\" \\| \"pr_checkout\" \\| \"pr_push\" \\| \"search_issues\" \\| \"search_prs\" \\| \"search_code\" \\| \"search_commits\" \\| \"search_repos\" \\| \"run_watch\"` | Yes | Dispatch selector. `GithubTool.execute()` switches only on this field. |\n| `repo` | `string` | No | `owner/repo` override. Ignored when the identifier argument is already a full GitHub URL. For `search_issues`/`search_prs`/`search_code`/`search_commits`, defaults to the current checkout's `owner/repo` when omitted (skipped when the query already contains a `repo:`/`org:`/`user:`/`owner:` qualifier or when current-repo resolution fails). Required in practice when `gh` cannot infer repo context from the current checkout. |\n| `branch` | `string` | No | Used by `repo_view`, `pr_push`, and `run_watch`. `run_watch` falls back to current git branch when `run` is omitted; `pr_push` falls back to current branch. |\n| `pr` | `string \\| string[]` | No | Used by `pr_checkout`. 
Each item may be a PR number, branch name, or GitHub PR URL. Array form enables batching. Omitted means current branch PR. |\n| `force` | `boolean` | No | Used only by `pr_checkout`. Defaults to `false`; allows resetting an existing `pr-<number>` local branch to the PR head commit. |\n| `forceWithLease` | `boolean` | No | Used only by `pr_push`; passed through to git push. |\n| `title` | `string` | No | Used only by `pr_create`. Required unless `fill` is `true`. |\n| `body` | `string` | No | Used only by `pr_create`. Mutually exclusive with `fill`. Empty/omitted body becomes `--body \"\"` to suppress the interactive editor. Non-empty body is written to a temp file and passed as `--body-file`. |\n| `base` | `string` | No | Used only by `pr_create`; passed as `--base`. |\n| `head` | `string` | No | Used only by `pr_create`; passed as `--head`. |\n| `draft` | `boolean` | No | Used only by `pr_create`. Defaults to `false`. |\n| `fill` | `boolean` | No | Used only by `pr_create`. Defaults to `false`. Mutually exclusive with `title` and `body`. |\n| `reviewer` | `string[]` | No | Used only by `pr_create`; each entry becomes `--reviewer`. |\n| `assignee` | `string[]` | No | Used only by `pr_create`; each entry becomes `--assignee`. |\n| `label` | `string[]` | No | Used only by `pr_create`; each entry becomes `--label`. |\n| `query` | `string` | No | Used by all `search_*` ops. Required there. |\n| `limit` | `number` | No | Used by all `search_*` ops. Defaults to `10`, floored, clamped to `50`, and must be `> 0`. |\n| `run` | `string` | No | Used only by `run_watch`. Must be a numeric run ID or full GitHub Actions run URL. |\n| `tail` | `number` | No | Used only by `run_watch`. Defaults to `15`, floored, clamped to `200`, and must be `> 0`. |\n\n## Outputs\nThe tool returns a single text result built by `buildTextResult()` in `packages/coding-agent/src/tools/gh.ts`.\n\n- `content`: one text block. 
Multi-item ops join sections with blank lines and `---` separators.\n- `sourceUrl`: set for single repo/PR/run results when a canonical URL is known.\n- `details`: optional structured metadata used by the TUI renderer.\n - Common fields: `artifactId`, `repo`, `branch`, `worktreePath`, `remote`, `remoteBranch`, `headSha`, `runId`, `runIds`, `status`, `conclusion`, `failedJobs`.\n - `pr_checkout` adds `checkouts: GhPrCheckoutSummary[]`.\n - `run_watch` adds `watch: GhRunWatchViewDetails`, which drives the custom live/result renderer in `packages/coding-agent/src/tools/gh-renderer.ts`.\n- Artifact trailer: when `artifactId` is present, the text body gets an appended line like `Full failed-job logs: artifact://<id>`.\n - `run_watch` allocates artifacts with `session.allocateOutputArtifact(\"github\")`; persistent sessions therefore save failed-log bodies as `<artifact-dir>/<id>.github.log`.\n\n`run_watch` is the only streaming op. It emits `onUpdate` snapshots while polling, then returns one final text result.\n\n## Flow\n1. `GithubTool.createIf()` exposes the tool only when `git.github.available()` finds `gh` on `PATH`.\n2. `GithubTool.execute()` wraps dispatch in `untilAborted()` and switches on `params.op`.\n3. Each op normalizes optional strings, arrays, booleans, and numeric caps locally in `packages/coding-agent/src/tools/gh.ts`.\n4. CLI execution goes through `git.github.run/json/text()` in `packages/coding-agent/src/utils/git.ts`:\n - spawns `gh ...` with `Bun.spawn()`;\n - trims stdout/stderr unless `trimOutput: false`;\n - maps common auth/repo-context failures into tool-facing `ToolError` messages;\n - `json()` rejects empty or invalid JSON.\n5. Read-style ops (`repo_view`, `search_*`) fetch JSON and format Markdown-like text summaries. Single-issue and single-PR views were moved out of the tool and now resolve through the `issue://` / `pr://` internal URL schemes, which share the same SQLite cache.\n7. PR diffs moved out of the tool. 
`pr://<N>/diff` lists changed files, `pr://<N>/diff/<i>` slices a single file, and `pr://<N>/diff/all` returns the full unified diff — see `docs/tools/read.md`. All three variants share one `gh pr diff` invocation through the `pr-diff` cache row.\n8. `pr_checkout` resolves PR metadata first, then enters `git.withRepoLock()` before any git mutation so parallel checkout calls for the same primary repo do not race on shared `.git` state.\n9. `pr_push` reads PR head metadata back from git branch config, derives a refspec, then pushes with `git.push()`.\n10. `pr_create` shells out once, then best-effort re-reads the created PR for a richer summary.\n11. `run_watch` chooses either run mode (`run` supplied) or commit mode (`run` omitted), polls GitHub Actions APIs every 3 seconds, emits streaming updates, and may save a full failed-log artifact before returning.\n12. Final text goes through `toolResult().text(...)`; if `session.allocateOutputArtifact()` returns a slot, failed-log text is persisted with `Bun.write()`.\n\n## Modes / Variants\n\n### `repo_view`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `branch` |\n| `gh` command | `gh repo view [<repo>] [--branch <branch>] --json <GH_REPO_FIELDS>` |\n| Batching | None |\n| Output | `# <owner/repo>` header, description, URL, default branch, requested branch, visibility, permission, primary language, stars, forks, archive/fork flags, updated timestamp, homepage, topics. `sourceUrl = data.url`. |\n\nIf `repo` is omitted, `gh` repository resolution is used.\n\nSingle-issue and single-PR reads live in the `issue://<N>` / `pr://<N>` URL schemes (see `docs/tools/read.md`). They share `~/.omp/cache/github-cache.db` (override via `OMP_GITHUB_CACHE_DB`) and the `github.cache.softTtlSec` / `github.cache.hardTtlSec` / `github.cache.enabled` settings. 
The cache retains rendered Markdown plus the raw JSON payload returned by `gh`, including private bodies, comments, reviews, and review comments when comments are enabled; rows are scoped by the local GitHub credential fingerprint. Root and repo-scoped reads (`issue://`, `pr://owner/repo`) issue a live `gh issue list` / `gh pr list` for browsing; query params `state`, `limit`, `author`, `label` pass through to `gh` (`issue://` accepts `state=open|closed|all`; `pr://` also accepts `merged`). PR diffs ride the same cache under `pr://<N>/diff[/…]`: the listing, full diff, and per-file slices all share one `pr-diff` row keyed by repo and PR number.\n\n### `pr_create`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` plus either `fill=true` or `title` |\n| Optional fields | `repo`, `title`, `body`, `base`, `head`, `draft`, `fill`, `reviewer[]`, `assignee[]`, `label[]` |\n| `gh` command | `gh pr create ...` with flags assembled from provided fields |\n| Batching | None |\n| Output | `# Created Pull Request ...` summary with URL, state, draft flag, base/head, author, created time, labels, optional body. `sourceUrl` is the created PR URL. |\n\nBranches:\n- `fill && (title || body !== undefined)` throws.\n- Non-empty `body` is written under a temp dir `gh-pr-body-*` in `os.tmpdir()`, passed as `--body-file`, then removed in `finally`.\n- After creation, the tool parses the returned URL and best-effort runs `gh pr view <number> --repo <repo> --json <GH_PR_FIELDS_NO_COMMENTS>`; failures there are swallowed.\n\n### `pr_checkout`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `pr`, `force` |\n| `gh` command | For each requested PR: `gh pr view [<pr>] [--repo <repo>] --json <GH_PR_CHECKOUT_FIELDS>`; cross-repo PRs may also call `gh repo view <headRepository> --json <GH_REPO_CLONE_FIELDS>`. |\n| Batching | Yes. 
`pr` may be `string[]`; each PR is resolved in parallel, but git mutations are serialized per primary repo by `git.withRepoLock()`. |\n| Output | Single PR: checkout/worktree summary plus `details.repo`, `details.branch`, `details.worktreePath`, `details.remote`, `details.remoteBranch`, `details.checkouts`. Batched: `# <n> Pull Request Worktrees (...)` plus one section per PR and aggregated `details.checkouts`. |\n\nWorktree and metadata behavior:\n- Local branch name is always `pr-<number>`.\n- Worktree path is `path.join(getWorktreesDir(), encodeRepoPathForFilesystem(primaryRepoRoot), localBranch)`, where `getWorktreesDir()` is `~/.omp/wt`; effective path is `~/.omp/wt/<encoded-primary-repo-root>/pr-<number>`.\n- Existing worktree detection is by branch ref `refs/heads/pr-<number>` from `git.worktree.list()`.\n- New worktree creation calls `git.worktree.add(repoRoot, finalWorktreePath, localBranch, { signal })` after verifying the path is neither already registered nor already present on disk.\n- For same-repo PRs, remote is `origin`. 
For cross-repo PRs, the tool resolves a clone URL for the head repo, reuses an existing remote with the same URL when possible, or creates `fork-<owner>` / `fork-<owner>-<n>`.\n- The branch push metadata is persisted with `git config` under the repository's shared `.git/config` as:\n - `branch.pr-<number>.remote`\n - `branch.pr-<number>.merge`\n - `branch.pr-<number>.pushRemote`\n - `branch.pr-<number>.ompPrHeadRef`\n - `branch.pr-<number>.ompPrUrl`\n - `branch.pr-<number>.ompPrIsCrossRepository`\n - `branch.pr-<number>.ompPrMaintainerCanModify`\n- If `refs/heads/pr-<number>` already exists at a different commit, checkout fails unless `force=true`, in which case `git branch --force` resets it to the fetched PR head.\n- If a matching worktree already exists, the tool reuses it and reports `reused: true`.\n\n### `pr_push`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `branch`, `forceWithLease` |\n| `gh` command | None. This path uses git, not `gh`. |\n| Batching | None |\n| Output | `# Pushed Pull Request Branch` summary with local branch, remote, remote branch, remote URL, PR URL, and force-with-lease flag. `sourceUrl = prUrl` when known. |\n\nPush target resolution reads the `branch.<name>.ompPrHeadRef`, `pushRemote`/`remote`, `ompPrUrl`, `ompPrMaintainerCanModify`, and `ompPrIsCrossRepository` git-config keys written by `pr_checkout`. If the current checked-out branch matches the target branch, the source ref is `HEAD`; otherwise it pushes `refs/heads/<branch>`. 
The refspec is `HEAD:refs/heads/<headRef>` or `refs/heads/<branch>:refs/heads/<headRef>`.\n\n### `search_issues`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `repo`, `limit` |\n| `gh` command | `gh api -X GET /search/issues -f q=\"<query> [repo:<repo>] is:issue\" -F per_page=<limit>` |\n| Batching | None |\n| Output | `# GitHub issues search`, echoed query, optional repo, result count, then one bullet per issue with repo/state/author/labels/timestamps/URL. |\n\n`repo` defaults to the current checkout's `owner/repo` via `resolveSearchRepoScope()` when omitted. The default is suppressed when the query already contains a leading `repo:`/`org:`/`user:`/`owner:` qualifier or when `gh repo view` fails to resolve the current checkout (e.g. outside a github remote).\n\n### `search_prs`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `repo`, `limit` |\n| `gh` command | `gh api -X GET /search/issues -f q=\"<query> [repo:<repo>] is:pr\" -F per_page=<limit>` |\n| Batching | None |\n| Output | Same shape as `search_issues`, labeled as pull requests. |\n\n`repo` defaults to the current checkout's `owner/repo` as in `search_issues`.\n\n### `search_code`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `repo`, `limit` |\n| `gh` command | `gh api -X GET /search/code -f q=\"<query> [repo:<repo>]\" -F per_page=<limit> -H \"Accept: application/vnd.github.text-match+json\"` |\n| Batching | None |\n| Output | `# GitHub code search`, result count, then one bullet per match with path, repo, short commit SHA, URL, and first normalized text-match fragment line when present. 
|\n\n`repo` defaults to the current checkout's `owner/repo` as in `search_issues`.\n\n### `search_commits`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `repo`, `limit` |\n| `gh` command | `gh api -X GET /search/commits -f q=\"<query> [repo:<repo>]\" -F per_page=<limit>` |\n| Batching | None |\n| Output | `# GitHub commits search`, result count, then one bullet per commit: short SHA + first commit-message line, repo, author, date, URL. |\n\n`repo` defaults to the current checkout's `owner/repo` as in `search_issues`.\n\n### `search_repos`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `limit` |\n| `gh` command | `gh api -X GET /search/repositories -f q=\"<query>\" -F per_page=<limit>` |\n| Batching | None |\n| Output | `# GitHub repositories search`, result count, then one bullet per repo with first description line, language, stars, forks, open issues, visibility, archive/fork flags, updated time, URL. |\n\n`repo` is intentionally not used for this op.\n\n### `run_watch`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `branch`, `run`, `tail` |\n| `gh` command | Repo resolution: `gh repo view --json nameWithOwner -q .nameWithOwner` when `repo` and run URL repo are both absent. Single-run mode uses `gh api --method GET /repos/<repo>/actions/runs/<runId>` and `gh api --method GET /repos/<repo>/actions/runs/<runId>/jobs`. Commit mode uses `gh api --method GET /repos/<repo>/branches/<branch>`, `gh api --method GET /repos/<repo>/actions/runs`, `gh api --method GET /repos/<repo>/actions/runs/<runId>/jobs`, and `gh api /repos/<repo>/actions/jobs/<jobId>/logs` for failed jobs. |\n| Batching | Implicit batching only in commit mode: all workflow runs for one commit are tracked together. |\n| Output | Streaming watch snapshots via `onUpdate`, then a final text report. 
On failure, appends `Full failed-job logs: artifact://<id>` and sets `details.artifactId`. |\n\nWatch flow:\n- `run` parsing accepts either a decimal run ID or a full run URL. URL repo must match explicit `repo` when both are given.\n- Poll interval is fixed at 3 seconds (`RUN_WATCH_INTERVAL_DEFAULT`).\n- Failure grace period is fixed at 5 seconds (`RUN_WATCH_GRACE_DEFAULT`). When any failed job appears before completion, the tool emits a note, waits once, re-fetches state, then collects logs so concurrent failures are included.\n- Failed-job logs are fetched with `gh api /repos/<repo>/actions/jobs/<jobId>/logs` via `git.github.run()`, not `json()`. Non-zero exit leaves `available: false` instead of failing the whole watch.\n- Inline result includes only the last `tail` lines per failed job. The saved artifact contains full logs (`mode: \"full\"`).\n- In commit mode, success is intentionally double-checked: once all known runs are successful, the tool waits one more poll interval and succeeds only if the set of run IDs is unchanged. 
This avoids returning before late workflow runs appear for the same commit.\n- `details.watch` drives a specialized renderer in `packages/coding-agent/src/tools/gh-renderer.ts`; non-watch results fall back to generic text rendering.\n\n## Side Effects\n- Filesystem\n - `pr_create` may create a temp dir under `os.tmpdir()` named `gh-pr-body-*`, write `body.md`, then remove the dir in `finally`.\n - `pr_checkout` may create directories under `~/.omp/wt/<encoded-primary-repo-root>/` and add git worktrees there.\n - `run_watch` may write a session artifact with full failed-job logs.\n- Network\n - Every op except `pr_push` shells out to `gh`, which then talks to GitHub APIs.\n - `pr_push` uses git network transport to the configured remote.\n- Subprocesses / native bindings\n - All `gh` calls use `Bun.spawn([\"gh\", ...args])`.\n - `pr_checkout` and `pr_push` also invoke git helpers from `packages/coding-agent/src/utils/git.ts`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - `run_watch` consumes `session.allocateOutputArtifact()` when failed-job logs are persisted.\n - Returned `details` objects carry run/checkouts metadata for the renderer/UI.\n- User-visible prompts / interactive UI\n - `gh` interactive editor fallback is suppressed for `pr_create` by forcing either `--body-file` or `--body \"\"`.\n - `gh-renderer` provides compact headers for all ops and a custom live watch view for `run_watch`.\n- Background work / cancellation\n - `run_watch` loops until success/failure and uses `scheduler.wait()` between polls.\n - `GithubTool.execute()` is wrapped in `untilAborted()`; `git.github.run()` forwards the abort signal into `Bun.spawn()`.\n\n## Limits & Caps\n- Search result default: `10` (`SEARCH_LIMIT_DEFAULT` in `packages/coding-agent/src/tools/gh.ts`).\n- Search result max: `50` (`SEARCH_LIMIT_MAX`).\n- PR file preview inside the `pr://` view: first `50` files only (`FILE_PREVIEW_LIMIT` in `gh.ts`).\n- Run-watch poll interval: `3s` 
(`RUN_WATCH_INTERVAL_DEFAULT`).\n- Run-watch failure grace period: `5s` (`RUN_WATCH_GRACE_DEFAULT`).\n- Run-watch failed-log tail default: `15` lines (`RUN_WATCH_TAIL_DEFAULT`).\n- Run-watch failed-log tail max: `200` lines (`RUN_WATCH_TAIL_MAX`).\n- PR review comments page size: `100` (`REVIEW_COMMENTS_PAGE_SIZE`).\n- Actions jobs page size: `100` (`RUN_JOBS_PAGE_SIZE`).\n- Search and tail numeric inputs are floored with `Math.floor()`, clamped to the max, and rejected when non-finite or `<= 0`.\n- `pr_checkout` batch fan-out is unbounded in tool code; all requested PRs are launched with `Promise.all()`.\n\n## Errors\n- Tool creation is skipped entirely when `gh` is not installed.\n- `git.github.run()` throws `ToolError(\"GitHub CLI (gh) is not installed...\")` if `gh` is missing at execution time.\n- `git.github.text/json()` map common failures to model-facing messages:\n - not authenticated → `GitHub CLI is not authenticated. Run \\`gh auth login\\`.`\n - missing repo context without explicit `repo` → `GitHub repository context is unavailable. 
Pass \\`repo\\` explicitly or run the tool inside a GitHub checkout.`\n - otherwise stderr/stdout text, or fallback `GitHub CLI command failed: gh ...`\n- `json()` also throws on empty stdout or invalid JSON.\n- Local validation errors throw `ToolError`, including:\n - missing required per-op fields (`query`, `title unless fill=true`)\n - invalid numeric `limit` / `tail`\n - invalid `run` format\n - `fill` combined with `title` or `body`\n - missing git repo / branch / HEAD context for checkout, push, or watch\n - `pr_push` on a branch without `ompPrHeadRef` metadata\n - conflicting existing worktree path or branch without `force`\n- `run_watch` treats failed-job log fetches specially: missing log content does not fail the watch; it marks that log `available: false` and prints `Log tail unavailable.` / `Full log unavailable.`.\n- `pr_create` swallows only the post-create best-effort `gh pr view` refresh; the create step itself still fails normally.\n\n## Notes\n- `appendRepoFlag()` intentionally skips `--repo` when the identifier argument is already a full GitHub URL; that lets `gh` derive repo/number from the URL.\n- `normalizePrIdentifierList()` accepts `reviewer`, `assignee`, and `label` arrays too; the helper is used more broadly than its name suggests.\n- `pr_push` depends on `pr_checkout` having run first for that local branch; there is no alternate metadata source.\n- `pr_checkout` stores push metadata in branch config, not in the worktree directory. 
Reusing the same `pr-<number>` branch reuses those config keys.\n- Worktree write serialization is keyed by the primary repo root, not the current worktree path, because git worktrees share `.git/config`, `packed-refs`, commit-graph, and worktree metadata files.\n- `search_repos` is the only search op that never forwards `repo`; repository scoping must be expressed in the query itself.\n- `run_watch` success in commit mode means “all observed runs succeeded and no additional runs appeared one poll later”, not merely “latest poll looked green”.\n- The TUI renderer collapses failed log previews unless the result view is expanded; the underlying text result still contains the same tailed lines plus any artifact reference.\n",
74
73
  "tools/inspect_image.md": "# inspect_image\n\n> Send a local image file to a vision-capable model and return text analysis.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/inspect-image.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/inspect-image.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/inspect-image-renderer.ts` — TUI call/result rendering.\n - `packages/coding-agent/src/utils/image-loading.ts` — path resolution, type detection, size gate, optional resize.\n - `packages/coding-agent/src/utils/image-resize.ts` — downscale and recompress oversized images.\n - `packages/coding-agent/src/tools/path-utils.ts` — resolve input path relative to session cwd.\n - `packages/utils/src/mime.ts` — detect supported image formats from file bytes.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `path` | `string` | Yes | Image path passed to `loadImageInput`; resolved relative to `session.cwd` by `resolveReadPath(...)`. |\n| `question` | `string` | Yes | User prompt sent as a text content block alongside the image. |\n\n## Outputs\nThe tool returns a single `AgentToolResult`:\n\n- `content`: one text block, `[{ type: \"text\", text }]`, where `text` is the concatenated assistant text content from the model response.\n- `details`:\n - `model`: `<provider>/<id>` of the selected model.\n - `imagePath`: resolved filesystem path returned by `loadImageInput(...)`.\n - `mimeType`: MIME type actually sent to the model after optional resize/re-encode.\n\nModel-visible output is single-shot, not streamed by this tool.\n\nTUI rendering adds presentation-only truncation from `packages/coding-agent/src/tools/inspect-image-renderer.ts`:\n\n- call preview truncates `question` to 100 columns,\n- result view shows 4 lines collapsed or 16 lines expanded,\n- each rendered output line is truncated to 120 columns,\n- footer metadata shows `model · mimeType` when present.\n\n## Flow\n1. 
`InspectImageTool.execute(...)` rejects immediately if `images.blockImages` is enabled in session settings.\n2. It reads `session.modelRegistry`; missing registry, empty registry, missing API key, or unresolved model each raise `ToolError` from `packages/coding-agent/src/tools/inspect-image.ts`.\n3. Model selection tries, in order, `pi/vision`, `pi/default`, the active model string from the session, then `availableModels[0]`. `expandRoleAlias(...)` and `resolveModelFromString(...)` handle each lookup.\n4. The chosen model must advertise `input.includes(\"image\")`; otherwise execution fails before reading the file.\n5. `loadImageInput(...)` in `packages/coding-agent/src/utils/image-loading.ts` resolves the path with `resolveReadPath(...)`, detects MIME type with `readImageMetadata(...)`, and rejects files larger than `MAX_IMAGE_INPUT_BYTES` (`20 * 1024 * 1024`, 20 MiB) using `ImageInputTooLargeError`.\n6. `readImageMetadata(...)` in `packages/utils/src/mime.ts` inspects file headers only. Supported detected MIME types are `image/png`, `image/jpeg`, `image/gif`, and `image/webp`.\n7. If `images.autoResize` is true, `loadImageInput(...)` calls `resizeImage(...)`. Resize failures are swallowed there and the original bytes are kept.\n8. If MIME detection returned no supported image type, `execute(...)` throws `ToolError(\"inspect_image only supports PNG, JPEG, GIF, and WEBP files detected by file content.\")`.\n9. The tool calls `completeSimple(...)` with one user message containing two content parts in order:\n - `{ type: \"image\", data: imageInput.data, mimeType: imageInput.mimeType }`\n - `{ type: \"text\", text: params.question }`\n10. `systemPrompt` is a one-element array rendered from `packages/coding-agent/src/prompts/tools/inspect-image-system.md`.\n11. If the model response stop reason is `error` or `aborted`, the tool maps that to `ToolError`.\n12. 
`extractResponseText(...)` concatenates only `text` content blocks from the assistant message, trims the result, and fails if nothing remains.\n13. Success returns the text plus `details`; `inspectImageToolRenderer` formats the result for the TUI.\n\n## Modes / Variants\n- **Original image path**: `images.autoResize` disabled. The original file bytes are base64-encoded and sent with the detected MIME type.\n- **Auto-resized path**: `images.autoResize` enabled. `resizeImage(...)` may downscale and re-encode the image before upload.\n- **Unsupported image path**: file exists but header sniffing does not identify PNG/JPEG/GIF/WEBP. The tool returns a `ToolError` before any model call.\n- **Oversize image path**: file size exceeds 20 MiB before upload. The tool returns a `ToolError` before any model call.\n\n## Side Effects\n- Filesystem\n - Resolves and reads the target image from disk.\n - Stats the file once with `Bun.file(...).stat()` and reads it fully with `fs.readFile(...)`.\n- Network\n - Sends the final base64 image payload plus question text to the selected model through `completeSimple(...)`.\n- Session state\n - Reads session settings, active model preferences, cwd, and model registry.\n- Background work / cancellation\n - Passes the caller `AbortSignal` into `completeSimple(...)`.\n - Image preprocessing is local and not cancellation-aware in these helpers.\n\n## Limits & Caps\n- Supported detected input formats: `image/png`, `image/jpeg`, `image/gif`, `image/webp` (`SUPPORTED_IMAGE_MIME_TYPES` in `packages/utils/src/mime.ts`).\n- Metadata sniff cap: `DEFAULT_IMAGE_METADATA_HEADER_BYTES = 256 * 1024` bytes. 
Format detection only reads up to 256 KiB from the file header.\n- Upload input cap: `MAX_IMAGE_INPUT_BYTES = 20 * 1024 * 1024` bytes (20 MiB) in `packages/coding-agent/src/utils/image-loading.ts`.\n- Auto-resize defaults in `packages/coding-agent/src/utils/image-resize.ts`:\n - `maxWidth: 1568`\n - `maxHeight: 1568`\n - `maxBytes: 500 * 1024` bytes (500 KiB target)\n - `jpegQuality: 75`\n- Resize fast path: if the original image is already within `1568x1568` and within `maxBytes / 4` (125 KiB by default), `resizeImage(...)` returns the original bytes unchanged.\n- Resize quality ladder: after the first encode pass, lossy retries use qualities `[70, 60, 50, 40]`.\n- Resize dimension ladder: if quality reduction still misses the byte target, retries scale dimensions by `[1.0, 0.75, 0.5, 0.35, 0.25]` and stop if either dimension would fall below `100` pixels.\n- First resize pass encodes PNG, JPEG, and WebP, then keeps the smallest encoded buffer. Fallback passes encode JPEG and WebP only, again keeping the smaller output.\n- Renderer caps:\n - `INSPECT_QUESTION_PREVIEW_WIDTH = 100`\n - `INSPECT_OUTPUT_COLLAPSED_LINES = 4`\n - `INSPECT_OUTPUT_EXPANDED_LINES = 16`\n - `INSPECT_OUTPUT_LINE_WIDTH = 120`\n\n## Errors\n- Settings gate:\n - `Image submission is disabled by settings (images.blockImages=true). Disable it to use inspect_image.`\n- Model resolution / capability:\n - `Model registry is unavailable for inspect_image.`\n - `No models available for inspect_image.`\n - `Unable to resolve a model for inspect_image.`\n - `Resolved model <provider>/<id> does not support image input. Configure a vision-capable model for modelRoles.vision.`\n - `No API key available for <provider>/<id>. 
Configure credentials for this provider or choose another vision-capable model.`\n- Input file:\n - `Image file too large: <size> exceeds <limit> limit.` from `ImageInputTooLargeError`, remapped to `ToolError`.\n - `inspect_image only supports PNG, JPEG, GIF, and WEBP files detected by file content.` when header sniffing fails.\n- Model call:\n - `inspect_image request failed.` if the response stop reason is `error` without a provider message.\n - Provider `errorMessage` is passed through when present.\n - `inspect_image request aborted.` on aborted responses.\n - `inspect_image model returned no text output.` when the assistant message contains no text blocks after filtering.\n\nFailures surface as thrown `ToolError`s from `execute(...)`; the normal success return shape is not used for error reporting.\n\n## Notes\n- The model-facing prompt path on disk is `packages/coding-agent/src/prompts/tools/inspect-image.md`; the assignment's underscore form does not exist.\n- Format support is based on file content, not filename extension. Renaming a non-image file to `.png` does not make it valid.\n- `resolveReadPath(...)` tries macOS-specific path variants: shell-unescaped spaces, AM/PM narrow no-break-space filenames, NFD normalization, and curly-quote variants.\n- `loadImageInput(...)` also computes `textNote`, `dimensionNote`, and final `bytes`, but `inspect_image` does not include those in tool output.\n- Auto-resize can change the MIME type sent to the model. A JPEG or GIF input may be uploaded as PNG, JPEG, or WebP depending on which encoder output is smallest.\n- If `resizeImage(...)` throws or cannot decode the image, `loadImageInput(...)` silently keeps the original base64 payload instead of failing.\n",
75
74
  "tools/irc.md": "# irc\n\n> Send short prose messages to other live agents in the current process.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/irc.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/irc.md`\n- Key collaborators:\n - `packages/coding-agent/src/registry/agent-registry.ts` — process-global live agent directory.\n - `packages/coding-agent/src/session/agent-session.ts` — side-channel reply generation and history injection.\n - `packages/coding-agent/src/prompts/system/irc-incoming.md` — no-tools auto-reply prompt.\n - `packages/coding-agent/src/tools/index.ts` — tool availability gating.\n - `packages/coding-agent/src/config/settings-schema.ts` — `irc.enabled` default.\n - `packages/coding-agent/src/modes/controllers/event-controller.ts` — renders IRC events into chat UI.\n - `packages/coding-agent/src/modes/utils/ui-helpers.ts` — formats `[IRC]` transcript lines.\n - `packages/coding-agent/src/task/executor.ts` — carries `irc.enabled` into subagents.\n\n## Inputs\n\n### `op: \"list\"`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `op` | `\"list\"` | Yes | Lists peers visible to the caller. |\n\n### `op: \"send\"`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `op` | `\"send\"` | Yes | Sends one message to one peer or to `\"all\"`. |\n| `to` | `string` | Yes | Peer id such as `0-Main`, or `\"all\"` for broadcast. Whitespace is trimmed. |\n| `message` | `string` | Yes | Message body. Whitespace is trimmed; empty-after-trim is rejected. |\n| `awaitReply` | `boolean` | No | Wait for prose replies. Defaults to `true` for direct messages and `false` for `to: \"all\"`. 
|\n\n## Outputs\n- Single-shot `AgentToolResult`; no streaming updates.\n- `content` is one text block.\n - `list` returns either `No other live agents.` or a bullet list headed by `<n> peer(s):`.\n - `send` returns delivery summary text, then optional `## Replies`, `## Failed`, and `Unknown / unavailable peers:` sections.\n- `details` is structured metadata:\n - `list`: `{ op, from, peers, channels }`\n - `send`: `{ op, from, to, delivered, replies?, failed?, notFound? }`\n- The tool does not return raw IRC frames, message ids, or a transcript object.\n\n## Flow\n1. `IrcTool.createIf` only constructs the tool when `irc.enabled` is on and the session has both an `AgentRegistry` and `getAgentId` (`packages/coding-agent/src/tools/irc.ts`).\n2. Tool discovery adds another gate in `packages/coding-agent/src/tools/index.ts`: if the caller is `0-Main` and `async.enabled` is off, `irc` is hidden because the main agent cannot talk to concurrent peers in sync mode.\n3. `execute` resolves the process-global registry and sender id. Missing either returns a text error result instead of throwing.\n4. `op: \"list\"` calls `registry.listVisibleTo(senderId)`, which exposes every other agent in flat namespace whose status is `running` or `idle` (`packages/coding-agent/src/registry/agent-registry.ts`).\n5. `list` formats human-readable lines and returns `channels` as `['all', ...peerIds]`. These are logical targets only; there is no channel join state.\n6. `op: \"send\"` trims `to` and `message`; missing values produce text errors.\n7. `send` resolves targets:\n - `to === \"all\"`: all visible peers.\n - otherwise: one exact registry id, excluding self and excluding peers not in `running`/`idle`.\n8. `send` chooses `awaitReply = params.awaitReply ?? !isBroadcast`.\n9. Each target is dispatched in parallel via `target.session.respondAsBackground(...)`. One slow or failing peer does not block dispatch to the others.\n10. 
`respondAsBackground` emits an `irc_message` session event, forwards a display-only relay to the main session UI, and either:\n - queues just the incoming message for later history injection when `awaitReply === false`, or\n - renders `packages/coding-agent/src/prompts/system/irc-incoming.md`, runs `runEphemeralTurn` with `toolChoice: \"none\"`, emits an auto-reply event, then queues both incoming and reply messages for history injection.\n11. Deferred injection waits until the recipient is no longer streaming; `#flushPendingBackgroundExchanges` appends the custom messages through normal `message_start`/`message_end` external events so persistence and listeners see them.\n12. `send` aggregates `delivered`, `replies`, `failed`, and `notFound`, then returns one text summary plus matching `details`.\n\n## Modes / Variants\n- `list`: enumerate visible peers and logical channels.\n- `send` direct message: one exact peer id, default synchronous auto-reply.\n- `send` broadcast: `to: \"all\"`, default fire-and-forget (`awaitReply: false`) to every visible peer.\n- `send` with `awaitReply: false`: recipient records the incoming message but does not generate a reply.\n- `send` with `awaitReply: true`: recipient performs a no-tools ephemeral LLM turn and returns prose.\n\n## Side Effects\n- Session state\n - Reads from the process-global `AgentRegistry`.\n - Emits `irc_message` session events on recipient sessions.\n - Queues IRC custom messages into recipient persisted history after the current stream finishes.\n - For non-main recipients, forwards display-only relay observations into the main session UI; these relays are not persisted to the main agent history.\n - Subagents inherit `irc.enabled` from task executor settings.\n- User-visible prompts / interactive UI\n - IRC events render as `[IRC]` transcript lines in the TUI.\n - Auto-replies are generated from `packages/coding-agent/src/prompts/system/irc-incoming.md` and explicitly forbid tool use.\n- Background work / 
cancellation\n - `send` starts one background `respondAsBackground` call per target.\n - The caller's `AbortSignal` is forwarded into each background reply turn.\n- Network\n - No IRC server connection.\n - When `awaitReply: true`, the recipient may make model-provider API calls through `runEphemeralTurn`.\n- Filesystem\n - No direct filesystem writes in the tool itself.\n\n## Limits & Caps\n- Availability gates:\n - `irc.enabled` defaults to `true` in `packages/coding-agent/src/config/settings-schema.ts`.\n - Main agent tool discovery suppresses `irc` when `async.enabled` is off (`packages/coding-agent/src/tools/index.ts`).\n- Visibility scope: only peers in status `running` or `idle` are addressable via `listVisibleTo`.\n- Reply execution:\n - No tools are available in auto-reply turns (`toolChoice: \"none\"` in `runEphemeralTurn`).\n - No internal timeout, retry, backoff, rate limit, or reply length cap is defined in `irc.ts`; behavior relies on the underlying model stream and any upstream API limits.\n- Flush scheduling: deferred history injection polls every `50` ms while the recipient is still streaming (`#scheduleBackgroundExchangeFlush` in `packages/coding-agent/src/session/agent-session.ts`).\n\n## Errors\n- The tool returns text errors, not thrown exceptions, for:\n - missing registry: `IRC is unavailable in this session.`\n - missing sender id: `IRC is unavailable: caller has no agent id.`\n - missing `to`: `` `to` is required for op=\"send\". ``\n - missing `message`: `` `message` is required for op=\"send\". 
``\n - unknown op: `Unknown irc op.`\n- Unknown, self-addressed, non-running, and non-idle direct targets are reported under `details.notFound` and in the text footer `Unknown / unavailable peers:`.\n- If a target has no attached session, it is treated as not found.\n- Exceptions thrown by `respondAsBackground` or `runEphemeralTurn` are caught per-target and surfaced under `details.failed` as `{ id, error }`; other recipients still complete.\n- If no target succeeds, `send` still returns normally with `No recipients received the message.` and optional `failed`/`notFound` metadata.\n\n## Notes\n- This is IRC-like naming only. There are no servers, sockets, nick registration, auth handshakes, channels beyond `all`, or commands such as join/part/topic.\n- Addressing is by exact agent id from the registry; there is no fuzzy lookup or aliasing.\n- `channels` in `list` is synthetic output: `all` plus visible peer ids. Nothing is persisted across calls as channel membership.\n- Persistence is per recipient history, not per sender history. The sender gets the tool result; the recipient later sees injected custom messages on its next turn.\n- The main UI may show IRC relays for conversations it was not part of, but those relay records are explicitly display-only.\n- Because reply generation snapshots in-flight assistant text, a recipient can answer based on partially streamed context.\n- Direct self-messaging is rejected by resolving the target as unavailable.",
76
- "tools/job.md": "# job\n\n> Wait for or cancel background jobs managed by the session async runtime.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/job.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/job.md`\n- Key collaborators:\n - `packages/coding-agent/src/async/job-manager.ts` — job registry, cancellation, delivery suppression.\n - `packages/coding-agent/src/async/support.ts` — feature gating for background jobs.\n - `packages/coding-agent/src/internal-urls/jobs-protocol.ts` — `jobs://` listing and per-job detail.\n - `packages/coding-agent/src/tools/bash.ts` — explicit async bash and auto-backgrounded bash jobs.\n - `packages/coding-agent/src/task/index.ts` — async task-job scheduling.\n - `packages/coding-agent/src/sdk.ts` — automatic follow-up delivery for unsuppressed completions.\n - `packages/coding-agent/src/config/settings-schema.ts` — `async.pollWaitDuration` options.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `poll` | `string[]` | No | Job ids to watch. If omitted and `cancel` is also omitted, the tool watches all running jobs. If provided, missing ids are silently filtered out before waiting. |\n| `cancel` | `string[]` | No | Job ids to cancel before any polling. Missing ids are reported as `not_found`; non-running ids as `already_completed`. 
|\n\n## Outputs\nThe tool returns one text block plus `details`.\n\n- `content[0].text`: markdown-like plain text sections assembled by `#buildResult(...)`:\n - `## Cancelled (N)` for cancel outcomes.\n - `## Completed (N)` for non-running jobs, including stored `resultText` and `errorText`.\n - `## Still Running (N)` for jobs still in `running`.\n- `details.jobs`: array of snapshots:\n - `id: string`\n - `type: \"bash\" | \"task\"`\n - `status: \"running\" | \"completed\" | \"failed\" | \"cancelled\"`\n - `label: string`\n - `durationMs: number`\n - optional `resultText`, `errorText`\n- `details.cancelled` appears only when `cancel` was passed; each item is `{ id, status }` where status is `\"cancelled\" | \"not_found\" | \"already_completed\"`.\n\nStreaming behavior:\n- During a polling wait, `execute(...)` emits `onUpdate(...)` every 500 ms with an empty text block and fresh `details.jobs` snapshots.\n- Final return is single-shot after a completion, timeout, abort, or immediate fast path.\n\nRelated read path:\n- Reading `jobs://` lists all current jobs.\n- Reading `jobs://<id>` renders one job with status, label, start time, duration, and stored result/error text.\n\n## Flow\n1. `JobTool.createIf(...)` in `packages/coding-agent/src/tools/job.ts` only exposes the tool when `isBackgroundJobSupportEnabled(...)` returns true for either `async.enabled` or `bash.autoBackground.enabled`.\n2. `execute(...)` fetches `session.asyncJobManager`. If absent, it returns `Async execution is disabled; no background jobs are available.`\n3. `cancel` ids are processed first:\n - `manager.getJob(id)` missing → `not_found`.\n - existing job with `status !== \"running\"` → `already_completed`.\n - running job → `manager.cancel(id)`, which sets `job.status = \"cancelled\"`, aborts the controller, and schedules eviction.\n4. 
Polling mode is chosen with `const shouldPoll = requestedPollIds !== undefined || cancelIds.length === 0`:\n - only `cancel` present → return immediately, no wait.\n - explicit `poll`, or no args at all → proceed to watch jobs.\n5. Watch set resolution:\n - explicit `poll` → map ids through `manager.getJob(...)` and drop missing ones.\n - no `poll` and no `cancel` → `manager.getRunningJobs()`.\n6. Empty watch set returns immediately:\n - if cancellations happened, return snapshots for the cancelled ids that still exist.\n - else return either `No matching jobs found for IDs: ...` or `No running background jobs to wait for.`\n7. If every watched job is already non-running, `#buildResult(...)` returns immediately without waiting.\n8. Otherwise the tool waits on `Promise.race(...)` across:\n - every watched running job's `job.promise`,\n - a timeout promise for `async.pollWaitDuration`,\n - the tool-call abort signal when present.\n9. Before waiting, it calls `manager.watchJobs(watchedJobIds)`. This suppresses automatic completion delivery for those ids while they are being watched.\n10. If `onUpdate` exists, a 500 ms interval sends progress snapshots from `#snapshotJobs(...)`; one snapshot is emitted immediately before entering the race.\n11. In `finally`, the tool always calls `manager.unwatchJobs(...)`, clears the timeout, and stops the progress interval.\n12. `#buildResult(...)` deduplicates jobs, snapshots current manager state, then calls `manager.acknowledgeDeliveries(...)` for every non-running job in the result. That suppresses later automatic follow-up delivery for the same completions and removes queued deliveries for those ids.\n13. The final text groups jobs by non-running vs still-running state. 
A timeout is not an error path; it simply returns the current snapshot.\n\n## Modes / Variants\n- Poll all running jobs: call with neither `poll` nor `cancel`.\n- Poll explicit ids: call with `poll` only.\n- Cancel only: call with `cancel` only; cancellations happen and the tool returns immediately.\n- Cancel then poll: call with both. Cancellations are applied first, then the tool watches the remaining resolved `poll` ids.\n- Read-only inspection outside the tool: `jobs://` and `jobs://<id>` expose the same manager state without waiting.\n\nSpawn paths that produce jobs:\n- `packages/coding-agent/src/tools/bash.ts`\n - `async: true` always registers a `type: \"bash\"` job with `AsyncJobManager.register(...)` and returns a start message.\n - auto-background mode (`bash.autoBackground.enabled`) starts the same managed job path for non-PTY commands, waits up to `min(bash.autoBackground.thresholdMs, timeoutMs - 1000)`, and if the command is still running returns a background-job start result instead of inline command output.\n- `packages/coding-agent/src/task/index.ts`\n - when `async.enabled` is on, the chosen agent is not blocking, and `tasks.length > 0`, each task item is registered as a `type: \"task\"` job.\n\nLifecycle and exact state names:\n- Conceptual scheduling path: `pending` (only task-progress bookkeeping before work starts) → `running` → `completed` / `failed`; cancellation changes a running async job to `cancelled`.\n- Exact `AsyncJob.status` values in `packages/coding-agent/src/async/job-manager.ts`: `\"running\" | \"completed\" | \"failed\" | \"cancelled\"`.\n- Exact per-task progress values in `packages/coding-agent/src/task/types.ts`: `\"pending\" | \"running\" | \"completed\" | \"failed\" | \"aborted\"`.\n\n## Side Effects\n- Filesystem\n - None in `job.ts` itself.\n - Jobs being observed may already have written artifacts/results through their own tool runtimes.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Reads and 
mutates `session.asyncJobManager` state.\n - `watchJobs(...)` / `unwatchJobs(...)` toggle delivery suppression for the watched ids.\n - `acknowledgeDeliveries(...)` marks completed ids as suppressed and removes queued deliveries for them.\n - `cancel(...)` aborts running jobs through each job's `AbortController`.\n- User-visible prompts / interactive UI\n - Polling emits periodic `onUpdate` snapshots every 500 ms.\n - Automatic job completion follow-ups are generated by `packages/coding-agent/src/sdk.ts` only for unsuppressed deliveries.\n- Background work / cancellation\n - Waiting uses a timeout plus optional tool-call abort signal.\n - Cancelling a job does not synchronously await teardown; it flips state, aborts, and returns control to the manager/job promise.\n\n## Limits & Caps\n- Poll wait duration comes from `async.pollWaitDuration` in `packages/coding-agent/src/config/settings-schema.ts`:\n - allowed values: `5s`, `10s`, `30s`, `1m`, `5m`\n - default: `30s`\n- Progress update cadence while polling: `PROGRESS_INTERVAL_MS = 500` in `packages/coding-agent/src/tools/job.ts`.\n- Async job retention default: `DEFAULT_RETENTION_MS = 5 * 60 * 1000` in `packages/coding-agent/src/async/job-manager.ts`.\n- Manager fallback max-running limit: `DEFAULT_MAX_RUNNING_JOBS = 15` in `packages/coding-agent/src/async/job-manager.ts`.\n- Session wiring clamps `async.maxJobs` to `1..100` before constructing the manager in `packages/coding-agent/src/sdk.ts`; settings default is `100` in `packages/coding-agent/src/config/settings-schema.ts`.\n- Async completion delivery retry backoff in `packages/coding-agent/src/async/job-manager.ts`:\n - base `500` ms\n - max `30_000` ms\n - jitter `< 200` ms\n - exponent capped at 8 doublings\n\n## Errors\n- Tool-disabled path is returned as normal text, not thrown: `Async execution is disabled; no background jobs are available.`\n- Polling a nonexistent id is not an exception:\n - with `poll` only, missing ids are dropped; if none remain the 
tool returns `No matching jobs found for IDs: ...`.\n - with `cancel`, each missing id is reported as `not_found` in `details.cancelled` and text.\n- Cancelling a non-running job is not an exception; it reports `already_completed` even if the actual status is `completed`, `failed`, or `cancelled`.\n- Tool-call abort during polling stops waiting and returns a final snapshot through `#buildResult(...)`; it does not cancel watched jobs.\n- Failures inside the underlying async work are stored on the job (`status: \"failed\"`, `errorText`) and reported in normal tool output, not rethrown by `job`.\n- Reading `jobs://<id>` for a missing job returns markdown content headed `# Job Not Found` rather than throwing.\n\n## Notes\n- `job` waits for the first watched running job to settle, not for all watched jobs. If others remain `running`, they are reported under `## Still Running`; the caller must invoke `job` again to continue waiting.\n- Delivery suppression is the key difference between snapshot and automatic delivery:\n - snapshots (`job`, reads of `jobs://`) read current manager state;\n - follow-up delivery comes from `AsyncJobManager.#enqueueDelivery(...)` and `sdk.ts` `onJobComplete`;\n - watched or acknowledged ids are suppressed via `isDeliverySuppressed(...)`.\n- `manager.cancel(id)` sets `status = \"cancelled\"` before the underlying promise settles. The job function may later populate `resultText` or `errorText`; `job-manager.ts` preserves that text but does not transition the status away from `cancelled`.\n- `jobs://` is implemented by `JobsProtocolHandler` with `immutable = true`, but each resolve call reads live manager state at access time.\n- `jobs://<id>` shows a cancellation section only when a cancelled job has `errorText`; cancelled jobs with `resultText` are not rendered with a result section there.\n- Retention eviction removes the job record, suppression flags, and watch flag together. 
After eviction, both `job` and reads of `jobs://<id>` behave as if the id never existed.\n",
75
+ "tools/job.md": "# job\n\n> Wait for or cancel background jobs managed by the session async runtime.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/job.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/job.md`\n- Key collaborators:\n - `packages/coding-agent/src/async/job-manager.ts` — job registry, cancellation, delivery suppression.\n - `packages/coding-agent/src/async/support.ts` — feature gating for background jobs.\n - `packages/coding-agent/src/tools/bash.ts` — explicit async bash and auto-backgrounded bash jobs.\n - `packages/coding-agent/src/task/index.ts` — async task-job scheduling.\n - `packages/coding-agent/src/sdk.ts` — automatic follow-up delivery for unsuppressed completions.\n - `packages/coding-agent/src/config/settings-schema.ts` — `async.pollWaitDuration` options.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `poll` | `string[]` | No | Job ids to watch. Cannot be combined with `list`. If omitted (and `cancel` is also omitted), the tool watches all running jobs. If provided, missing ids are silently filtered out before waiting. |\n| `cancel` | `string[]` | No | Job ids to cancel before any polling. Missing ids are reported as `not_found`; non-running ids as `already_completed`. |\n| `list` | `boolean` | No | Return an immediate snapshot of every job spawned by the calling agent (running + completed within retention) without waiting. Read-only — cannot be combined with `poll` or `cancel`. 
|\n\n## Outputs\nThe tool returns one text block plus `details`.\n\n- `content[0].text`: markdown-like plain text sections assembled by `#buildResult(...)`:\n - `## Cancelled (N)` for cancel outcomes.\n - `## Completed (N)` for non-running jobs, including stored `resultText` and `errorText`.\n - `## Still Running (N)` for jobs still in `running`.\n- `details.jobs`: array of snapshots:\n - `id: string`\n - `type: \"bash\" | \"task\"`\n - `status: \"running\" | \"completed\" | \"failed\" | \"cancelled\"`\n - `label: string`\n - `durationMs: number`\n - optional `resultText`, `errorText`\n- `details.cancelled` appears only when `cancel` was passed; each item is `{ id, status }` where status is `\"cancelled\" | \"not_found\" | \"already_completed\"`.\n\nStreaming behavior:\n- During a polling wait, `execute(...)` emits `onUpdate(...)` every 500 ms with an empty text block and fresh `details.jobs` snapshots.\n- Final return is single-shot after a completion, timeout, abort, or immediate fast path.\n\nRead-only snapshot path:\n- Calling `job` with `list: true` returns a markdown summary of every job spawned by the calling agent (running + completed within retention) without waiting.\n\n## Flow\n1. `JobTool.createIf(...)` in `packages/coding-agent/src/tools/job.ts` only exposes the tool when `isBackgroundJobSupportEnabled(...)` returns true for either `async.enabled` or `bash.autoBackground.enabled`.\n2. `execute(...)` fetches `session.asyncJobManager`. If absent, it returns `Async execution is disabled; no background jobs are available.`\n3. `cancel` ids are processed first:\n - `manager.getJob(id)` missing → `not_found`.\n - existing job with `status !== \"running\"` → `already_completed`.\n - running job → `manager.cancel(id)`, which sets `job.status = \"cancelled\"`, aborts the controller, and schedules eviction.\n4. 
Polling mode is chosen with `const shouldPoll = requestedPollIds !== undefined || cancelIds.length === 0`:\n - only `cancel` present → return immediately, no wait.\n - explicit `poll`, or no args at all → proceed to watch jobs.\n5. Watch set resolution:\n - explicit `poll` → map ids through `manager.getJob(...)` and drop missing ones.\n - no `poll` and no `cancel` → `manager.getRunningJobs()`.\n6. Empty watch set returns immediately:\n - if cancellations happened, return snapshots for the cancelled ids that still exist.\n - else return either `No matching jobs found for IDs: ...` or `No running background jobs to wait for.`\n7. If every watched job is already non-running, `#buildResult(...)` returns immediately without waiting.\n8. Otherwise the tool waits on `Promise.race(...)` across:\n - every watched running job's `job.promise`,\n - a timeout promise for `async.pollWaitDuration`,\n - the tool-call abort signal when present.\n9. Before waiting, it calls `manager.watchJobs(watchedJobIds)`. This suppresses automatic completion delivery for those ids while they are being watched.\n10. If `onUpdate` exists, a 500 ms interval sends progress snapshots from `#snapshotJobs(...)`; one snapshot is emitted immediately before entering the race.\n11. In `finally`, the tool always calls `manager.unwatchJobs(...)`, clears the timeout, and stops the progress interval.\n12. `#buildResult(...)` deduplicates jobs, snapshots current manager state, then calls `manager.acknowledgeDeliveries(...)` for every non-running job in the result. That suppresses later automatic follow-up delivery for the same completions and removes queued deliveries for those ids.\n13. The final text groups jobs by non-running vs still-running state. 
A timeout is not an error path; it simply returns the current snapshot.\n\n## Modes / Variants\n- Poll all running jobs: call with neither `poll` nor `cancel`.\n- Poll explicit ids: call with `poll` only.\n- Cancel only: call with `cancel` only; cancellations happen and the tool returns immediately.\n- Cancel then poll: call with both. Cancellations are applied first, then the tool watches the remaining resolved `poll` ids.\n- Read-only inspection: call with `list: true` for the same snapshot data without waiting on completion.\n\nSpawn paths that produce jobs:\n- `packages/coding-agent/src/tools/bash.ts`\n - `async: true` always registers a `type: \"bash\"` job with `AsyncJobManager.register(...)` and returns a start message.\n - auto-background mode (`bash.autoBackground.enabled`) starts the same managed job path for non-PTY commands, waits up to `min(bash.autoBackground.thresholdMs, timeoutMs - 1000)`, and if the command is still running returns a background-job start result instead of inline command output.\n- `packages/coding-agent/src/task/index.ts`\n - when `async.enabled` is on, the chosen agent is not blocking, and `tasks.length > 0`, each task item is registered as a `type: \"task\"` job.\n\nLifecycle and exact state names:\n- Conceptual scheduling path: `pending` (only task-progress bookkeeping before work starts) → `running` → `completed` / `failed`; cancellation changes a running async job to `cancelled`.\n- Exact `AsyncJob.status` values in `packages/coding-agent/src/async/job-manager.ts`: `\"running\" | \"completed\" | \"failed\" | \"cancelled\"`.\n- Exact per-task progress values in `packages/coding-agent/src/task/types.ts`: `\"pending\" | \"running\" | \"completed\" | \"failed\" | \"aborted\"`.\n\n## Side Effects\n- Filesystem\n - None in `job.ts` itself.\n - Jobs being observed may already have written artifacts/results through their own tool runtimes.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Reads and mutates 
`session.asyncJobManager` state.\n - `watchJobs(...)` / `unwatchJobs(...)` toggle delivery suppression for the watched ids.\n - `acknowledgeDeliveries(...)` marks completed ids as suppressed and removes queued deliveries for them.\n - `cancel(...)` aborts running jobs through each job's `AbortController`.\n- User-visible prompts / interactive UI\n - Polling emits periodic `onUpdate` snapshots every 500 ms.\n - Automatic job completion follow-ups are generated by `packages/coding-agent/src/sdk.ts` only for unsuppressed deliveries.\n- Background work / cancellation\n - Waiting uses a timeout plus optional tool-call abort signal.\n - Cancelling a job does not synchronously await teardown; it flips state, aborts, and returns control to the manager/job promise.\n\n## Limits & Caps\n- Poll wait duration comes from `async.pollWaitDuration` in `packages/coding-agent/src/config/settings-schema.ts`:\n - allowed values: `5s`, `10s`, `30s`, `1m`, `5m`\n - default: `30s`\n- Progress update cadence while polling: `PROGRESS_INTERVAL_MS = 500` in `packages/coding-agent/src/tools/job.ts`.\n- Async job retention default: `DEFAULT_RETENTION_MS = 5 * 60 * 1000` in `packages/coding-agent/src/async/job-manager.ts`.\n- Manager fallback max-running limit: `DEFAULT_MAX_RUNNING_JOBS = 15` in `packages/coding-agent/src/async/job-manager.ts`.\n- Session wiring clamps `async.maxJobs` to `1..100` before constructing the manager in `packages/coding-agent/src/sdk.ts`; settings default is `100` in `packages/coding-agent/src/config/settings-schema.ts`.\n- Async completion delivery retry backoff in `packages/coding-agent/src/async/job-manager.ts`:\n - base `500` ms\n - max `30_000` ms\n - jitter `< 200` ms\n - exponent capped at 8 doublings\n\n## Errors\n- Tool-disabled path is returned as normal text, not thrown: `Async execution is disabled; no background jobs are available.`\n- Polling a nonexistent id is not an exception:\n - with `poll` only, missing ids are dropped; if none remain the tool 
returns `No matching jobs found for IDs: ...`.\n - with `cancel`, each missing id is reported as `not_found` in `details.cancelled` and text.\n- Cancelling a non-running job is not an exception; it reports `already_completed` whether the actual status is `completed`, `failed`, or `cancelled`.\n- Tool-call abort during polling stops waiting and returns a final snapshot through `#buildResult(...)`; it does not cancel watched jobs.\n- Failures inside the underlying async work are stored on the job (`status: \"failed\"`, `errorText`) and reported in normal tool output, not rethrown by `job`.\n- Calling `list: true` against an empty manager returns a normal empty-list result rather than throwing; missing ids passed to `poll` are silently filtered.\n\n## Notes\n- `job` waits for the first watched running job to settle, not for all watched jobs. If others remain `running`, they are reported under `## Still Running`; the caller must invoke `job` again to continue waiting.\n- Delivery suppression is the key difference between snapshot and automatic delivery:\n - snapshots (`job` calls with `poll` or `list: true`) read current manager state;\n - follow-up delivery comes from `AsyncJobManager.#enqueueDelivery(...)` and `sdk.ts` `onJobComplete`;\n - watched or acknowledged ids are suppressed via `isDeliverySuppressed(...)`.\n- `manager.cancel(id)` sets `status = \"cancelled\"` before the underlying promise settles. The job function may later populate `resultText` or `errorText`; `job-manager.ts` preserves that text but does not transition the status away from `cancelled`.\n- Retention eviction removes the job record, suppression flags, and watch flag together. After eviction, both `job` calls and `list: true` snapshots behave as if the id never existed.\n",
77
76
  "tools/lsp.md": "# lsp\n\n> Query language servers for diagnostics, navigation, symbols, renames, code actions, capabilities, and raw requests.\n\n## Source\n- Entry: `packages/coding-agent/src/lsp/index.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/lsp.md`\n- Key collaborators:\n - `packages/coding-agent/src/lsp/client.ts` — client process lifecycle and JSON-RPC\n - `packages/coding-agent/src/lsp/config.ts` — config loading, auto-detect, server selection\n - `packages/coding-agent/src/lsp/lspmux.ts` — optional `lspmux` command wrapping\n - `packages/coding-agent/src/lsp/edits.ts` — apply `WorkspaceEdit` and text edits\n - `packages/coding-agent/src/lsp/utils.ts` — URI conversion, symbol resolution, formatting, glob expansion\n - `packages/coding-agent/src/lsp/types.ts` — tool schema and protocol types\n - `packages/coding-agent/src/lsp/clients/index.ts` — custom linter client cache/factory\n - `packages/coding-agent/src/lsp/clients/lsp-linter-client.ts` — LSP-backed linter adapter\n - `packages/coding-agent/src/lsp/clients/biome-client.ts` — Biome CLI diagnostics/formatting adapter\n - `packages/coding-agent/src/lsp/clients/swiftlint-client.ts` — SwiftLint CLI diagnostics adapter\n - `packages/coding-agent/src/tools/index.ts` — tool registration and `lsp.enabled` gating\n - `packages/coding-agent/src/tools/tool-timeouts.ts` — timeout defaults and clamping\n - `packages/coding-agent/src/lsp/defaults.json` — built-in server definitions for auto-detect\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `action` | string enum | Yes | One of `diagnostics`, `definition`, `references`, `hover`, `symbols`, `rename`, `rename_file`, `code_actions`, `type_definition`, `implementation`, `status`, `reload`, `capabilities`, `request`. |\n| `file` | string | No | File path; for `diagnostics` also a glob; for workspace forms use `\"*\"`; for `rename_file` this is the source path. 
|\n| `line` | number | No | 1-indexed line number for position-based actions. Defaults to `1` on the single-file action path. |\n| `symbol` | string | No | Substring used to resolve the column on `line`. Supports `name#N` occurrence selectors; `N` is 1-indexed and defaults to `1`. |\n| `query` | string | No | Workspace symbol query, code-action selector/filter, or LSP method name for `action=request`. |\n| `new_name` | string | No | Required for `rename` and `rename_file`. |\n| `apply` | boolean | No | For `rename`/`rename_file`, apply unless explicitly `false`. For `code_actions`, list unless explicitly `true`. |\n| `timeout` | number | No | Seconds, clamped by `clampTimeout(\"lsp\", ...)` to `5..60`, default `20`. |\n| `payload` | string | No | JSON string for `action=request`; overrides auto-built params. |\n\n## Outputs\n- Single-shot `AgentToolResult`.\n- `content` is always one text block: `[{ type: \"text\", text: string }]`.\n- `details` is `LspToolDetails`: `action`, `success`, optional `serverName`, optional original `request`.\n- No streaming updates.\n- No artifact URIs or background jobs.\n- Many validation failures are returned as ordinary text results with `details.success: false`; aborts throw `ToolAbortError` instead.\n\n## Flow\n1. `packages/coding-agent/src/tools/index.ts` registers `lsp: LspTool.createIf`; session creation also gates it behind `session.enableLsp !== false` and `settings.get(\"lsp.enabled\")`.\n2. `LspTool.execute()` in `packages/coding-agent/src/lsp/index.ts` clamps `timeout` with `clampTimeout(\"lsp\", ...)`, builds an `AbortSignal.timeout(...)`, and combines it with the caller signal.\n3. `getConfig()` loads and caches `LspConfig` per cwd, applies idle-timeout config via `setIdleTimeout()`, and reuses the cached config on later calls.\n4. 
Config loading in `packages/coding-agent/src/lsp/config.ts` merges `defaults.json` with JSON/YAML overrides from project, project config dirs, user config dirs, plugin roots, and home; if there are no overrides it auto-detects servers from root markers plus executable discovery.\n5. Server routing uses `getServersForFile()` / `getServerForFile()` from `config.ts`: extension or basename match, then sort primary servers before linters. `index.ts` further filters custom linter clients out of navigation/refactor paths with `getLspServersForFile()` / `getLspServerForFile()`.\n6. `getOrCreateClient()` in `client.ts` creates one process per `command:cwd`, optionally wraps supported commands with `lspmux`, spawns the server, starts the background message reader, sends `initialize`, stores server capabilities, then sends `initialized`.\n7. The message reader in `client.ts` parses LSP frames, resolves pending requests, caches `publishDiagnostics`, tracks `$/progress` tokens for project-load completion, answers `workspace/configuration`, and applies `workspace/applyEdit` requests through `applyWorkspaceEdit()`.\n8. File-scoped actions call `ensureFileOpen()` before requests. Column resolution uses `resolveSymbolColumn()` from `utils.ts`: read the target file, pick first non-whitespace when `symbol` is omitted, otherwise find the exact or case-insensitive match on the target line and honor `#N` occurrence selectors.\n9. Actions dispatch in `LspTool.execute()` through dedicated branches: workspace-only branches (`status`, some `diagnostics`, workspace `symbols`, workspace `reload`, `capabilities`, `request`) run before the single-file switch; all other single-file actions share one client lookup and `switch(action)`.\n10. Requests go through `sendRequest()` in `client.ts`, which allocates an incrementing JSON-RPC id, installs abort and timeout handling, sends `$/cancelRequest` on abort, and rejects on timeout or process exit.\n11. 
Actions that return edits either preview with `formatWorkspaceEdit()` or apply with `applyWorkspaceEdit()` from `edits.ts`; `rename_file` also performs the filesystem rename and then sends `workspace/didRenameFiles`.\n12. Non-abort failures inside the single-file action block are converted to `LSP error: ...`; many precondition failures return explicit text without throwing.\n\n## Modes / Variants\n### Routing and workspace scope\n- `file: \"*\"` is only special for `diagnostics`, `symbols`, and `reload`.\n- `status` ignores `file`.\n- `capabilities` with omitted `file` or `\"*\"` inspects all non-custom LSP servers; with a concrete file it scopes to matching non-custom servers.\n- `request` with omitted `file` or `\"*\"` chooses the first available non-custom LSP server; with a concrete file it chooses that file's primary non-linter server.\n- `rename_file` sends `workspace/willRenameFiles` and `workspace/didRenameFiles` to every non-custom LSP server from `getLspServers(config)`, not just one file-scoped server.\n- Diagnostics are the only tool action that queries both normal LSP servers and custom linter clients (`BiomeClient`, `SwiftLintClient`, or `LspLinterClient`).\n\n### `diagnostics`\n**Inputs**\n- Required: `file`, unless using workspace mode with `file: \"*\"`.\n- Optional: `timeout`.\n\n**Execution**\n- `file: \"*\"`: `runWorkspaceDiagnostics()` detects project type from root markers and runs one subprocess command: Rust `cargo check --message-format=short`, TypeScript `npx tsc --noEmit`, Go `go build ./...`, Python `pyright`.\n- Concrete file or glob: `resolveDiagnosticTargets()` treats non-globs as one target, otherwise expands a `Bun.Glob` up to `MAX_GLOB_DIAGNOSTIC_TARGETS`.\n- Per file, every matching server runs: custom clients call `lint(file)`; real LSP servers optionally wait for project load, capture `diagnosticsVersion`, `refreshFile()`, then `waitForDiagnostics()` for fresh `publishDiagnostics`.\n- Results are deduplicated by range+message 
and severity-sorted.\n\n**Output text**\n- Single target with no issues: `OK`.\n- Single target with issues: `<summary>:\\n<grouped diagnostics>`.\n- Batch/glob target: one section per file, plus an initial truncation warning when the glob exceeds the file cap.\n- Workspace mode: `Workspace diagnostics (<detected description>):\\n<command output>`.\n\n### `definition`\n**Inputs**\n- Required: `file`.\n- Optional: `line`, `symbol`, `timeout`.\n\n**Execution**\n- Sends `textDocument/definition` with `{ textDocument, position }`.\n- Accepts `Location`, `Location[]`, `LocationLink`, or `LocationLink[]`; `normalizeLocationResult()` converts `LocationLink` to `targetSelectionRange ?? targetRange`.\n- Waits for project load before the request.\n\n**Output text**\n- `No definition found` or `Found N definition(s):` followed by `file:line:col` and one context line above/below each location.\n\n### `type_definition`\nSame as `definition`, but sends `textDocument/typeDefinition` and reports `type definition(s)`.\n\n### `implementation`\nSame as `definition`, but sends `textDocument/implementation` and reports `implementation(s)`.\n\n### `references`\n**Inputs**\n- Required: `file`.\n- Optional: `line`, `symbol`, `timeout`.\n\n**Execution**\n- Sends `textDocument/references` with `includeDeclaration: true`.\n- For project-aware servers, retries up to `REFERENCES_RETRY_COUNT` times when the only hit is the queried declaration; between retries it waits for project load and sleeps `REFERENCES_RETRY_DELAY_MS`.\n- First `REFERENCE_CONTEXT_LIMIT` references include surrounding context; the rest are location-only.\n\n**Output text**\n- `No references found` or `Found N reference(s):` with contextual entries first, then `... 
M additional reference(s) shown without context` when truncated.\n\n### `hover`\n**Inputs**\n- Required: `file`.\n- Optional: `line`, `symbol`, `timeout`.\n\n**Execution**\n- Sends `textDocument/hover`.\n- `extractHoverText()` flattens strings, markup content, marked-string objects, or arrays into plain text.\n\n**Output text**\n- `No hover information` or the extracted hover text.\n\n### `symbols`\n**Inputs**\n- Workspace mode: `file: \"*\"` or omitted file on the early workspace branch, plus required `query`.\n- Document mode: required `file`.\n- Optional: `timeout`.\n\n**Execution**\n- Workspace mode sends `workspace/symbol` to every non-custom LSP server, post-filters matches with `filterWorkspaceSymbols()`, deduplicates with `dedupeWorkspaceSymbols()`, then truncates to `WORKSPACE_SYMBOL_LIMIT`.\n- Document mode sends `textDocument/documentSymbol` to the primary server. If the first item has `selectionRange`, it formats hierarchical `DocumentSymbol`s; otherwise it formats flat `SymbolInformation`s.\n\n**Output text**\n- Workspace mode: `Found N symbol(s) matching \"query\":` plus formatted `name @ file:line:col`, with an omission line when over the limit.\n- Document mode: `Symbols in <file>:` plus hierarchical or flat symbol lines.\n\n### `rename`\n**Inputs**\n- Required: `file`, `new_name`.\n- Optional: `line`, `symbol`, `apply`, `timeout`.\n\n**Execution**\n- Waits for project load, sends `textDocument/rename`, receives a `WorkspaceEdit`.\n- `apply !== false` applies edits immediately with `applyWorkspaceEdit()`.\n- `apply === false` renders a preview with `formatWorkspaceEdit()`.\n\n**Output text**\n- `Rename returned no edits`, `Applied rename:` plus applied change lines, or `Rename preview:` plus summarized edits.\n\n### `rename_file`\n**Inputs**\n- Required: `file` source path, `new_name` destination path.\n- Optional: `apply`, `timeout`.\n\n**Execution**\n- Resolves absolute source and destination, rejects identical paths, missing source, existing 
destination, empty rename set, or directories with more than `MAX_RENAME_PAIRS` files.\n- `enumerateRenamePairs()` returns one `{oldUri,newUri}` pair for a file or walks every regular file in a directory tree.\n- Sends `workspace/willRenameFiles` with `{ files: pairs }` to every non-custom LSP server; collects returned `WorkspaceEdit`s and server notes.\n- Preview mode (`apply === false`) only formats those edits.\n- Apply mode runs each returned `WorkspaceEdit`, renames the source path on disk, sends `textDocument/didClose` for every renamed open file, deletes those `openFiles` entries, then sends `workspace/didRenameFiles`.\n\n**Output text**\n- Preview: `Rename preview: <file-count label> → <dest>` plus per-server edit summaries and optional server notes.\n- Apply: `Renamed <file-count label> → <dest>` plus applied edit summaries, filesystem rename line, and optional server notes.\n\n### `code_actions`\n**Inputs**\n- Required: `file`.\n- Optional: `line`, `symbol`, `query`, `apply`, `timeout`.\n\n**Execution**\n- Reads cached diagnostics for the open URI from `client.diagnostics` and sends `textDocument/codeAction` for a zero-width range at the resolved position.\n- When `apply !== true`, `query` is passed as `context.only: [query]`; this is a server-side kind filter.\n- When `apply === true`, `query` becomes a required client-side selector: either a zero-based numeric index or a case-insensitive substring of the action title.\n- Applying a `CodeAction` uses `applyCodeAction()`: optionally `codeAction/resolve`, then `applyWorkspaceEdit(edit)`, then optional `workspace/executeCommand`.\n- Applying a bare `Command` only runs `workspace/executeCommand`.\n\n**Output text**\n- List mode: `N code action(s):` plus `index: [kind] title` lines.\n- Apply mode success: `Applied \"title\":` plus `Workspace edit:` and/or `Executed command(s):` sections.\n- Apply mode miss: `No code action matches \"query\". 
Available actions:`.\n- Apply mode with no edit/command: `Action \"title\" has no workspace edit or command to apply`.\n\n### `status`\n**Inputs**\n- None.\n\n**Execution**\n- Reads configured servers from cached `LspConfig`, not `getActiveClients()`.\n- Calls `detectLspmux()` and appends status text when `lspmux` is installed.\n\n**Output text**\n- `Active language servers: ...` or `No language servers configured for this project`, optionally followed by `lspmux: active (multiplexing enabled)` or `lspmux: installed but server not running`.\n\n### `reload`\n**Inputs**\n- Workspace mode: `file: \"*\"` or omitted `file`.\n- Single-file mode: required `file`.\n- Optional: `timeout`.\n\n**Execution**\n- Workspace mode reloads every non-custom LSP server.\n- Single-file mode reloads the primary server for that file.\n- `reloadServer()` tries `rust-analyzer/reloadWorkspace`, then `workspace/didChangeConfiguration` with `{ settings: {} }`; if neither works it kills the process so the next request cold-starts a new client.\n\n**Output text**\n- One line per server: `Reloaded <server>`, `Restarted <server>`, or `Failed to reload <server>: ...`.\n\n### `capabilities`\n**Inputs**\n- Optional: `file`, `timeout`.\n\n**Execution**\n- With a concrete `file`, inspects matching non-custom servers for that file.\n- With omitted `file` or `\"*\"`, inspects every non-custom configured server.\n- Starts servers as needed and dumps `client.serverCapabilities ?? {}` as pretty JSON.\n\n**Output text**\n- Per server: `<server>:` followed by indented `capabilities: { ... }`, or `<server>: failed to start (...)`.\n\n### `request`\n**Inputs**\n- Required: `query` method name.\n- Optional: `file`, `line`, `symbol`, `payload`, `timeout`.\n\n**Execution**\n- Chooses one non-custom server: file-scoped primary server, otherwise the first configured non-custom server.\n- Param building precedence:\n 1. If `payload` is present, parse JSON and use it verbatim.\n 2. 
Else if `file` is concrete and `line` is present, build `{ textDocument: { uri }, position: { line: line - 1, character } }` using `resolveSymbolColumn()`.\n 3. Else if `file` is concrete, build `{ textDocument: { uri } }`.\n 4. Else use `{}`.\n- Opens the file first when `file` is concrete.\n\n**Output text**\n- Success: `<server> ← <method>:\\n<formatted result>`, where non-string results are `JSON.stringify(..., null, 2)` and nullish values become `null`.\n- Failure: `LSP error from <server> on <method>: ...`.\n\n## Side Effects\n- Filesystem\n - Reads config files, target files, and root markers.\n - `rename` and `code_actions` may edit/create/delete/rename files via `applyWorkspaceEdit()`.\n - `rename_file` always renames the source path on disk in apply mode.\n - Server-initiated `workspace/applyEdit` requests also mutate files through `applyWorkspaceEdit()`.\n- Network\n - None directly; communication is local stdio JSON-RPC to subprocesses.\n- Subprocesses / native bindings\n - Spawns language servers with `ptree.spawn()`.\n - Workspace diagnostics spawns `cargo`, `npx`, `go`, or `pyright`.\n - `BiomeClient` and `SwiftLintClient` spawn CLI tools.\n - Optional `lspmux` detection spawns `lspmux status`; supported servers may be wrapped through `lspmux client`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Caches config per cwd in `configCache`.\n - Caches LSP clients per `command:cwd`, with `pendingRequests`, `diagnostics`, `openFiles`, `serverCapabilities`, and project-load state.\n - Caches custom linter clients by `serverName:cwd`.\n - Updates client `lastActivity`; optional idle-timeout cleanup is driven by `setIdleTimeout()`.\n- Background work / cancellation\n - Every request has an abortable timeout signal.\n - Aborting an in-flight LSP request sends `$/cancelRequest`.\n - Background message readers persist for each live client until process exit/shutdown.\n\n## Limits & Caps\n- Tool timeout clamp: default `20`, min `5`, max 
`60` seconds — `TOOL_TIMEOUTS.lsp` in `packages/coding-agent/src/tools/tool-timeouts.ts`.\n- LSP request default timeout inside `sendRequest()`: `30_000ms` — `DEFAULT_REQUEST_TIMEOUT_MS` in `packages/coding-agent/src/lsp/client.ts`.\n- Warmup initialize timeout default: `5_000ms` — `WARMUP_TIMEOUT_MS` in `packages/coding-agent/src/lsp/client.ts`.\n- Project-load wait fallback: `15_000ms` — `PROJECT_LOAD_TIMEOUT_MS` in `packages/coding-agent/src/lsp/client.ts`.\n- Idle-client sweep interval when enabled: `60_000ms` — `IDLE_CHECK_INTERVAL_MS` in `packages/coding-agent/src/lsp/client.ts`.\n- Diagnostic message output cap: first `50` messages — `DIAGNOSTIC_MESSAGE_LIMIT` in `packages/coding-agent/src/lsp/index.ts`.\n- Single-file diagnostics wait: `3_000ms` — `SINGLE_DIAGNOSTICS_WAIT_TIMEOUT_MS`.\n- Batch/glob diagnostics wait per file: `400ms` — `BATCH_DIAGNOSTICS_WAIT_TIMEOUT_MS`.\n- Glob diagnostic target cap: first `20` matches — `MAX_GLOB_DIAGNOSTIC_TARGETS`.\n- Workspace symbol cap: first `200` entries — `WORKSPACE_SYMBOL_LIMIT`.\n- Reference context cap: first `50` references include source context — `REFERENCE_CONTEXT_LIMIT`.\n- References retry count: `2` retries, `250ms` backoff — `REFERENCES_RETRY_COUNT`, `REFERENCES_RETRY_DELAY_MS`.\n- Directory rename cap: `1_000` file pairs — `MAX_RENAME_PAIRS`.\n- `detectLspmux()` state cache TTL: `5 * 60 * 1000ms`; liveness check timeout: `1_000ms` — `STATE_CACHE_TTL_MS`, `LIVENESS_TIMEOUT_MS` in `packages/coding-agent/src/lsp/lspmux.ts`.\n- Workspace diagnostics output cap: first `50` lines from the subprocess.\n\n## Errors\n- Missing or invalid inputs are usually returned as text with `details.success: false`, not thrown:\n - missing `file`/`query`/`new_name`\n - invalid JSON in `payload`\n - no matching server\n - invalid `rename_file` source/destination conditions\n- `resolveSymbolColumn()` throws explicit errors for missing files, missing symbols, and out-of-bounds `#N` selectors; these surface as `LSP error: ...` 
or request-specific error text.\n- `sendRequest()` rejects on timeout with `LSP request <method> timed out after <ms>ms`.\n- Client process exit rejects all pending requests with an exit-code/stderr error assembled in `getOrCreateClient()`.\n- Single-file action failures inside the main `try` become `LSP error: <message>`.\n- `request` has its own error envelope: `LSP error from <server> on <method>: <message>`.\n- Some server failures are intentionally softened:\n - diagnostics continue when one server fails\n - `rename_file` suppresses `workspace/willRenameFiles` “method not found” errors and records other server errors as notes\n - `code_actions` ignores `codeAction/resolve` failures and applies unresolved actions when possible\n- Aborts are not converted to text: `ToolAbortError` is rethrown.\n\n## Notes\n- `status` reports configured/available servers from `LspConfig`, not currently active client processes from `getActiveClients()`.\n- `getLspServerForFile()` excludes `createClient` adapters and linter-only servers; navigation/refactor actions never target Biome/SwiftLint custom clients.\n- `getServersForFile()` matches both file extensions and exact basenames from `fileTypes`; config can target names like `Dockerfile` if present.\n- `symbol` matching is exact first, then case-insensitive; a `#N` selector picks the Nth occurrence on the specified line only, and matching never scans other lines.\n- `code_actions` uses `query` in two different ways: server-side `context.only` filter in list mode, client-side title/index selector in apply mode.\n- `rename` and `rename_file` default to apply. 
Preview requires `apply: false`.\n- `request` with `file: \"*\"` is treated the same as omitted `file`: it does not build workspace-specific params.\n- When `reload` falls back to killing a server process, it does not recreate the client immediately; the next request triggers reinitialization.\n- `workspace/applyEdit` can apply edits initiated by the server outside the direct tool action result path.\n- `detectLspmux()` can be disabled with `PI_DISABLE_LSPMUX=1`; only `rust-analyzer` is in `DEFAULT_SUPPORTED_SERVERS`.\n- `configCache` is per-process and never auto-invalidated; config changes require a fresh process to be observed by `getConfig()` callers.",
78
- "tools/read.md": "# read\n\n> Read files, directories, archives, SQLite databases, internal resources, images, documents, and URLs through one `path` string.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/read.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/read.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/path-utils.ts` — split `path` from trailing selectors; normalize local paths.\n - `packages/coding-agent/src/tools/archive-reader.ts` — detect `archive.ext:inner/path`, index archives, list/read entries.\n - `packages/coding-agent/src/tools/sqlite-reader.ts` — detect SQLite targets, parse selectors, render tables.\n - `packages/coding-agent/src/tools/fetch.ts` — URL parsing, fetch/render pipeline, URL cache/artifacts.\n - `packages/coding-agent/src/internal-urls/router.ts` — resolve `agent://`, `artifact://`, `jobs://`, `local://`, `mcp://`, `memory://`, `pi://`, `rule://`, `skill://`.\n - `packages/coding-agent/src/edit/notebook.ts` — convert `.ipynb` to editable `# %% [...] cell:N` text.\n - `packages/coding-agent/src/utils/file-display-mode.ts` — decide hashline vs line-number vs raw display.\n - `packages/coding-agent/src/workspace-tree.ts` — render directory trees.\n - `packages/coding-agent/src/edit/file-read-cache.ts` — cache read lines for later hashline edit recovery.\n - `packages/coding-agent/src/tools/index.ts` — registers `read: s => new ReadTool(s)`.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `path` | `string` | Yes | Filesystem path, internal URL, or web URL. May end with a trailing selector such as `:50-100` or `:raw`. |\n\n### Selector grammar\n\nFor normal file-like reads, `splitPathAndSel()` in `packages/coding-agent/src/tools/path-utils.ts` recognizes the final suffix only when it matches one of these forms:\n\n| Suffix | Meaning |\n| --- | --- |\n| `:raw` | Raw/verbatim mode. Disables structural summaries and line prefixes. 
|\n| `:N` / `:LN` | Start at 1-indexed line `N`, open-ended. |\n| `:A-B` / `:LA-LB` | Inclusive 1-indexed line range. |\n| `:A+C` / `:LA+LC` | `C` lines starting at `A`; tool converts this to end line `A + C - 1`. |\n| `:range:raw` or `:raw:range` | Same line selection, but raw output. |\n\nValidation in `parseLineRangeChunk()`:\n- line numbers are 1-indexed; `:0` throws.\n- `+` counts must be `>= 1`.\n- `-` end must be `>= start`.\n\nSelector parsing intentionally falls through for unrecognized trailing `:...`; archive and SQLite paths consume their own colon syntax.\n\nURL selectors are parsed separately in `packages/coding-agent/src/tools/fetch.ts` and support only `:raw`, `:N`, `:A-B`, and `:A+C` — no optional `L` prefix there.\n\n## Outputs\n- Single-shot `AgentToolResult` built through `toolResult()` in `packages/coding-agent/src/tools/tool-result.ts`.\n- `content` is usually one text block. Image reads may return `[text, image]`.\n- `details` is path-dependent. `ReadToolDetails` may include:\n - `kind: \"file\" | \"url\"` (URL path uses `kind: \"url\"`; file reads usually omit `kind`)\n - `isDirectory`\n - `resolvedPath`\n - `suffixResolution`\n - URL fields: `url`, `finalUrl`, `contentType`, `method`, `notes`\n - `truncation`\n - `displayContent` (unprefixed text + starting line for TUI rendering)\n - `summary` (`lines`, `elidedSpans`) for structural summaries\n - `meta` from `packages/coding-agent/src/tools/output-meta.ts`\n- `details.meta.source` is set to the backing path, URL, or internal URL.\n- `details.meta.truncation` carries shown range, total lines/bytes, next offset, and optional `artifactId` for cached URL output.\n- Directory/archive listings and SQLite table lists also set `details.meta.limits` when list limits trigger.\n\n## Flow\n1. `ReadTool.execute()` accepts `{ path }`. `file://...` inputs are expanded first with `expandPath()`.\n2. 
It tries URL handling first via `parseReadUrlTarget()` from `packages/coding-agent/src/tools/fetch.ts`.\n - Plain URL reads call `executeReadUrl()`.\n - URL reads with line selectors load or refresh the URL cache with `loadReadUrlCacheEntry()` and paginate the cached text locally with `#buildInMemoryTextResult()`.\n3. If not a web URL, it checks `session.internalRouter.canHandle(...)`.\n - Internal URLs are resolved with `internalRouter.resolve()`.\n - `agent://` query extraction (`/path` or `?q=`) bypasses pagination and returns the extracted content directly.\n - Other internal resources are paginated in-memory by `#buildInMemoryTextResult()`.\n4. It tries archive resolution next with `#resolveArchiveReadPath()`.\n - `parseArchivePathCandidates()` scans for `.tar`, `.tar.gz`, `.tgz`, or `.zip` anywhere before `:sub/path`.\n - On success, `#readArchive()` either lists a directory or decodes an entry as UTF-8 text.\n5. It tries SQLite resolution with `#resolveSqliteReadPath()`.\n - `parseSqlitePathCandidates()` scans for `.sqlite`, `.sqlite3`, `.db`, `.db3` before any `:table`, `:key`, or `?query` suffix.\n - `#readSqlite()` dispatches on `parseSqliteSelector()`.\n6. Otherwise it treats the input as a local filesystem path.\n - `resolveReadPath()` expands `~`, resolves relative to session cwd, treats bare `/` as session cwd, and retries macOS screenshot/NFD/curly-quote variants.\n - If the path does not exist, `findUniqueSuffixMatch()` does a workspace glob-based unique suffix lookup (skipped for remote mounts).\n7. Directories go through `#readDirectory()`.\n8. Non-directories branch by content type:\n - image metadata / inline image\n - editable notebook text\n - markit-converted document\n - structural summary for parseable code/prose\n - streamed text/line-range read\n9. Local text reads are streamed by `streamLinesFromFile()` rather than loading the whole file. The tool adds up to 3 lines of context before/after explicit bounded ranges.\n10. 
Non-empty contiguous local reads are recorded into `getFileReadCache(session)` for later hashline edit recovery.\n11. If suffix resolution happened, the first text block is prefixed with `[Path '...' not found; resolved to '...' via suffix match]`.\n\n## Modes / Variants\n\n### Local text files\n- No selector: if summarization is enabled and the file is small enough, `#trySummarize()` calls `summarizeCode()`.\n - Guards: file size `<= 2 MiB` (`MAX_SUMMARY_BYTES`), line count `<= 20_000` (`MAX_SUMMARY_LINES`).\n - Summary output keeps selected declarations and replaces elided spans with `...`.\n - When an elided block sits between matching brace lines, `#renderSummary()` may merge them into one anchored line rather than emitting separate opener/closer lines.\n- Explicit selector or summarization miss: streamed text read.\n - Default open-ended limit is `min(session setting read.defaultLimit, DEFAULT_MAX_LINES)`.\n - Explicit ranges expand by `RANGE_CONTEXT_LINES = 3` on the constrained sides only.\n - Non-raw output uses `resolveFileDisplayMode()`:\n - hashline anchors when edit mode is hashline, read is not raw, source is mutable, edit tool exists, and `readHashLines !== false`\n - otherwise optional line numbers when `readLineNumbers === true`\n - raw mode suppresses both\n- Prefix format in hashline mode is `lineNumber + 2-char line hash + \"|\"`, e.g. 
`41th|def alpha():`, from `formatHashLine()` in `packages/coding-agent/src/hashline/hash.ts`.\n- Those anchors are what the `edit`/hashline path consumes later; immutable sources and `:raw` intentionally suppress them.\n\n### Directory listings\n- `#readDirectory()` calls `buildDirectoryTree()` with:\n - `maxDepth = 2`\n - `perDirLimit = 12`\n - `rootLimit = null`\n - `lineCap = limit` when a line selector was present, else unlimited at this layer\n- `buildDirectoryTree()` sorts siblings by recency, shows file sizes and relative ages, and may mark `limits.resultLimit` when the tree truncates.\n- Empty directories render as `(empty directory)`.\n\n### Archives\n- Supported archive containers: `.tar`, `.tar.gz`, `.tgz`, `.zip`.\n- Syntax: `archive.ext`, `archive.ext:path/inside`, `archive.ext:path/inside:50-60`.\n- `openArchive()` reads the whole archive into memory, then:\n - tar/tgz uses `new Bun.Archive(bytes)`\n - zip uses `fflate.unzipSync()`\n- Archive paths normalize `/`, drop `.` segments, and reject `..`.\n- Directory reads list immediate children; files show `name` plus ` (size)` when size > 0.\n- Directory listing default limit is `500` entries in `#readArchiveDirectory()`.\n- File entries are UTF-8 decoded. Non-UTF-8 entries return `[Cannot read binary archive entry '...' 
(...)]` instead of bytes.\n- Text archive entries reuse the normal in-memory pagination/anchoring path.\n\n### SQLite databases\n- Database detection requires both a matching extension and a valid SQLite file header (`isSqliteFile()`).\n- Selector forms from `parseSqliteSelector()`:\n\n#### `db.sqlite`\n- `kind: \"list\"`\n- Lists non-`sqlite_%` tables with row counts.\n- `#readSqlite()` caps the rendered list to `500` tables via `applyListLimit()`.\n\n#### `db.sqlite:table`\n- `kind: \"schema\"`\n- Returns `sqlite_master.sql` plus sample rows.\n- Sample size is `DEFAULT_SCHEMA_SAMPLE_LIMIT = 5`.\n\n#### `db.sqlite:table:key`\n- `kind: \"row\"`\n- Resolves by primary key when the table has exactly one PK column; otherwise falls back to `rowid` lookup.\n- No query parameters allowed on row lookups.\n\n#### `db.sqlite:table?limit=...&offset=...&order=...&where=...`\n- `kind: \"query\"`\n- Defaults: `limit = 20`, `offset = 0`.\n- `limit` is capped at `500`.\n- `order` accepts `column` or `column:asc|desc` and must name an existing column.\n- `where` is accepted only after `validateWhereClause()` rejects comments, semicolons, and control keywords like `LIMIT`, `OFFSET`, `UNION`, `ATTACH`, `PRAGMA`.\n- Unknown query parameters throw.\n\n#### `db.sqlite?q=SELECT ...`\n- `kind: \"raw\"`\n- Cannot be combined with table selectors or any other query param.\n- Empty `q` throws.\n- `executeReadQuery()` runs `db.prepare(sql).all()` and rejects bound parameters; it does not verify that the SQL starts with `SELECT`.\n\n- Rendering caps in `packages/coding-agent/src/tools/sqlite-reader.ts`:\n - ASCII table width `120` (`MAX_RENDER_WIDTH`)\n - per-column width `40` (`MAX_COLUMN_WIDTH`)\n- `#readSqlite()` opens Bun SQLite in `{ readonly: true, strict: true }` and sets `PRAGMA busy_timeout = 3000`.\n\n### Documents\n- `CONVERTIBLE_EXTENSIONS` in `packages/coding-agent/src/tools/read.ts` covers `.pdf`, `.doc`, `.docx`, `.ppt`, `.pptx`, `.xls`, `.xlsx`, `.rtf`, `.epub`.\n- 
`convertFileWithMarkit()` converts the file to text/markdown.\n- Converted output is then head-truncated with normal shared limits; there is no line selector support inside the source document before conversion.\n- Conversion failures return a text block like `[Cannot read .pdf file: ...]`.\n\n### Jupyter notebooks\n- `.ipynb` goes through `readEditableNotebookText()` unless `:raw` was requested.\n- Output is editable plain text with markers like:\n\n```text\n# %% [code] cell:0\n...\n```\n\n- Raw mode bypasses that conversion and falls back to file-text reading.\n\n### Images\n- Image detection is metadata-based (`readImageMetadata()`).\n- Max accepted image size is `20 MiB` (`MAX_IMAGE_INPUT_BYTES`, re-exported as `MAX_IMAGE_SIZE`). Larger files throw.\n- If `inspect_image.enabled` is true, `read` returns metadata only (MIME, bytes, dimensions, channels, alpha) plus a suggestion to call `inspect_image`.\n- Otherwise it calls `loadImageInput()` and returns:\n - a text note from the image loader\n - an inline image block\n- Unsupported/undecodable image formats throw a `ToolError`.\n\n### Internal URLs\n- `read` does not resolve these itself; it delegates to `session.internalRouter.resolve()`.\n- Registered protocols are outside this file, but the router in `packages/coding-agent/src/internal-urls/router.ts` is built for `agent://`, `artifact://`, `issue://`, `jobs://`, `local://`, `mcp://`, `memory://`, `pi://`, `pr://`, `rule://`, and `skill://`.\n- `#handleInternalUrl()` behavior:\n - parses the URL with `parseInternalUrl()` so colons inside the host segment are legal\n - for `agent://`, treats non-root path extraction or `?q=` extraction as a special no-pagination mode\n - otherwise paginates the resolved text in memory\n - passes `immutable` through to `resolveFileDisplayMode()` so anchors are suppressed for immutable resources such as artifacts, skills, memory, and agent outputs\n - sets `ignoreResultLimits: true` for `skill://` so the full skill text is 
paginated only by explicit selectors, not by the normal default line limit\n- `issue://<N>` / `pr://<N>` (and the long form `issue://<owner>/<repo>/<N>` / `pr://<owner>/<repo>/<N>`) route through the same SQLite cache the `github` tool writes to; `?comments=0` selects the no-comments rendering. Bare `issue://` / `pr://` (and `issue://<owner>/<repo>` / `pr://<owner>/<repo>`) issue a live `gh issue list` / `gh pr list` for browsing, accepting `?state=`, `?limit=`, `?author=`, `?label=`. PR diffs share the same cache through `pr://<N>/diff` (numbered file listing with per-file hints), `pr://<N>/diff/<i>` (single file slice; 1-indexed), and `pr://<N>/diff/all` (verbatim unified diff); the listing and per-file slices are reconstructed from the cached unified-diff payload, so all three variants share one `gh pr diff` invocation per PR. Diff content is served as `text/plain`. Soft TTL `github.cache.softTtlSec` (default 5 minutes), hard TTL `github.cache.hardTtlSec` (default 7 days). Stale-hit returns the cached row and schedules a background refresh.\n\n### Web URLs\n- `parseReadUrlTarget()` accepts `http://`, `https://`, or `www.` targets.\n- Plain URL reads call `executeReadUrl()` in `packages/coding-agent/src/tools/fetch.ts`.\n- `:raw` means raw HTML/body fallback path; plain URL reads prefer rendered/reader-friendly output.\n- `:N`, `:A-B`, `:A+C` do not refetch. They page over cached output from the prior or current URL render.\n- URL render pipeline in `renderUrl()`:\n 1. normalize scheme (`https://` added for bare `www.`)\n 2. try special handlers for known sites unless raw\n 3. fetch with `loadPage()`\n 4. if content is image/PDF/DOCX/etc., try binary fetch + markit/image handling\n 5. handle JSON directly, feeds via feed parser, plain text directly\n 6. for HTML and non-raw mode, try markdown alternates, `URL.md`, content negotiation, feed alternates, HTML-to-text renderers, extracted linked documents, then `llms.txt`\n 7. 
fall back to raw body text/html\n- URL output is wrapped with a small header:\n\n```text\nURL: ...\nContent-Type: ...\nMethod: ...\nNotes: ...\n\n---\n```\n\n- `method` records the winning path (`json`, `feed`, `text`, `alternate-markdown`, `md-suffix`, `content-negotiation`, `image`, `markit`, `llms.txt`, `raw`, `raw-html`, etc.).\n- URL reads may return an inline image block when the fetched resource is a supported image and survives resizing.\n\n## Side Effects\n- Filesystem\n - Opens and streams local files.\n - Reads entire archives into memory before indexing.\n - May read URL-cache artifact files from the session artifacts directory.\n - Writes URL output artifacts when URL output is truncated or when line-range pagination needs a persisted cache body.\n- Network\n - URL mode performs HTTP fetches, binary refetches, and alternate-endpoint probes.\n- Subprocesses / native bindings\n - Uses Bun SQLite for `.db`/`.sqlite*`.\n - Uses `Bun.Archive` for tar/tgz and `fflate` for zip.\n - URL HTML rendering can delegate into site handlers and HTML-to-text backends from `packages/coding-agent/src/tools/fetch.ts`.\n- Session state\n - Records local text lines into `session.fileReadCache` for later stale-anchor recovery.\n - Uses `session.internalRouter` for internal URLs.\n - Uses `session.allocateOutputArtifact()` for cached/truncated URL output.\n- Background work / cancellation\n - Most branches honor `AbortSignal`; the tool itself is marked `nonAbortable = true`, but helper paths still call `throwIfAborted(signal)`.\n\n## Limits & Caps\n- Shared text truncation defaults from `packages/coding-agent/src/session/streaming-output.ts`:\n - `DEFAULT_MAX_LINES = 3000`\n - `DEFAULT_MAX_BYTES = 50 * 1024`\n- Local text open-ended default line limit: `read.defaultLimit`, clamped to `[1, DEFAULT_MAX_LINES]`.\n- Explicit line ranges add `3` context lines on each constrained side (`RANGE_CONTEXT_LINES`).\n- File streaming chunk size: `8 * 1024` bytes (`READ_CHUNK_SIZE`).\n- 
Local streamed byte budget for line reads: `max(DEFAULT_MAX_BYTES, maxLinesToCollect * 512)`.\n- Structural summaries only run when file size `<= 2 MiB` and line count `<= 20_000`.\n- Image input max: `20 MiB`.\n- Directory tree caps for local directories: depth `2`, per-directory children `12`.\n- Archive directory default list cap: `500` entries.\n- SQLite:\n - default row query limit `20`\n - schema sample limit `5`\n - max query limit `500`\n - table list cap `500`\n - render width `120`, column width `40`\n - busy timeout `3000` ms\n- URL read result shown to the model is truncated to `300` lines and `50 KiB` in `executeReadUrl()`; full cached output can be attached as an artifact.\n- Inline fetched URL images:\n - source bytes cap `20 MiB`\n - post-resize inline output cap `300 KiB`\n- Unique suffix auto-resolution glob timeout: `5000` ms.\n- File-read cache holds `30` paths per session.\n\n## Errors\n- Validation and operational failures surface as `ToolError`.\n- Selector errors include:\n - `Line selector 0 is invalid; lines are 1-indexed. Use :1.`\n - invalid `A+B` / `A-B` shapes\n - `Cannot combine query extraction with offset/limit` for `agent://.../path:50`\n- Missing local/archive/sqlite paths first attempt unique suffix resolution; if no unique match exists they error.\n- Out-of-bounds line reads do not throw. 
They return explanatory text with a suggestion such as `Use :1 ...` or `Use :<last line> ...`.\n- Binary archive entries do not throw; they return a text notice.\n- Document conversion failure returns a text notice.\n- Image oversize/unsupported/invalid cases throw.\n- SQLite parser rejects unsupported parameter combinations early; DB/runtime errors are caught and rethrown as `ToolError(message)`.\n- URL fetch failure does not throw when HTTP fetch succeeds but `response.ok === false`; it returns a failed URL read with `method: \"failed\"` and explanatory notes.\n\n## Notes\n- `readSchema` examples include `https://example.com:L1-L40`, but URL selector parsing in `packages/coding-agent/src/tools/fetch.ts` does not accept `L` prefixes.\n- Hashline anchors are suppressed for raw reads and immutable internal resources because there is no editable backing target for later `edit` consumption.\n- `splitPathAndSel()` intentionally treats unknown trailing `:...` as part of the path so `archive.zip:inner/file` and `db.sqlite:table:key` still work.\n- `resolveReadPath()` contains macOS-specific filename fallbacks for screenshot timestamps, NFD Unicode normalization, and curly apostrophes.\n- A bare `/` resolves to the session cwd, not the filesystem root.\n- URL cache keys are session-scoped and normalized by requested URL + raw/rendered mode; both requested URL and final redirected URL are cached.\n- URL line-range reads request `ensureArtifact: true, preferCached: true` so a later paginated read can reopen the same rendered body from artifact storage.\n- Raw SQLite `q=` execution is not keyword-restricted beyond “no bound parameters”; the read tool relies on the surrounding contract to keep it read-only.\n- The file-read cache is not a read acceleration cache. It exists to recover hashline edits when the file changed after the read.",
77
+ "tools/read.md": "# read\n\n> Read files, directories, archives, SQLite databases, internal resources, images, documents, and URLs through one `path` string.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/read.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/read.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/path-utils.ts` — split `path` from trailing selectors; normalize local paths.\n - `packages/coding-agent/src/tools/archive-reader.ts` — detect `archive.ext:inner/path`, index archives, list/read entries.\n - `packages/coding-agent/src/tools/sqlite-reader.ts` — detect SQLite targets, parse selectors, render tables.\n - `packages/coding-agent/src/tools/fetch.ts` — URL parsing, fetch/render pipeline, URL cache/artifacts.\n - `packages/coding-agent/src/internal-urls/router.ts` — resolve `agent://`, `artifact://`, `local://`, `mcp://`, `memory://`, `pi://`, `rule://`, `skill://`.\n - `packages/coding-agent/src/edit/notebook.ts` — convert `.ipynb` to editable `# %% [...] cell:N` text.\n - `packages/coding-agent/src/utils/file-display-mode.ts` — decide hashline vs line-number vs raw display.\n - `packages/coding-agent/src/workspace-tree.ts` — render directory trees.\n - `packages/coding-agent/src/edit/file-read-cache.ts` — cache read lines for later hashline edit recovery.\n - `packages/coding-agent/src/tools/index.ts` — registers `read: s => new ReadTool(s)`.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `path` | `string` | Yes | Filesystem path, internal URL, or web URL. May end with a trailing selector such as `:50-100` or `:raw`. |\n\n### Selector grammar\n\nFor normal file-like reads, `splitPathAndSel()` in `packages/coding-agent/src/tools/path-utils.ts` recognizes the final suffix only when it matches one of these forms:\n\n| Suffix | Meaning |\n| --- | --- |\n| `:raw` | Raw/verbatim mode. Disables structural summaries and line prefixes. 
|\n| `:N` / `:LN` | Start at 1-indexed line `N`, open-ended. |\n| `:A-B` / `:LA-LB` | Inclusive 1-indexed line range. |\n| `:A+C` / `:LA+LC` | `C` lines starting at `A`; tool converts this to end line `A + C - 1`. |\n| `:range:raw` or `:raw:range` | Same line selection, but raw output. |\n\nValidation in `parseLineRangeChunk()`:\n- line numbers are 1-indexed; `:0` throws.\n- `+` counts must be `>= 1`.\n- `-` end must be `>= start`.\n\nSelector parsing intentionally falls through for unrecognized trailing `:...`; archive and SQLite paths consume their own colon syntax.\n\nURL selectors are parsed separately in `packages/coding-agent/src/tools/fetch.ts` and support only `:raw`, `:N`, `:A-B`, and `:A+C` — no optional `L` prefix there.\n\n## Outputs\n- Single-shot `AgentToolResult` built through `toolResult()` in `packages/coding-agent/src/tools/tool-result.ts`.\n- `content` is usually one text block. Image reads may return `[text, image]`.\n- `details` is path-dependent. `ReadToolDetails` may include:\n - `kind: \"file\" | \"url\"` (URL path uses `kind: \"url\"`; file reads usually omit `kind`)\n - `isDirectory`\n - `resolvedPath`\n - `suffixResolution`\n - URL fields: `url`, `finalUrl`, `contentType`, `method`, `notes`\n - `truncation`\n - `displayContent` (unprefixed text + starting line for TUI rendering)\n - `summary` (`lines`, `elidedSpans`, `elidedLines`) for structural summaries\n - `meta` from `packages/coding-agent/src/tools/output-meta.ts`\n- `details.meta.source` is set to the backing path, URL, or internal URL.\n- `details.meta.truncation` carries shown range, total lines/bytes, next offset, and optional `artifactId` for cached URL output.\n- Directory/archive listings and SQLite table lists also set `details.meta.limits` when list limits trigger.\n\n## Flow\n1. `ReadTool.execute()` accepts `{ path }`. `file://...` inputs are expanded first with `expandPath()`.\n2. 
It tries URL handling first via `parseReadUrlTarget()` from `packages/coding-agent/src/tools/fetch.ts`.\n - Plain URL reads call `executeReadUrl()`.\n - URL reads with line selectors load or refresh the URL cache with `loadReadUrlCacheEntry()` and paginate the cached text locally with `#buildInMemoryTextResult()`.\n3. If not a web URL, it checks `session.internalRouter.canHandle(...)`.\n - Internal URLs are resolved with `internalRouter.resolve()`.\n - `agent://` query extraction (`/path` or `?q=`) bypasses pagination and returns the extracted content directly.\n - Other internal resources are paginated in-memory by `#buildInMemoryTextResult()`.\n4. It tries archive resolution next with `#resolveArchiveReadPath()`.\n - `parseArchivePathCandidates()` scans for `.tar`, `.tar.gz`, `.tgz`, or `.zip` anywhere before `:sub/path`.\n - On success, `#readArchive()` either lists a directory or decodes an entry as UTF-8 text.\n5. It tries SQLite resolution with `#resolveSqliteReadPath()`.\n - `parseSqlitePathCandidates()` scans for `.sqlite`, `.sqlite3`, `.db`, `.db3` before any `:table`, `:key`, or `?query` suffix.\n - `#readSqlite()` dispatches on `parseSqliteSelector()`.\n6. Otherwise it treats the input as a local filesystem path.\n - `resolveReadPath()` expands `~`, resolves relative to session cwd, treats bare `/` as session cwd, and retries macOS screenshot/NFD/curly-quote variants.\n - If the path does not exist, `findUniqueSuffixMatch()` does a workspace glob-based unique suffix lookup (skipped for remote mounts).\n7. Directories go through `#readDirectory()`.\n8. Non-directories branch by content type:\n - image metadata / inline image\n - editable notebook text\n - markit-converted document\n - structural summary for parseable code/prose\n - streamed text/line-range read\n9. Local text reads are streamed by `streamLinesFromFile()` rather than loading the whole file. The tool adds 1 leading and up to 3 trailing lines of context around explicit bounded ranges.\n10. 
Non-empty contiguous local reads are recorded into `getFileReadCache(session)` for later hashline edit recovery.\n11. If suffix resolution happened, the first text block is prefixed with `[Path '...' not found; resolved to '...' via suffix match]`.\n\n## Modes / Variants\n\n### Local text files\n- No selector: if summarization is enabled and the file is small enough, `#trySummarize()` calls `summarizeCode()`.\n - Guards: file size `<= 2 MiB` (`MAX_SUMMARY_BYTES`), line count `<= 20_000` (`MAX_SUMMARY_LINES`).\n - Summary output keeps selected declarations and replaces elided spans with `...`. When at least one span is elided, the text content ends with a footer like `[NN lines across MM elided regions; read <path>:raw or a line range like <path>:1-9999 for verbatim content]` so the agent has a concrete recovery selector instead of a bare marker.\n - When an elided block sits between matching brace lines, `#renderSummary()` may merge them into one anchored line rather than emitting separate opener/closer lines.\n- Explicit selector or summarization miss: streamed text read.\n - Default open-ended limit is `min(session setting read.defaultLimit, DEFAULT_MAX_LINES)`.\n - Explicit ranges expand by `RANGE_LEADING_CONTEXT_LINES = 1` / `RANGE_TRAILING_CONTEXT_LINES = 3` on the constrained sides only.\n - Non-raw output uses `resolveFileDisplayMode()`:\n - hashline anchors when edit mode is hashline, read is not raw, source is mutable, edit tool exists, and `readHashLines !== false`\n - otherwise optional line numbers when `readLineNumbers === true`\n - raw mode suppresses both\n- Prefix format in hashline mode is `lineNumber + 2-char line hash + \"|\"`, e.g. 
`41th|def alpha():`, from `formatHashLine()` in `packages/coding-agent/src/hashline/hash.ts`.\n- Those anchors are what the `edit`/hashline path consumes later; immutable sources and `:raw` intentionally suppress them.\n\n### Directory listings\n- `#readDirectory()` calls `buildDirectoryTree()` with:\n - `maxDepth = 2`\n - `perDirLimit = 12`\n - `rootLimit = null`\n - `lineCap = limit` when a line selector was present, else unlimited at this layer\n- `buildDirectoryTree()` sorts siblings by recency, shows file sizes and relative ages, and may mark `limits.resultLimit` when the tree truncates.\n- Empty directories render as `(empty directory)`.\n\n### Archives\n- Supported archive containers: `.tar`, `.tar.gz`, `.tgz`, `.zip`.\n- Syntax: `archive.ext`, `archive.ext:path/inside`, `archive.ext:path/inside:50-60`.\n- `openArchive()` reads the whole archive into memory, then:\n - tar/tgz uses `new Bun.Archive(bytes)`\n - zip uses `fflate.unzipSync()`\n- Archive paths normalize `/`, drop `.` segments, and reject `..`.\n- Directory reads list immediate children; files show `name` plus ` (size)` when size > 0.\n- Directory listing default limit is `500` entries in `#readArchiveDirectory()`.\n- File entries are UTF-8 decoded. Non-UTF-8 entries return `[Cannot read binary archive entry '...' 
(...)]` instead of bytes.\n- Text archive entries reuse the normal in-memory pagination/anchoring path.\n\n### SQLite databases\n- Database detection requires both a matching extension and a valid SQLite file header (`isSqliteFile()`).\n- Selector forms from `parseSqliteSelector()`:\n\n#### `db.sqlite`\n- `kind: \"list\"`\n- Lists non-`sqlite_%` tables with row counts.\n- `#readSqlite()` caps the rendered list to `500` tables via `applyListLimit()`.\n\n#### `db.sqlite:table`\n- `kind: \"schema\"`\n- Returns `sqlite_master.sql` plus sample rows.\n- Sample size is `DEFAULT_SCHEMA_SAMPLE_LIMIT = 5`.\n\n#### `db.sqlite:table:key`\n- `kind: \"row\"`\n- Resolves by primary key when the table has exactly one PK column; otherwise falls back to `rowid` lookup.\n- No query parameters allowed on row lookups.\n\n#### `db.sqlite:table?limit=...&offset=...&order=...&where=...`\n- `kind: \"query\"`\n- Defaults: `limit = 20`, `offset = 0`.\n- `limit` is capped at `500`.\n- `order` accepts `column` or `column:asc|desc` and must name an existing column.\n- `where` is accepted only after `validateWhereClause()` rejects comments, semicolons, and control keywords like `LIMIT`, `OFFSET`, `UNION`, `ATTACH`, `PRAGMA`.\n- Unknown query parameters throw.\n\n#### `db.sqlite?q=SELECT ...`\n- `kind: \"raw\"`\n- Cannot be combined with table selectors or any other query param.\n- Empty `q` throws.\n- `executeReadQuery()` runs `db.prepare(sql).all()` and rejects bound parameters; it does not verify that the SQL starts with `SELECT`.\n\n- Rendering caps in `packages/coding-agent/src/tools/sqlite-reader.ts`:\n - ASCII table width `120` (`MAX_RENDER_WIDTH`)\n - per-column width `40` (`MAX_COLUMN_WIDTH`)\n- `#readSqlite()` opens Bun SQLite in `{ readonly: true, strict: true }` and sets `PRAGMA busy_timeout = 3000`.\n\n### Documents\n- `CONVERTIBLE_EXTENSIONS` in `packages/coding-agent/src/tools/read.ts` covers `.pdf`, `.doc`, `.docx`, `.ppt`, `.pptx`, `.xls`, `.xlsx`, `.rtf`, `.epub`.\n- 
`convertFileWithMarkit()` converts the file to text/markdown.\n- Converted output is then head-truncated with normal shared limits; there is no line selector support inside the source document before conversion.\n- Conversion failures return a text block like `[Cannot read .pdf file: ...]`.\n\n### Jupyter notebooks\n- `.ipynb` goes through `readEditableNotebookText()` unless `:raw` was requested.\n- Output is editable plain text with markers like:\n\n```text\n# %% [code] cell:0\n...\n```\n\n- Raw mode bypasses that conversion and falls back to file-text reading.\n\n### Images\n- Image detection is metadata-based (`readImageMetadata()`).\n- Max accepted image size is `20 MiB` (`MAX_IMAGE_INPUT_BYTES`, re-exported as `MAX_IMAGE_SIZE`). Larger files throw.\n- If `inspect_image.enabled` is true, `read` returns metadata only (MIME, bytes, dimensions, channels, alpha) plus a suggestion to call `inspect_image`.\n- Otherwise it calls `loadImageInput()` and returns:\n - a text note from the image loader\n - an inline image block\n- Unsupported/undecodable image formats throw a `ToolError`.\n\n### Internal URLs\n- `read` does not resolve these itself; it delegates to `session.internalRouter.resolve()`.\n- Registered protocols are outside this file, but the router in `packages/coding-agent/src/internal-urls/router.ts` is built for `agent://`, `artifact://`, `issue://`, `local://`, `mcp://`, `memory://`, `pi://`, `pr://`, `rule://`, and `skill://`.\n- `#handleInternalUrl()` behavior:\n - parses the URL with `parseInternalUrl()` so colons inside the host segment are legal\n - for `agent://`, treats non-root path extraction or `?q=` extraction as a special no-pagination mode\n - otherwise paginates the resolved text in memory\n - passes `immutable` through to `resolveFileDisplayMode()` so anchors are suppressed for immutable resources such as artifacts, skills, memory, and agent outputs\n - sets `ignoreResultLimits: true` for `skill://` so the full skill text is paginated only 
by explicit selectors, not by the normal default line limit\n- `issue://<N>` / `pr://<N>` (and the long form `issue://<owner>/<repo>/<N>` / `pr://<owner>/<repo>/<N>`) route through the same SQLite cache the `github` tool writes to; `?comments=0` selects the no-comments rendering. Bare `issue://` / `pr://` (and `issue://<owner>/<repo>` / `pr://<owner>/<repo>`) issue a live `gh issue list` / `gh pr list` for browsing, accepting `?state=`, `?limit=`, `?author=`, `?label=`. PR diffs share the same cache through `pr://<N>/diff` (numbered file listing with per-file hints), `pr://<N>/diff/<i>` (single file slice; 1-indexed), and `pr://<N>/diff/all` (verbatim unified diff); the listing and per-file slices are reconstructed from the cached unified-diff payload, so all three variants share one `gh pr diff` invocation per PR. Diff content is served as `text/plain`. Soft TTL `github.cache.softTtlSec` (default 5 minutes), hard TTL `github.cache.hardTtlSec` (default 7 days). Stale-hit returns the cached row and schedules a background refresh.\n\n### Web URLs\n- `parseReadUrlTarget()` accepts `http://`, `https://`, or `www.` targets.\n- Plain URL reads call `executeReadUrl()` in `packages/coding-agent/src/tools/fetch.ts`.\n- `:raw` means raw HTML/body fallback path; plain URL reads prefer rendered/reader-friendly output.\n- `:N`, `:A-B`, `:A+C` do not refetch. They page over cached output from the prior or current URL render.\n- URL render pipeline in `renderUrl()`:\n 1. normalize scheme (`https://` added for bare `www.`)\n 2. try special handlers for known sites unless raw\n 3. fetch with `loadPage()`\n 4. if content is image/PDF/DOCX/etc., try binary fetch + markit/image handling\n 5. handle JSON directly, feeds via feed parser, plain text directly\n 6. for HTML and non-raw mode, try markdown alternates, `URL.md`, content negotiation, feed alternates, HTML-to-text renderers, extracted linked documents, then `llms.txt`\n 7. 
fall back to raw body text/html\n- URL output is wrapped with a small header:\n\n```text\nURL: ...\nContent-Type: ...\nMethod: ...\nNotes: ...\n\n---\n```\n\n- `method` records the winning path (`json`, `feed`, `text`, `alternate-markdown`, `md-suffix`, `content-negotiation`, `image`, `markit`, `llms.txt`, `raw`, `raw-html`, etc.).\n- URL reads may return an inline image block when the fetched resource is a supported image and survives resizing.\n\n## Side Effects\n- Filesystem\n - Opens and streams local files.\n - Reads entire archives into memory before indexing.\n - May read URL-cache artifact files from the session artifacts directory.\n - Writes URL output artifacts when URL output is truncated or when line-range pagination needs a persisted cache body.\n- Network\n - URL mode performs HTTP fetches, binary refetches, and alternate-endpoint probes.\n- Subprocesses / native bindings\n - Uses Bun SQLite for `.db`/`.sqlite*`.\n - Uses `Bun.Archive` for tar/tgz and `fflate` for zip.\n - URL HTML rendering can delegate into site handlers and HTML-to-text backends from `packages/coding-agent/src/tools/fetch.ts`.\n- Session state\n - Records local text lines into `session.fileReadCache` for later stale-anchor recovery.\n - Uses `session.internalRouter` for internal URLs.\n - Uses `session.allocateOutputArtifact()` for cached/truncated URL output.\n- Background work / cancellation\n - Most branches honor `AbortSignal`; the tool itself is marked `nonAbortable = true`, but helper paths still call `throwIfAborted(signal)`.\n\n## Limits & Caps\n- Shared text truncation defaults from `packages/coding-agent/src/session/streaming-output.ts`:\n - `DEFAULT_MAX_LINES = 3000`\n - `DEFAULT_MAX_BYTES = 50 * 1024`\n- Local text open-ended default line limit: `read.defaultLimit`, clamped to `[1, DEFAULT_MAX_LINES]`.\n- Explicit line ranges add `1` leading and `3` trailing context lines on the constrained sides (`RANGE_LEADING_CONTEXT_LINES` / `RANGE_TRAILING_CONTEXT_LINES`).\n- File 
streaming chunk size: `8 * 1024` bytes (`READ_CHUNK_SIZE`).\n- Local streamed byte budget for line reads: `max(DEFAULT_MAX_BYTES, maxLinesToCollect * 512)`.\n- Structural summaries only run when file size `<= 2 MiB` and line count `<= 20_000`.\n- Image input max: `20 MiB`.\n- Directory tree caps for local directories: depth `2`, per-directory children `12`.\n- Archive directory default list cap: `500` entries.\n- SQLite:\n - default row query limit `20`\n - schema sample limit `5`\n - max query limit `500`\n - table list cap `500`\n - render width `120`, column width `40`\n - busy timeout `3000` ms\n- URL read result shown to the model is truncated to `300` lines and `50 KiB` in `executeReadUrl()`; full cached output can be attached as an artifact.\n- Inline fetched URL images:\n - source bytes cap `20 MiB`\n - post-resize inline output cap `300 KiB`\n- Unique suffix auto-resolution glob timeout: `5000` ms.\n- File-read cache holds `30` paths per session.\n\n## Errors\n- Validation and operational failures surface as `ToolError`.\n- Selector errors include:\n - `Line selector 0 is invalid; lines are 1-indexed. Use :1.`\n - invalid `A+B` / `A-B` shapes\n - `Cannot combine query extraction with offset/limit` for `agent://.../path:50`\n- Missing local/archive/sqlite paths first attempt unique suffix resolution; if no unique match exists they error.\n- Out-of-bounds line reads do not throw. 
They return explanatory text with a suggestion such as `Use :1 ...` or `Use :<last line> ...`.\n- Binary archive entries do not throw; they return a text notice.\n- Document conversion failure returns a text notice.\n- Image oversize/unsupported/invalid cases throw.\n- SQLite parser rejects unsupported parameter combinations early; DB/runtime errors are caught and rethrown as `ToolError(message)`.\n- URL fetch failure does not throw when HTTP fetch succeeds but `response.ok === false`; it returns a failed URL read with `method: \"failed\"` and explanatory notes.\n\n## Notes\n- `readSchema` examples include `https://example.com:L1-L40`, but URL selector parsing in `packages/coding-agent/src/tools/fetch.ts` does not accept `L` prefixes.\n- Hashline anchors are suppressed for raw reads and immutable internal resources because there is no editable backing target for later `edit` consumption.\n- `splitPathAndSel()` intentionally treats unknown trailing `:...` as part of the path so `archive.zip:inner/file` and `db.sqlite:table:key` still work.\n- `resolveReadPath()` contains macOS-specific filename fallbacks for screenshot timestamps, NFD Unicode normalization, and curly apostrophes.\n- A bare `/` resolves to the session cwd, not the filesystem root.\n- URL cache keys are session-scoped and normalized by requested URL + raw/rendered mode; both requested URL and final redirected URL are cached.\n- URL line-range reads request `ensureArtifact: true, preferCached: true` so a later paginated read can reopen the same rendered body from artifact storage.\n- Raw SQLite `q=` execution is not keyword-restricted beyond “no bound parameters”; the read tool relies on the surrounding contract to keep it read-only.\n- The file-read cache is not a read acceleration cache. It exists to recover hashline edits when the file changed after the read.",
79
78
  "tools/recall.md": "# recall\n\n> Search the active Hindsight bank and return raw matching memories.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/hindsight-recall.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/recall.md`\n- Key collaborators:\n - `packages/coding-agent/src/hindsight/state.ts` — session state, recall query defaults, prompt-side auto-recall.\n - `packages/coding-agent/src/hindsight/content.ts` — result formatting and UTC timestamp formatting.\n - `packages/coding-agent/src/hindsight/client.ts` — HTTP `recall` call and error mapping.\n - `packages/coding-agent/src/hindsight/bank.ts` — bank id and tag-filter scoping.\n - `docs/tools/retain.md` — shared backend, storage, seeding, and mental-model bootstrap.\n\n## Inputs\n\n| Field | Type | Required | Description |\n|---|---|---:|---|\n| `query` | `string` | Yes | Natural-language search query. The tool passes it through unchanged. |\n\n## Outputs\nReturns a single-shot tool result.\n\nWhen matches exist:\n- `content[0].type = \"text\"`\n- `content[0].text = \"Found <n> relevant memories (as of YYYY-MM-DD HH:MM UTC):\\n\\n<bullet list>\"`\n- each bullet is `- <text> [<type>] (<mentioned_at>)`; the type and timestamp suffixes appear only when those fields are present\n- `details = {}`\n\nWhen no matches exist:\n- `content[0].text = \"No relevant memories found.\"`\n- `details = {}`\n\n## Flow\n1. `HindsightRecallTool.createIf(...)` only exposes the tool when `memory.backend == \"hindsight\"`.\n2. `execute(...)` wraps the whole operation in `untilAborted(...)` from `@oh-my-pi/pi-utils`.\n3. It reads the active `HindsightSessionState`; missing state throws `Hindsight backend is not initialised for this session.`\n4. It calls `state.client.recall(...)` with:\n - `bankId` from session bootstrap,\n - the model-supplied `query`,\n - `budget`, `maxTokens`, and `types` from `HindsightConfig`,\n - tag filters from the bank scope (`recallTags`, `recallTagsMatch`).\n5. 
`HindsightApi.recall(...)` POSTs `/v1/default/banks/{bank_id}/memories/recall`.\n6. Results are formatted into a plain-text list with `formatMemories(...)`; empty results map to the fixed no-match string.\n7. Failures are logged with `logger.warn(\"recall failed\", ...)` and rethrown.\n\n## Modes / Variants\n- Tool path: explicit query-only recall. The tool does not compose context from recent turns; that richer path is reserved for backend auto-recall in `HindsightSessionState.beforeAgentStartPrompt(...)` / `maybeRecallOnAgentStart(...)`.\n- Bank scoping is inherited from the active `HindsightSessionState`:\n - `global` — no tag filter.\n - `per-project` — separate bank id per cwd basename.\n - `per-project-tagged` — shared bank id plus `project:<cwd basename>` filter with `tagsMatch = \"any\"`, so project-tagged and untagged global memories can both surface.\n- Session scope: reads cross-session server-side memories, but uses per-session cached config and scope.\n\n## Side Effects\n- Network\n - `POST /v1/default/banks/{bank_id}/memories/recall` via `packages/coding-agent/src/hindsight/client.ts`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - None on success. 
Unlike backend auto-recall, this tool does not update `lastRecallSnippet` or refresh the system prompt.\n- Background work / cancellation\n - Aborts through `untilAborted(...)` if the tool call signal is cancelled.\n\n## Limits & Caps\n- Client default budget for raw `HindsightApi.recall(...)` is `\"mid\"`; this tool overrides from config in `packages/coding-agent/src/hindsight/state.ts`.\n- Default recall settings from `packages/coding-agent/src/config/settings-schema.ts`:\n - `hindsight.recallBudget = \"mid\"`\n - `hindsight.recallMaxTokens = 1024`\n - `hindsight.recallTypes = [\"world\", \"experience\"]`\n- The explicit tool path does not apply `hindsight.recallContextTurns` or `hindsight.recallMaxQueryChars`; those caps only affect backend auto-recall query composition.\n\n## Errors\n- Throws `Hindsight backend is not initialised for this session.` when no state exists.\n- HTTP and fetch failures become `HindsightError` from `packages/coding-agent/src/hindsight/client.ts` with `statusCode` and parsed `details` when available.\n- Non-`Error` failures are normalized to `new Error(String(err))` before rethrow.\n\n## Notes\n- Shared backend details are in `docs/tools/retain.md`: server-side storage, subagent aliasing, bank scoping, mission setup, and mental-model bootstrap.\n- Mental models are not fetched by this tool. They may still already be present in the agent's developer instructions because the backend caches a `<mental_models>` block separately from recall results.\n- The tool returns raw memory hits; it does not synthesize across them. Use `reflect` for that path.\n",
80
79
  "tools/recipe.md": "# recipe\n\n> Run a task exposed by a detected project task runner.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/recipe/index.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/recipe.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/recipe/runner.ts` — op parsing, task resolution, prompt model.\n - `packages/coding-agent/src/tools/recipe/render.ts` — shell-style call/result rendering.\n - `packages/coding-agent/src/tools/recipe/runners/index.ts` — runner registration order.\n - `packages/coding-agent/src/tools/recipe/runners/just.ts` — detect `just` recipes from justfiles.\n - `packages/coding-agent/src/tools/recipe/runners/pkg.ts` — detect `package.json` scripts and workspaces.\n - `packages/coding-agent/src/tools/recipe/runners/cargo.ts` — detect Cargo run/test targets.\n - `packages/coding-agent/src/tools/recipe/runners/make.ts` — parse make targets from makefiles.\n - `packages/coding-agent/src/tools/recipe/runners/task.ts` — detect Taskfile tasks via `task --list-all`.\n - `packages/coding-agent/src/tools/bash.ts` — actual command execution, truncation, cwd/env handling.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `op` | `string` | Yes | Single string containing the task selector plus trailing arguments. The first whitespace-delimited token selects the task; the remainder is appended verbatim to the resolved runner command. Examples from schema/prompt: `test`, `build --release`, `pkg-a/test`, `crate/bin/server`, `pkg:test --watch`. 
|\n\n### `op` grammar\n\n```text\nop := S* head (S+ tail)?\nhead := explicit-runner / implicit-task\nexplicit-runner := runner-id \":\" task-token\nimplicit-task := task-token\nrunner-id := detected runner id (`just` | `pkg` | `cargo` | `make` | `task`)\ntask-token := first non-whitespace token; may contain `/`\ntail := remaining characters after the first whitespace run\n```\n\nResolution rules from `resolveRunnerAndTask()`:\n- Leading whitespace is ignored; an empty `op` throws `ToolError` with the available task list.\n- Only the first token is parsed structurally. Everything after the first whitespace run becomes `tail` and is appended to the command unchanged.\n- If `head` contains `:` and the prefix matches a detected runner id, the suffix must exactly match a task in that runner.\n- Otherwise `head` is treated as a task name and matched across all detected runners.\n- If exactly one runner has that task, it is used.\n- If multiple runners have that task, the call is rejected and the error tells the model to use `<runner-id>:<task>`.\n- Namespaced task names generated by runners use `/`, not `:`. `/` is part of the task name, not a parser separator.\n\n## Outputs\n- Delegates directly to `BashTool.execute()` and returns the same `AgentToolResult<BashToolDetails>` shape.\n- Success path: one text content block containing merged command output (`result.output` from bash execution, or `(no output)`), plus any timeout clamp notice appended after a blank line.\n- Recipe does not return separate `stdout`, `stderr`, or `exitCode` fields. 
`stdout`/`stderr` are already merged into the text block by bash execution; `exitCode` is only observed indirectly (success requires `0`, non-zero becomes an error).\n- Error path: throws `ToolError`; for non-zero exits the message is the merged output followed by `Command exited with code <n>`.\n- `details` may include:\n - `timeoutSeconds`: effective timeout used by bash.\n - `requestedTimeoutSeconds`: only when bash clamped a requested timeout; recipe never sets one itself.\n - `meta`: output truncation metadata from bash execution.\n - `async`: defined by bash background execution paths, but recipe does not expose an `async` input.\n- When bash output is truncated, the full text is stored in an artifact and referenced via bash truncation metadata.\n- Call/result rendering in the TUI uses bash shell rendering with a resolved title, command preview, and optional task cwd.\n\n## Flow\n1. `RecipeTool.createIf()` in `packages/coding-agent/src/tools/recipe/index.ts` checks `session.settings.get(\"recipe.enabled\")`; disabled returns `null`.\n2. It probes every runner in `RUNNERS` from `packages/coding-agent/src/tools/recipe/runners/index.ts` with `Promise.all(...)` in this order: `just`, `pkg`, `cargo`, `make`, `task`.\n3. Each runner returns either `null` or a `DetectedRunner { id, label, commandPrefix, tasks }`; runners with zero tasks are discarded.\n4. If no runners remain, the tool is not registered.\n5. Constructor stores detected runners, instantiates `BashTool`, renders the model-facing description by passing `buildPromptModel(runners)` into `packages/coding-agent/src/prompts/tools/recipe.md`, and builds shell renderers from `createRecipeToolRenderer()`.\n6. On execution, `RecipeTool.execute()` calls `resolveCommand(op, this.#runners)`.\n7. `resolveCommand()` in `packages/coding-agent/src/tools/recipe/runner.ts`:\n 1. `parseOp()` trims only leading whitespace, extracts the first non-whitespace token as `head`, and keeps the remainder as `tail`.\n 2. 
`resolveRunnerAndTask()` resolves `head` either as `runnerId:taskName` or as an unqualified task name.\n 3. It throws `ToolError` for empty ops, missing explicit tasks, ambiguous task names, or unknown tasks; all error variants include the available task list.\n 4. It builds the final shell command with `buildCommand(commandPrefix, commandName, tail)`, joining non-empty parts with spaces.\n 5. If the task defines `cwd`, that relative path is returned alongside the command.\n8. `RecipeTool.execute()` forwards `{ command, cwd }` into `BashTool.execute()`; recipe does not pass timeout, env, async, or pty options.\n9. `BashTool.execute()` resolves internal URLs, validates/normalizes cwd against `session.cwd`, clamps timeout, applies bash interception rules, runs the command, and formats the final result.\n\n## Modes / Variants\n- Tool enablement:\n - Disabled by `recipe.enabled` setting: tool is absent.\n - Enabled but no detected tasks: tool is absent.\n- Task selection:\n - Unqualified task name: succeeds only when exactly one detected runner owns that task.\n - Explicit runner-qualified task: `<runner-id>:<task>`.\n- Runner detection paths:\n - `just`: requires `just` on `PATH`, a justfile, and successful `just --dump --dump-format=json`.\n - `pkg`: requires a readable root `package.json`; picks a package manager command from lockfiles or `bun` availability; discovers root scripts and workspace package scripts.\n - `cargo`: requires `cargo` on `PATH`, `Cargo.toml`, and successful `cargo metadata --no-deps --format-version=1`.\n - `make`: requires `make` on `PATH` and a makefile; parses targets statically.\n - `task`: requires `task` on `PATH`, a Taskfile, and successful `task --list-all --json`.\n- Execution path:\n - Always the synchronous `bash` call surface from recipe inputs.\n - Bash may still auto-background long-running work if `bash.autoBackground.enabled` and session async job support are enabled.\n\n## Side Effects\n- Filesystem\n - Reads manifests from 
the session cwd during detection: justfiles, `package.json`, workspace `package.json` files, `Cargo.toml`, makefiles, `Taskfile.yml` / `Taskfile.yaml`.\n - Command execution runs in `session.cwd` or a task-specific relative cwd resolved under it.\n - Bash may allocate output artifacts for truncated command output.\n- Subprocesses / native bindings\n - Detection may spawn `just --dump --dump-format=json`, `cargo metadata --no-deps --format-version=1`, and `task --list-all --json`.\n - Execution spawns the resolved shell command through `BashTool` / `executeBash()`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Tool availability depends on session settings.\n - Constructor prompt text is specialized to detected runners/tasks.\n - Bash execution may create async job records and output artifacts if bash auto-background triggers.\n- User-visible prompts / interactive UI\n - The model-facing tool description lists detected runners and up to 20 tasks per runner.\n - TUI rendering shows a shell-style preview using the resolved title/command/cwd.\n- Background work / cancellation\n - Detection is parallelized across runners.\n - Runtime command execution honors the passed abort signal through `BashTool`.\n\n## Limits & Caps\n- Prompt task listing is capped at `PROMPT_TASK_LIMIT = 20` per runner in `packages/coding-agent/src/tools/recipe/runner.ts`; this affects the rendered tool description, not execution.\n- Recipe itself defines no timeout input; delegated bash execution therefore uses bash's default `timeout = 300` seconds from `packages/coding-agent/src/tools/bash.ts`.\n- Bash clamps timeouts to the configured bash range (`clampTimeout(\"bash\", ...)` in `packages/coding-agent/src/tools/bash.ts`), but recipe cannot request a custom value.\n- `pkg` workspace discovery normalizes workspace globs to `.../package.json` and sorts matched package files lexicographically before task generation.\n- `cargo` deduplicates generated task names with a 
`Set`, so duplicate targets collapse to one recipe task.\n\n## Errors\n- Detection failures in runner modules are mostly soft-failed:\n - Missing binaries, missing manifests, parse failures, or non-zero probe exits usually return `null` and log with `logger.debug(...)`.\n - Result: the affected runner disappears instead of surfacing an error to the model.\n- Invocation failures are hard errors from `resolveRunnerAndTask()`:\n - Empty `op`.\n - Explicit runner prefix with missing/empty task.\n - Ambiguous unqualified task name across runners.\n - Unknown task name.\n- Execution failures come from `BashTool.execute()`:\n - Invalid cwd.\n - Bash interceptor blocks.\n - Aborts/timeouts.\n - Non-zero exit codes.\n - Missing exit status.\n- All `resolveRunnerAndTask()` errors include the current available task list to help the model retry.\n\n## Notes\n- `RecipeTool` sets `concurrency = \"exclusive\"`; calls do not run concurrently with other exclusive tools.\n- Tool registration is all-or-nothing per runner: a detected runner with zero tasks is dropped.\n- Runner ids are fixed string literals from the runner modules: `just`, `pkg`, `cargo`, `make`, `task`.\n- `buildPromptModel()` includes each task's rendered command (`commandPrefix` + `commandName`) and relative cwd when present; the prompt therefore exposes the exact shell form recipe will run.\n- `pkg` task names:\n - Root `package.json` scripts keep bare names like `test`.\n - Workspace scripts are always namespaced as `<package-name-or-dir>/<script>` and set `cwd` to that package directory.\n - Script names are shell-quoted into `commandName`, so a task like `build` becomes `bun run 'build'` / `npm run 'build'` / similar.\n- `pkg` command prefix selection prefers lockfiles in this order: `bun.lock`/`bun.lockb`, `pnpm-lock.yaml`, `yarn.lock`, `package-lock.json`/`npm-shrinkwrap.json`; otherwise it falls back to `bun run` if `bun` exists, else `npm run`.\n- `cargo` task names are generated from metadata targets:\n - 
Single-package manifests: `bin/<name>`, `example/<name>`, `test/<name>`.\n - Multi-package workspaces: `<package>/bin/<name>`, `<package>/example/<name>`, `<package>/test/<name>`.\n - Each task overrides `commandPrefix` to the full `cargo run ... --bin|--example` or `cargo test ... --test` prefix, and `commandName` to the quoted target name.\n- `make` target parsing is static text parsing, not `make -qp` output:\n - Recognizes makefiles named `Makefile`, `makefile`, `GNUmakefile`.\n - Uses `.PHONY` lines to decide whether to include undocumented file targets; without any `.PHONY`, all parsed targets are exposed.\n - If `.PHONY` exists, documented non-phony targets are kept with ` (file target)` appended to `doc`.\n- `just` detection ignores private recipes and preserves declared parameter names only for prompt display; execution still accepts arbitrary `tail` text.\n- `task` detection uses `desc` first, then `summary`, for task documentation.\n- Recipe has no env input of its own. Commands inherit whatever environment `BashTool` supplies for normal bash execution in the session.",
81
80
  "tools/reflect.md": "# reflect\n\n> Ask the Hindsight server to synthesize an answer over the active memory bank.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/hindsight-reflect.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/reflect.md`\n- Key collaborators:\n - `packages/coding-agent/src/hindsight/bank.ts` — best-effort bank mission initialization.\n - `packages/coding-agent/src/hindsight/state.ts` — session state, shared bank scope, recall/reflect config.\n - `packages/coding-agent/src/hindsight/client.ts` — HTTP `reflect` call and error mapping.\n - `docs/tools/retain.md` — shared backend, storage, seeding, and mental-model bootstrap.\n\n## Inputs\n\n| Field | Type | Required | Description |\n|---|---|---:|---|\n| `query` | `string` | Yes | Question to answer from long-term memory. |\n| `context` | `string` | No | Extra guidance sent to the Hindsight reflect endpoint. |\n\n## Outputs\nReturns a single-shot tool result:\n\n- `content[0].type = \"text\"`\n- `content[0].text = response.text?.trim() || \"No relevant information found to reflect on.\"`\n- `details = {}`\n\nThe tool returns the Hindsight server's synthesized text directly; it does not expose raw recall hits.\n\n## Flow\n1. `HindsightReflectTool.createIf(...)` only exposes the tool when `memory.backend == \"hindsight\"`.\n2. `execute(...)` runs under `untilAborted(...)`.\n3. It reads the active `HindsightSessionState`; missing state throws `Hindsight backend is not initialised for this session.`\n4. Before reflecting, it calls `ensureBankMission(...)` with the current `bankId`, config, and process-local `missionsSet`.\n5. `ensureBankMission(...)` best-effort `PUT`s `/v1/default/banks/{bank_id}` with `reflect_mission` and optional `retain_mission` exactly once per bank/process; failures are swallowed.\n6. It calls `state.client.reflect(...)` with the model `query`, optional `context`, configured recall budget, and bank-scope tag filters.\n7. 
`HindsightApi.reflect(...)` POSTs `/v1/default/banks/{bank_id}/reflect` and defaults its own budget to `\"low\"` when callers omit one; this tool always passes the configured budget.\n8. Blank or whitespace-only responses are replaced with `No relevant information found to reflect on.`\n9. Failures are logged with `logger.warn(\"reflect failed\", ...)` and rethrown.\n\n## Modes / Variants\n- Tool path: one reflect request, optionally focused by `context`.\n- Bank scoping is inherited from the active `HindsightSessionState`:\n - `global` — no tag filter.\n - `per-project` — separate bank id per cwd basename.\n - `per-project-tagged` — shared bank id plus `project:<cwd basename>` filter with `tagsMatch = \"any\"`.\n- Session scope: reads cross-session server-side memories, but does not persist local output.\n\n## Side Effects\n- Network\n - Optional `PUT /v1/default/banks/{bank_id}` from `ensureBankMission(...)`.\n - `POST /v1/default/banks/{bank_id}/reflect` via `packages/coding-agent/src/hindsight/client.ts`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Reads session-held bank scope and config only. 
Does not update `lastRecallSnippet`, the mental-model cache, or the retain queue.\n- Background work / cancellation\n - Aborts through `untilAborted(...)` if the tool call signal is cancelled.\n\n## Limits & Caps\n- Tool-level params: only `query` is required; `context` is optional.\n- Default budget setting comes from `hindsight.recallBudget` in `packages/coding-agent/src/config/settings-schema.ts`; default `\"mid\"`.\n- `reflect` itself has no client-side token cap parameter here; unlike `recall`, the tool does not pass `maxTokens`.\n- Mission initialization tracks up to `MISSION_SET_CAP = 10_000` bank ids in `packages/coding-agent/src/hindsight/bank.ts`, then drops the oldest half of the sorted set.\n\n## Errors\n- Throws `Hindsight backend is not initialised for this session.` when no state exists.\n- HTTP and fetch failures become `HindsightError` from `packages/coding-agent/src/hindsight/client.ts` with `statusCode` and parsed `details` when available.\n- `ensureBankMission(...)` failures are silent to the tool caller; only the later reflect request can fail visibly.\n- Non-`Error` failures are normalized to `new Error(String(err))` before rethrow.\n\n## Notes\n- Shared backend details are in `docs/tools/retain.md`: server-side storage, subagent aliasing, bank scoping, seed mental models from `packages/coding-agent/src/hindsight/seeds.json`, and mental-model prompt injection.\n- `reflect` does not read the cached `<mental_models>` block directly. It queries the Hindsight server over the bank contents. The same session may also have separate mental-model context injected into its developer instructions.\n- Reflect mission and retain mission are bank-level server settings, not per-request payload. The tool just ensures they are present best-effort before reflecting.\n",
@@ -84,7 +83,7 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
84
83
  "tools/retain.md": "# retain\n\n> Queue durable facts for asynchronous write into the active Hindsight bank.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/hindsight-retain.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/retain.md`\n- Key collaborators:\n - `packages/coding-agent/src/hindsight/state.ts` — per-session queue, flush, auto-retain.\n - `packages/coding-agent/src/hindsight/backend.ts` — session bootstrap, prompt injection, subagent aliasing.\n - `packages/coding-agent/src/hindsight/bank.ts` — bank id derivation, tag scoping, mission setup.\n - `packages/coding-agent/src/hindsight/client.ts` — HTTP `retain` / `retainBatch` calls.\n - `packages/coding-agent/src/hindsight/content.ts` — retention transcript shaping, memory-tag stripping.\n - `packages/coding-agent/src/hindsight/mental-models.ts` — bank-scoped mental-model seeding and cache rendering.\n - `packages/coding-agent/src/hindsight/seeds.json` — built-in mental-model seed definitions.\n - `packages/coding-agent/src/hindsight/transcript.ts` — extracts user/assistant turns for auto-retain.\n\n## Inputs\n\n| Field | Type | Required | Description |\n|---|---|---:|---|\n| `items` | `Array<{ content: string; context?: string }>` | Yes | One or more memories to queue. `minItems: 1`. Each item must be self-contained; `context` is optional per-item provenance. |\n\n## Outputs\nReturns a single-shot tool result:\n\n- `content[0].type = \"text\"`\n- `content[0].text = \"<count> memory queued.\"` or `\"<count> memories queued.\"`\n- `details = { count: number }`\n\nThe write is not confirmed before the tool returns. The queue flushes later; flush failures emit a session warning notice and are not returned to the model.\n\n## Flow\n1. `HindsightRetainTool.createIf(...)` only exposes the tool when `memory.backend == \"hindsight\"` in `packages/coding-agent/src/tools/hindsight-retain.ts`.\n2. 
`execute(...)` fetches `session.getHindsightSessionState()` and throws if the Hindsight backend was not started.\n3. Each input item is handed to `HindsightSessionState.enqueueRetain(...)` in `packages/coding-agent/src/hindsight/state.ts`.\n4. `HindsightRetainQueue.enqueue(...)` appends the item and either:\n - flushes immediately when the queue reaches `RETAIN_FLUSH_BATCH_SIZE`, or\n - starts a debounce timer for `RETAIN_FLUSH_INTERVAL_MS`.\n5. On flush, `HindsightRetainQueue.#doFlush(...)`:\n - verifies the session still owns this state,\n - calls `ensureBankMission(...)` once per bank/process before writing,\n - maps queued items to `MemoryItemInput` with `context ?? config.retainContext`, `metadata.session_id`, and bank-scope tags,\n - sends one async `retainBatch(...)` request.\n6. The tool returns immediately after enqueueing; it does not await the HTTP write.\n\n## Modes / Variants\n- Tool path: queued batch write only.\n- Bank scoping comes from `computeBankScope(...)` in `packages/coding-agent/src/hindsight/bank.ts`:\n - `global` — one shared bank, no project tags.\n - `per-project` — bank id gets `-<cwd basename>` appended.\n - `per-project-tagged` — shared bank plus `project:<cwd basename>` tags on retained memories.\n- Session scope:\n - tool-called retains are per-session queued work in `HindsightSessionState`,\n - persisted memories are cross-session server-side bank data,\n - subagents alias the parent `HindsightSessionState`, so their `retain` calls write into the same bank and queue.\n\n## Side Effects\n- Filesystem\n - None for retained memories. 
No local memory file is written.\n- Network\n - `POST /v1/default/banks/{bank_id}/memories` via `retainBatch(...)` in `packages/coding-agent/src/hindsight/client.ts`.\n - Optional `PUT /v1/default/banks/{bank_id}` via `ensureBankMission(...)` before first write per bank/process.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Appends to the in-memory `HindsightRetainQueue` on the active `HindsightSessionState`.\n - Includes `metadata.session_id` on each retained item.\n - Shares parent state for subagents (`aliasOf` path in `packages/coding-agent/src/hindsight/backend.ts`).\n- User-visible prompts / interactive UI\n - On async flush failure, emits `session.emitNotice(\"warning\", ...)`; the model is not told.\n- Background work / cancellation\n - Flush runs later on timer, queue-size threshold, `agent_end`, backend `enqueue(...)`, or backend `clear(...)`.\n\n## Limits & Caps\n- Input schema requires `items.length >= 1` in `packages/coding-agent/src/tools/hindsight-retain.ts`.\n- Queue flush threshold: `RETAIN_FLUSH_BATCH_SIZE = 16` in `packages/coding-agent/src/hindsight/state.ts`.\n- Queue debounce: `RETAIN_FLUSH_INTERVAL_MS = 5_000` in `packages/coding-agent/src/hindsight/state.ts`.\n- Queue writes use `retainBatch(..., { async: true })`; the client does not wait for server-side consolidation.\n- Shared auto-retain settings on the same backend:\n - `hindsight.retainEveryNTurns` default `3`\n - `hindsight.retainOverlapTurns` default `2`\n - `hindsight.retainContext` default `\"omp\"`\n - `hindsight.retainMode` default `\"full-session\"`\n from `packages/coding-agent/src/config/settings-schema.ts`.\n\n## Errors\n- Throws `Hindsight backend is not initialised for this session.` when no state exists.\n- Queue enqueue on disposed state throws `Hindsight retain queue is closed.`\n- Flush-time API failures are caught in `HindsightRetainQueue.#doFlush(...)`, logged, and converted into a warning notice instead of a tool error.\n- Mission creation 
failures are swallowed in `ensureBankMission(...)`; writes continue.\n\n## Notes\n- Storage is server-side. `hindsightBackend.clear(...)` only clears local cache/state and warns that upstream deletion must happen in Hindsight UI or `deleteBank`; see `packages/coding-agent/src/hindsight/backend.ts`.\n- Auto-retain uses the same bank but a different path than this tool: `retainSession(...)` extracts plain user/assistant transcript from `packages/coding-agent/src/hindsight/transcript.ts`, strips `<memories>` / `<mental_models>` blocks via `stripMemoryTags(...)`, and calls single-item `retain(...)`.\n- `retain` itself does not seed or read mental models. Mental-model bootstrap lives in the shared backend: `HindsightSessionState.runMentalModelLoad(...)` optionally resolves seeds from `packages/coding-agent/src/hindsight/seeds.json`, creates missing models with `ensureMentalModels(...)`, then caches a rendered `<mental_models>` block for prompt injection.\n- Built-in seeds are `user-preferences`, `project-conventions`, and `project-decisions`. `projectTagged: true` seeds inherit the active scope's retain tags; untagged seeds read the whole bank.\n- Mental-model defaults from `packages/coding-agent/src/config/settings-schema.ts`: `hindsight.mentalModelsEnabled = true`, `hindsight.mentalModelAutoSeed = true`, `hindsight.mentalModelRefreshIntervalMs = 5 * 60 * 1000`, `hindsight.mentalModelMaxRenderChars = 16_000`. First-turn loading waits up to `MENTAL_MODEL_FIRST_TURN_DEADLINE_MS = 1500` in `packages/coding-agent/src/hindsight/mental-models.ts`.\n- Seed lifecycle is create-only. Changing `packages/coding-agent/src/hindsight/seeds.json` does not mutate existing server-side models.\n- `recall.md` and `reflect.md` rely on the same bank, scoping, and mental-model bootstrap; refer back here for the shared backend behavior.\n",
85
84
  "tools/rewind.md": "# rewind\n\n> End an active checkpoint by pruning exploratory context and retaining a concise report.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/checkpoint.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/rewind.md`\n- Key collaborators:\n - `packages/coding-agent/src/session/agent-session.ts` — validates pending rewind state, applies the actual rewind, and injects the retained report.\n - `packages/coding-agent/src/session/session-manager.ts` — branches the persisted session tree and appends persisted summary/report entries.\n - `packages/coding-agent/src/session/messages.ts` — converts persisted `branch_summary` entries into LLM-visible branch-summary messages on rebuilt context.\n - `packages/coding-agent/src/tools/index.ts` — registers the tool and shares the `checkpoint.enabled` gate.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `report` | `string` | Yes | Investigation findings. `execute()` trims it and rejects the empty result. |\n\n## Outputs\nThe tool returns a single text result plus structured details:\n\n- text body:\n - `Rewind requested.`\n - `Report captured for context replacement.`\n- `details`:\n - `report: string` — trimmed report text\n - `rewound: true`\n\nThe returned tool result is not the final rewind. `AgentSession` waits until `turn_end`, then applies the rewind side effects asynchronously.\n\n## Flow\n1. `RewindTool.createIf()` in `packages/coding-agent/src/tools/checkpoint.ts` hides the tool from subagents.\n2. `RewindTool.execute()` rejects subagent calls with `ToolError(\"Checkpoint not available in subagents.\")`.\n3. It rejects calls with no active checkpoint using `ToolError(\"No active checkpoint.\")`.\n4. It trims `params.report`; if empty, it throws `ToolError(\"Report cannot be empty.\")`.\n5. It returns a `toolResult()` with `details.report` and `details.rewound = true`.\n6. 
On `tool_execution_end`, `AgentSession` extracts the report from `details.report` or the first text content block and stores it in `#pendingRewindReport`.\n7. On `turn_end`, if `#pendingRewindReport` is set, `AgentSession.#applyRewind()` runs.\n8. `#applyRewind()` computes `safeCount = clamp(checkpointMessageCount, 0, agent.state.messages.length)` and calls `agent.replaceMessages(agent.state.messages.slice(0, safeCount))`.\n9. It then calls `sessionManager.branchWithSummary(checkpointEntryId, report, { startedAt })`. That moves the persisted session leaf back to the checkpoint entry and appends a new `branch_summary` entry whose `summary` is the rewind report.\n10. If `checkpointEntryId` no longer resolves, it logs a warning and falls back to `branchWithSummary(null, report, { startedAt })`, branching from root instead.\n11. `#applyRewind()` appends a hidden in-memory custom message `{ customType: \"rewind-report\", content: report, display: false }` and persists the same payload through `sessionManager.appendCustomMessageEntry(\"rewind-report\", ...)` with `details = { startedAt, rewoundAt }`.\n12. 
Finally it clears `#checkpointState` and `#pendingRewindReport`.\n\n## Modes / Variants\n- Normal rewind: checkpoint entry exists; session history branches from that exact entry.\n- Fallback rewind: checkpoint entry ID is missing from the current session tree; rewind branches from root and logs a warning.\n- Deferred turn-end apply: rewind side effects happen only after the surrounding assistant turn finishes, not inside `RewindTool.execute()`.\n\n## Side Effects\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Replaces in-memory conversation history with the prefix ending at the checkpoint tool result.\n - Adds a hidden custom message `rewind-report` carrying the retained report.\n - Clears the active checkpoint state and pending rewind report.\n - Repositions the persisted session leaf to the checkpoint branch point and appends new session entries.\n- Filesystem\n - Persists the new `branch_summary` and `custom_message` entries into the session `.jsonl` file through normal `SessionManager` append persistence.\n - Session files are named `<ISO-timestamp-with-:-and-.-replaced>_<uuidv7>.jsonl` in the session directory; default directory selection is documented in `SessionManager.create()` as `~/.omp/agent/sessions/<encoded-cwd>/` when no override is passed.\n- User-visible prompts / interactive UI\n - The tool result itself is visible.\n - The persisted `branch_summary` becomes an LLM-visible `branchSummary` message when context is rebuilt from `SessionManager.buildSessionContext()`; `messages.ts` renders it as a user-role text message using `prompts/compaction/branch-summary-context.md`.\n - The persisted `rewind-report` custom message also participates in rebuilt LLM context because `custom_message` entries are converted through `createCustomMessage()`.\n- Background work / cancellation\n - Rewind application is deferred to `turn_end`. 
There is no separate job object or cancel handle.\n\n## Limits & Caps\n- Availability is gated by `checkpoint.enabled`, default `false`, in `packages/coding-agent/src/config/settings-schema.ts`.\n- Top-level sessions only.\n- Requires exactly one active checkpoint; there is no path to name or choose among multiple checkpoints.\n- Report text must be non-empty after `trim()`.\n- Rewind restores only the message prefix recorded by `checkpointMessageCount`; there is no file restore, artifact restore, blob restore, or process restore path.\n- Persisted report/summary content is still subject to the global session persistence cap `MAX_PERSIST_CHARS = 500_000` in `packages/coding-agent/src/session/session-manager.ts`.\n\n## Errors\n- `ToolError(\"Checkpoint not available in subagents.\")` — thrown for subagent sessions.\n- `ToolError(\"No active checkpoint.\")` — thrown when no checkpoint state is present.\n- `ToolError(\"Report cannot be empty.\")` — thrown when the trimmed report is empty.\n- Missing checkpoint entry IDs during apply do not fail the tool call; `#applyRewind()` catches the error, logs `Rewind branch checkpoint missing, falling back to root`, and branches from root.\n- If the agent turn is aborted while a checkpoint is active, `AgentSession` clears checkpoint state rather than applying a delayed rewind.\n\n## Notes\n- Checkpoint selection is implicit. 
`rewind` always targets the single `#checkpointState` captured by the last successful `checkpoint`; there is no checkpoint list, label, or ID parameter.\n- Restored state is transcript/session-tree state only:\n - in-memory `agent.state.messages` prefix up to `checkpointMessageCount`\n - persisted session leaf reset to `checkpointEntryId` or root fallback\n - retained rewind report as `branch_summary` and hidden `rewind-report` custom message\n- Not restored:\n - filesystem contents\n - git state\n - artifacts under `packages/coding-agent/src/session/artifacts.ts`\n - blob-store payloads under `packages/coding-agent/src/session/blob-store.ts`\n - prompt history rows in `packages/coding-agent/src/session/history-storage.ts`\n - auth or other agent storage in `packages/coding-agent/src/session/agent-storage.ts`\n- There is no concurrent-edit reconciliation. If code or session-adjacent state changes during the checkpoint window, rewind does not merge or revert them; it only drops conversation context and rewires the session branch.\n- Rewind is not destructive to persisted session history. `branchWithSummary()` appends a new `branch_summary` entry and moves the leaf; it does not delete the abandoned path from the `.jsonl` session log. The active context is cut over to the new branch, but the old entries remain in session storage.\n",
86
85
  "tools/search.md": "# search\n\n> Search file contents with a regex across files, directories, globs, and internal URLs.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/search.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/search.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/match-line-format.ts` — model-facing anchor formatting.\n - `packages/coding-agent/src/tools/path-utils.ts` — path normalization, glob splitting, internal URL resolution.\n - `packages/coding-agent/src/tools/file-recorder.ts` — file ordering for grouped output.\n - `packages/coding-agent/src/tools/grouped-file-output.ts` — grouped per-file text layout.\n - `packages/coding-agent/src/session/streaming-output.ts` — line truncation and final byte truncation.\n - `packages/coding-agent/src/config/settings-schema.ts` — default context lines.\n - `packages/natives/native/index.d.ts` — native `grep()` types exposed to TS.\n - `crates/pi-natives/src/grep.rs` — native regex/file search implementation.\n - `docs/natives-text-search-pipeline.md` — native search pipeline overview.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `pattern` | `string` | Yes | Regex pattern. `search.ts` trims it and rejects empty input. The native matcher enables multiline only when the pattern text contains a literal newline or the two-character sequence `\\\\n`. The model prompt explicitly documents literal-brace escaping such as ``interface\\\\{\\\\}``, although the native layer also auto-escapes braces that cannot be valid repetition quantifiers. |\n| `paths` | `string[]` | Yes | One or more file paths, directory paths, glob-like paths, or internal URLs. Empty strings are rejected after trimming/quote stripping. Internal URLs must resolve to a backing file and cannot contain glob characters. |\n| `i` | `boolean` | No | Case-insensitive search. Defaults to `false`. Passed to native `ignoreCase`. 
|\n| `gitignore` | `boolean` | No | Respect `.gitignore` during directory scans. Defaults to `true`. Passed to native `gitignore`. |\n| `skip` | `number` | No | Global match offset. Defaults to `0`. `search.ts` floors finite numbers and rejects negative or non-finite values. |\n\n## Outputs\nThe tool returns a single text block in `content[0].text` plus structured `details`.\n\n- Match lines are formatted by `formatMatchLine()` as `*<anchor>|<line>` for matches and ` <anchor>|<line>` for context.\n - Hashline mode: `*5th|content`, ` 9x}|content`.\n - Plain mode: `*5|content`, ` 9|content`.\n- Directory results are grouped by file, with `# <path>` headings and blank lines between groups.\n- `details` may include:\n - `scopePath` — formatted search scope.\n - `matchCount`, `fileCount`, `files`, `fileMatches` — counts for the returned page, not necessarily total corpus counts.\n - `matchLimitReached` — visible-page limit hit (`100`).\n - `resultLimitReached` — native preselection limit hit (`500`).\n - `linesTruncated` — one or more matched lines were shortened to `1024` chars plus `…`.\n - `truncated` and `meta.truncation` — final text output was head-truncated by `truncateHead()`.\n - `displayContent` — TUI-only rendering text with `│` gutters instead of model anchors.\n - `missingPaths` — multi-path entries skipped because their base path did not exist.\n- No-match result text is `No matches found`, optionally followed by `Skipped missing paths: ...`.\n\n## Flow\n1. `SearchTool.execute()` validates and normalizes input in `packages/coding-agent/src/tools/search.ts`:\n - trims `pattern`, rejects empty patterns;\n - normalizes `skip` to a non-negative integer;\n - reads `search.contextBefore` and `search.contextAfter` from session settings (`1` and `3` by default);\n - enables multiline only when `pattern` contains `\\n` or an actual newline.\n2. Each `paths` entry is normalized with `normalizePathLikeInput()`.\n3. 
Internal URLs are resolved through `session.internalRouter`:\n - glob metacharacters (`*`, `?`, `[`, `{`) are rejected for internal URLs;\n - URLs without `resource.sourcePath` fail;\n - immutable sources are tracked so output can suppress editable hashline anchors per file.\n4. For multi-path calls, `partitionExistingPaths()` skips only ENOENT entries. If every entry is missing, the tool errors.\n5. Path resolution branches:\n - one entry: `parseSearchPath()` splits `basePath` and optional glob;\n - multiple entries: `resolveExplicitSearchPaths()` computes a common base directory, brace-union glob, exact-file list, or degenerate-root target list.\n6. `search.ts` stats the resolved base path to decide file vs directory behavior.\n7. It calls native `grep()` from `@oh-my-pi/pi-natives` with:\n - `pattern`, `ignoreCase`, `multiline`, `gitignore`;\n - `hidden: true`;\n - `cache: false`;\n - `contextBefore` / `contextAfter` from settings;\n - `maxColumns: 1024`;\n - `mode: content`.\n8. Native execution happens in `crates/pi-natives/src/grep.rs`:\n - `build_matcher()` sanitizes non-quantifier braces before regex compile;\n - if compile fails with unopened/unclosed-group errors, it retries after escaping previously unescaped parentheses;\n - directory scans use the grep pipeline described in `docs/natives-text-search-pipeline.md`.\n9. Search dispatch differs by resolved path set:\n - exact explicit files or degenerate-root multi-targets: JS loops over targets and merges `grep()` results itself;\n - single file/directory base: one `grep()` call handles offset/limit natively.\n10. 
JS output shaping then:\n - round-robins directory matches down to `100` visible matches so one file does not monopolize the page;\n - keeps the first `100` file matches for single-file searches;\n - formats lines through `formatMatchLine()` for the model and `formatCodeFrameLine()` for TUI;\n - records non-truncated matched/context lines into the session file-read cache with `recordSparse()`.\n11. Final text is passed through `truncateHead(rawOutput, { maxLines: Number.MAX_SAFE_INTEGER })`, so the effective cap is the default byte cap from `streaming-output.ts`, not the default line cap.\n12. `toolResult()` attaches text plus limit/truncation metadata.\n\n## Modes / Variants\n1. **Single file path**\n - `grep()` searches one file.\n - Output is a flat list of match/context lines.\n - Visible limit is the first `100` matches after native offset handling.\n2. **Single directory path or single glob-like path**\n - `parseSearchPath()` may split the input into `path` + `glob`.\n - One native `grep()` scans the directory tree with `gitignore` and `hidden:true`.\n - Native `offset` handles `skip` globally across files.\n - JS round-robins the returned matches to `100` visible rows.\n3. **Multiple explicit paths/globs**\n - `resolveExplicitSearchPaths()` collapses them into a common base and either a brace-union glob, an explicit file list, or per-target searches when the only common base is the filesystem root.\n - Missing entries are skipped non-fatally unless all are missing.\n4. 
**Internal URL paths**\n - Supported only when the internal resource resolves to a real backing file.\n - No internal-URL globbing.\n - Immutable sources switch to the immutable display mode when formatting anchors.\n\n## Side Effects\n- Filesystem\n - Stats resolved search roots and input paths.\n - Reads matched files through native `grep()`.\n - Records sparse matched/context lines into the session file-read cache via `getFileReadCache(...).recordSparse(...)`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Reads session settings for context defaults.\n - Uses `session.internalRouter` to resolve internal URLs.\n - Populates tool `details.meta` with truncation/limit metadata.\n- Background work / cancellation\n - Wrapped in `untilAborted(signal, ...)` at the JS level.\n - `search.ts` does not pass `signal` or `timeoutMs` into native `grep()`, so native grep cancellation/timeouts are not used by this tool.\n\n## Limits & Caps\n- Visible page limit: `100` matches (`DEFAULT_MATCH_LIMIT` in `packages/coding-agent/src/tools/search.ts`).\n- Native preselection limit: `500` matches (`internalLimit = Math.min(DEFAULT_MATCH_LIMIT * 5, 2000)` in `packages/coding-agent/src/tools/search.ts`).\n- Line truncation: `1024` characters per emitted line (`DEFAULT_MAX_COLUMN` in `packages/coding-agent/src/session/streaming-output.ts`). Native grep marks truncated lines; JS reports `linesTruncated`.\n- Final text truncation: `truncateHead()` default byte cap `50 * 1024` bytes (`DEFAULT_MAX_BYTES` in `packages/coding-agent/src/session/streaming-output.ts`). `search.ts` overrides `maxLines` to `Number.MAX_SAFE_INTEGER`, so normal search output is byte-capped, not line-capped.\n- Context defaults: `search.contextBefore = 1`, `search.contextAfter = 3` in `packages/coding-agent/src/config/settings-schema.ts`.\n- Pagination: `skip` is a global match offset. 
In single-base searches it is pushed into native `offset`; in exact-file/multi-target aggregation it is applied in JS with `matches.slice(skip)`.\n- Native directory-scan cache: available in `grep.rs`, but this tool always sets `cache: false`.\n\n## Errors\n- `Pattern must not be empty` when trimmed `pattern` is empty.\n- `Skip must be a non-negative number` for negative or non-finite `skip`.\n- `` `paths` must contain non-empty paths or globs `` when any normalized path is empty.\n- `Glob patterns are not supported for internal URLs: ...` for internal URL + glob metacharacters.\n- `Cannot search internal URL without a backing file: ...` when the router resolves a virtual resource without `sourcePath`.\n- `Path not found: ...` when the resolved base path is missing, or when every multi-path entry is missing.\n- Regex compile failures bubble from native `grep()` as tool errors. `search.ts` special-cases errors whose message begins with `regex parse error` and rethrows all other errors unchanged.\n- Multi-file native scans skip per-file open/search failures inside `grep.rs`; the scan continues with surviving files.\n\n## Notes\n- The model-facing prompt documents standard regex syntax plus two search-specific rules: escape literal braces, and use `\\n` or a literal newline for cross-line matching.\n- Native `build_matcher()` already auto-escapes braces that cannot be valid quantifiers, so patterns like `${platform}` become searchable instead of failing. Valid quantifiers like `a{2,4}` remain unchanged.\n- Native compile retry also escapes unescaped literal parentheses only after an unopened/unclosed-group parse error. It is a fallback, not a general parser mode.\n- Internal URLs are resolved before path existence checks. After resolution, the native layer sees ordinary filesystem paths.\n- `hidden:true` is hard-coded in `search.ts`; there is no model-facing flag to exclude dotfiles.\n- `gitignore:false` only affects native directory traversal. 
It does not disable the tool's own path normalization or explicit-file handling.\n- When `paths` resolves to multiple exact files, `search.ts` does not apply the native `500` match cap and reports `totalMatches` internally as the post-skip length for that branch.\n- The anchor suffix in hashline mode comes from `computeLineHash()` in `packages/coding-agent/src/hashline/hash.ts`; `search` itself only formats it.\n",
87
- "tools/search_tool_bm25.md": "# search_tool_bm25\n\n> Search the hidden tool-discovery index and activate the top matches for the current session.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/search-tool-bm25.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/search-tool-bm25.md`\n- Key collaborators:\n - `packages/coding-agent/src/tool-discovery/tool-index.ts` — discoverable-tool metadata and BM25 index/search.\n - `packages/coding-agent/src/session/agent-session.ts` — session discovery mode, corpus assembly, activation, cache invalidation.\n - `packages/coding-agent/src/sdk.ts` — initial hiding of discoverable built-ins and prompt-time discoverable summary.\n - `packages/coding-agent/src/tools/index.ts` — tool-session discovery hooks, essential/discoverable load modes, registry wiring.\n - `packages/coding-agent/src/config/settings-schema.ts` — `tools.discoveryMode` and legacy `mcp.discoveryMode` settings.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `query` | `string` | Yes | Natural-language or keyword query. Trimmed before search; empty-after-trim is rejected. |\n| `limit` | `integer` | No | Max matches to return and activate. Minimum `1`. Defaults to `8` (`DEFAULT_LIMIT`). 
|\n\n## Outputs\n- Single-shot `AgentToolResult`.\n- Model-visible `content` is one text part containing JSON with:\n\n```json\n{\"query\":\"...\",\"activated_tools\":[\"...\"],\"match_count\":2,\"total_tools\":17}\n```\n\n- Runtime-only `details` carries the ranked matches used by the TUI renderer:\n - `query`, `limit`, `total_tools`\n - `activated_tools`: tool names activated by this call\n - `active_selected_tools`: cumulative discovered-tool selections still active\n - `tools`: array of match objects with\n - `name`\n - `label`\n - `description` (`tool.summary`; this is the only snippet-like field)\n - optional `server_name`\n - optional `mcp_tool_name`\n - `schema_keys`\n - `score` rounded to 6 decimals\n- The renderer shows a status line plus up to 5 collapsed tree items by default (`COLLAPSED_MATCH_LIMIT`), each with label, optional server name, score to 3 decimals, and truncated description. The ranked match list is not serialized into `content`.\n\n## Flow\n1. `SearchToolBm25Tool.createIf()` in `packages/coding-agent/src/tools/search-tool-bm25.ts` exposes the tool only when `tools.discoveryMode !== \"off\"` or legacy `mcp.discoveryMode === true`, and only if the session implements the discovery hooks.\n2. `description` is rendered from `packages/coding-agent/src/prompts/tools/search-tool-bm25.md` via `renderSearchToolBm25Description()`, using the current discoverable-tool list plus per-server summary/count.\n3. `execute()` re-checks capability and settings:\n - missing discovery hooks -> `ToolError(\"Tool discovery is unavailable in this session.\")`\n - discovery disabled -> `ToolError(\"Tool discovery is disabled. Enable tools.discoveryMode or mcp.discoveryMode to use search_tool_bm25.\")`\n4. `query` is trimmed and validated; `limit` is defaulted/validated.\n5. 
`getDiscoverableToolSearchIndexForExecution()` fetches the cached generic search index from the session when available, otherwise falls back to the legacy MCP cache, otherwise rebuilds an index from the current discoverable-tool list.\n6. `getSelectedToolNames()` reads the current discovered selections so already-selected tools can be excluded from fresh results.\n7. `searchDiscoverableTools()` in `packages/coding-agent/src/tool-discovery/tool-index.ts` tokenizes the query, scores every document with BM25, sorts by descending score then `tool.name`, and returns up to `searchIndex.documents.length` results; `execute()` then filters already-selected names and slices to `limit`.\n8. If any matches remain, `activateTools()` activates all matched tool names through `session.activateDiscoveredTools()` or legacy `activateDiscoveredMCPTools()`.\n9. `details` is assembled from the activated names, current selected names, corpus size, and formatted matches; `content` is reduced to the compact JSON summary from `buildSearchToolBm25Content()`.\n10. 
`searchToolBm25Renderer` renders either:\n - the structured `details` view, or\n - a fallback text-only warning block if `details` is absent.\n\n## Modes / Variants\n- Discovery-mode gating:\n - `tools.discoveryMode = \"all\"`: searches hidden discoverable built-ins plus hidden MCP tools.\n - `tools.discoveryMode = \"mcp-only\"`: searches hidden MCP tools only.\n - legacy `mcp.discoveryMode = true` with `tools.discoveryMode = \"off\"`: same as MCP-only.\n- Search-index source:\n - generic cached discoverable index from the session\n - legacy cached MCP index, cast to the generic shape\n - rebuilt ad hoc from the current discoverable-tool list if neither cache path works\n- Activation backend:\n - generic `activateDiscoveredTools()`\n - legacy `activateDiscoveredMCPTools()` fallback\n\n## Side Effects\n- Session state\n - Adds matched tools to the active session tool set through `activateDiscoveredTools()` / `activateDiscoveredMCPTools()`.\n - Updates discovered-tool selection state so repeated searches accumulate selections instead of replacing them.\n - Invalidates the cached discoverable search index when newly activated built-ins change the hidden corpus (`packages/coding-agent/src/session/agent-session.ts`).\n - Tool availability changes before the next model call in the same turn; the prompt text says this explicitly.\n- User-visible prompts / interactive UI\n - The tool description includes discoverable server summaries and total discoverable-tool count.\n - The TUI renderer shows ranked matches, but the model-visible text summary does not.\n\n## Limits & Caps\n- Default result cap: `8` (`DEFAULT_LIMIT` in `packages/coding-agent/src/tools/search-tool-bm25.ts`).\n- `limit` must be a positive integer; no tool-level upper bound beyond corpus size.\n- Renderer collapsed list cap: `5` (`COLLAPSED_MATCH_LIMIT`).\n- Renderer truncation widths:\n - label: `72` chars (`MATCH_LABEL_LEN`)\n - description: `96` chars (`MATCH_DESCRIPTION_LEN`)\n- BM25 parameters in 
`packages/coding-agent/src/tool-discovery/tool-index.ts`:\n - `BM25_K1 = 1.2`\n - `BM25_B = 0.75`\n- Weighted corpus fields (`FIELD_WEIGHTS`):\n - `name`: `6`\n - `label`: `4`\n - `mcpToolName`: `4`\n - `serverName`: `2`\n - `summary`: `2`\n - each `schemaKey`: `1`\n- Summary fallback length for discoverable metadata: first `200` chars of `description` when no explicit summary exists (`getDiscoverableTool()` in `packages/coding-agent/src/tool-discovery/tool-index.ts`).\n\n## Errors\n- `execute()` throws `ToolError` for unavailable discovery hooks, disabled discovery mode, empty trimmed query, and non-positive/non-integer `limit`.\n- `searchDiscoverableTools()` throws `Error(\"Query must contain at least one letter or number.\")` if tokenization produces no alphanumeric tokens; `execute()` catches `Error` and rethrows `ToolError(error.message)`.\n- Empty corpus is not an error; search returns `[]`, activation is skipped, and the renderer message becomes either `No discoverable tools are currently loaded.` or `No matching tools found.`\n- `getDiscoverableToolsForDescription()` and `getDiscoverableToolSearchIndexForExecution()` swallow discovery-hook/cache errors and fall back to an empty corpus or rebuilt index.\n\n## Notes\n- The tool wire name stays `search_tool_bm25` for persisted-session back-compat, even though the source file is `search-tool-bm25.ts`.\n- Corpus composition is session-dependent and excludes already-active tools:\n - MCP entries come from `#discoverableMCPTools`, filtered to names not currently active, mapped with `summary = description`.\n - Built-in entries appear only in `\"all\"` mode and only for registry tools whose `loadMode === \"discoverable\"` and are not currently active.\n - Hidden/internal built-ins are intentionally excluded from the built-in corpus: `resolve`, `yield`, `exit_plan_mode`, `report_finding`, `report_tool_issue` are called out in the `#collectDiscoverableBuiltinTools()` comment.\n- `DiscoverableToolSource` includes 
`\"extension\"` and `\"custom\"`, but `AgentSession.getDiscoverableTools()` currently assembles only built-in and MCP sources.\n- On startup, `packages/coding-agent/src/sdk.ts` hides non-essential discoverable built-ins in `tools.discoveryMode = \"all\"`; defaults are `read`, `bash`, and `edit` unless `tools.essentialOverride` changes them.\n- Query tokenization is simple and deterministic: camelCase is split, non-alphanumerics become spaces, tokens are lowercased, and only non-empty alphanumeric tokens survive.\n- Scores are rounded differently by surface: `details.tools[].score` keeps 6 decimals; the TUI line renders 3.\n",
86
+ "tools/search_tool_bm25.md": "# search_tool_bm25\n\n> Search the hidden tool-discovery index and activate the top matches for the current session.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/search-tool-bm25.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/search-tool-bm25.md`\n- Key collaborators:\n - `packages/coding-agent/src/tool-discovery/tool-index.ts` — discoverable-tool metadata and BM25 index/search.\n - `packages/coding-agent/src/session/agent-session.ts` — session discovery mode, corpus assembly, activation, cache invalidation.\n - `packages/coding-agent/src/sdk.ts` — initial hiding of discoverable built-ins and prompt-time discoverable summary.\n - `packages/coding-agent/src/tools/index.ts` — tool-session discovery hooks, essential/discoverable load modes, registry wiring.\n - `packages/coding-agent/src/config/settings-schema.ts` — `tools.discoveryMode` and legacy `mcp.discoveryMode` settings.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `query` | `string` | Yes | Natural-language or keyword query. Trimmed before search; empty-after-trim is rejected. |\n| `limit` | `integer` | No | Max matches to return and activate. Minimum `1`. Defaults to `8` (`DEFAULT_LIMIT`). 
|\n\n## Outputs\n- Single-shot `AgentToolResult`.\n- Model-visible `content` is one text part containing JSON with:\n\n```json\n{\"query\":\"...\",\"activated_tools\":[\"...\"],\"match_count\":2,\"total_tools\":17}\n```\n\n- Runtime-only `details` carries the ranked matches used by the TUI renderer:\n - `query`, `limit`, `total_tools`\n - `activated_tools`: tool names activated by this call\n - `active_selected_tools`: cumulative discovered-tool selections still active\n - `tools`: array of match objects with\n - `name`\n - `label`\n - `description` (`tool.summary`; this is the only snippet-like field)\n - optional `server_name`\n - optional `mcp_tool_name`\n - `schema_keys`\n - `score` rounded to 6 decimals\n- The renderer shows a status line plus up to 5 collapsed tree items by default (`COLLAPSED_MATCH_LIMIT`), each with label, optional server name, score to 3 decimals, and truncated description. The ranked match list is not serialized into `content`.\n\n## Flow\n1. `SearchToolBm25Tool.createIf()` in `packages/coding-agent/src/tools/search-tool-bm25.ts` exposes the tool only when `tools.discoveryMode !== \"off\"` or legacy `mcp.discoveryMode === true`, and only if the session implements the discovery hooks.\n2. `description` is rendered from `packages/coding-agent/src/prompts/tools/search-tool-bm25.md` via `renderSearchToolBm25Description()`, using the current discoverable-tool list plus per-server summary/count.\n3. `execute()` re-checks capability and settings:\n - missing discovery hooks -> `ToolError(\"Tool discovery is unavailable in this session.\")`\n - discovery disabled -> `ToolError(\"Tool discovery is disabled. Enable tools.discoveryMode or mcp.discoveryMode to use search_tool_bm25.\")`\n4. `query` is trimmed and validated; `limit` is defaulted/validated.\n5. 
`getDiscoverableToolSearchIndexForExecution()` fetches the cached generic search index from the session when available, otherwise falls back to the legacy MCP cache, otherwise rebuilds an index from the current discoverable-tool list.\n6. `getSelectedToolNames()` reads the current discovered selections so already-selected tools can be excluded from fresh results.\n7. `searchDiscoverableTools()` in `packages/coding-agent/src/tool-discovery/tool-index.ts` tokenizes the query, scores every document with BM25, sorts by descending score then `tool.name`, and returns up to `searchIndex.documents.length` results; `execute()` then filters already-selected names and slices to `limit`.\n8. If any matches remain, `activateTools()` activates all matched tool names through `session.activateDiscoveredTools()` or legacy `activateDiscoveredMCPTools()`.\n9. `details` is assembled from the activated names, current selected names, corpus size, and formatted matches; `content` is reduced to the compact JSON summary from `buildSearchToolBm25Content()`.\n10. 
`searchToolBm25Renderer` renders either:\n - the structured `details` view, or\n - a fallback text-only warning block if `details` is absent.\n\n## Modes / Variants\n- Discovery-mode gating:\n - `tools.discoveryMode = \"all\"`: searches hidden discoverable built-ins plus hidden MCP tools.\n - `tools.discoveryMode = \"mcp-only\"`: searches hidden MCP tools only.\n - legacy `mcp.discoveryMode = true` with `tools.discoveryMode = \"off\"`: same as MCP-only.\n- Search-index source:\n - generic cached discoverable index from the session\n - legacy cached MCP index, cast to the generic shape\n - rebuilt ad hoc from the current discoverable-tool list if neither cache path works\n- Activation backend:\n - generic `activateDiscoveredTools()`\n - legacy `activateDiscoveredMCPTools()` fallback\n\n## Side Effects\n- Session state\n - Adds matched tools to the active session tool set through `activateDiscoveredTools()` / `activateDiscoveredMCPTools()`.\n - Updates discovered-tool selection state so repeated searches accumulate selections instead of replacing them.\n - Invalidates the cached discoverable search index when newly activated built-ins change the hidden corpus (`packages/coding-agent/src/session/agent-session.ts`).\n - Tool availability changes before the next model call in the same turn; the prompt text says this explicitly.\n- User-visible prompts / interactive UI\n - The tool description includes discoverable server summaries and total discoverable-tool count.\n - The TUI renderer shows ranked matches, but the model-visible text summary does not.\n\n## Limits & Caps\n- Default result cap: `8` (`DEFAULT_LIMIT` in `packages/coding-agent/src/tools/search-tool-bm25.ts`).\n- `limit` must be a positive integer; no tool-level upper bound beyond corpus size.\n- Renderer collapsed list cap: `5` (`COLLAPSED_MATCH_LIMIT`).\n- Renderer truncation widths:\n - label: `72` chars (`MATCH_LABEL_LEN`)\n - description: `96` chars (`MATCH_DESCRIPTION_LEN`)\n- BM25 parameters in 
`packages/coding-agent/src/tool-discovery/tool-index.ts`:\n - `BM25_K1 = 1.2`\n - `BM25_B = 0.75`\n- Weighted corpus fields (`FIELD_WEIGHTS`):\n - `name`: `6`\n - `label`: `4`\n - `mcpToolName`: `4`\n - `serverName`: `2`\n - `summary`: `2`\n - each `schemaKey`: `1`\n- Summary fallback length for discoverable metadata: first `200` chars of `description` when no explicit summary exists (`getDiscoverableTool()` in `packages/coding-agent/src/tool-discovery/tool-index.ts`).\n\n## Errors\n- `execute()` throws `ToolError` for unavailable discovery hooks, disabled discovery mode, empty trimmed query, and non-positive/non-integer `limit`.\n- `searchDiscoverableTools()` throws `Error(\"Query must contain at least one letter or number.\")` if tokenization produces no alphanumeric tokens; `execute()` catches `Error` and rethrows `ToolError(error.message)`.\n- Empty corpus is not an error; search returns `[]`, activation is skipped, and the renderer message becomes either `No discoverable tools are currently loaded.` or `No matching tools found.`\n- `getDiscoverableToolsForDescription()` and `getDiscoverableToolSearchIndexForExecution()` swallow discovery-hook/cache errors and fall back to an empty corpus or rebuilt index.\n\n## Notes\n- The tool wire name stays `search_tool_bm25` for persisted-session back-compat, even though the source file is `search-tool-bm25.ts`.\n- Corpus composition is session-dependent and excludes already-active tools:\n - MCP entries come from `#discoverableMCPTools`, filtered to names not currently active, mapped with `summary = description`.\n - Built-in entries appear only in `\"all\"` mode and only for registry tools whose `loadMode === \"discoverable\"` and are not currently active.\n - Hidden/internal built-ins are intentionally excluded from the built-in corpus: `resolve`, `yield`, `report_finding`, `report_tool_issue` are called out in the `#collectDiscoverableBuiltinTools()` comment.\n- `DiscoverableToolSource` includes `\"extension\"` and 
`\"custom\"`, but `AgentSession.getDiscoverableTools()` currently assembles only built-in and MCP sources.\n- On startup, `packages/coding-agent/src/sdk.ts` hides non-essential discoverable built-ins in `tools.discoveryMode = \"all\"`; defaults are `read`, `bash`, and `edit` unless `tools.essentialOverride` changes them.\n- Query tokenization is simple and deterministic: camelCase is split, non-alphanumerics become spaces, tokens are lowercased, and only non-empty alphanumeric tokens survive.\n- Scores are rounded differently by surface: `details.tools[].score` keeps 6 decimals; the TUI line renders 3.\n",
88
87
  "tools/ssh.md": "# ssh\n\n> Execute one remote command on a discovered SSH host.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ssh.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ssh.md`\n- Key collaborators:\n - `packages/coding-agent/src/ssh/ssh-executor.ts` — runs `ssh`, captures output\n - `packages/coding-agent/src/ssh/connection-manager.ts` — master-connection reuse, host probing\n - `packages/coding-agent/src/ssh/sshfs-mount.ts` — optional `sshfs` mount side effect\n - `packages/coding-agent/src/discovery/ssh.ts` — discovers host configs\n - `packages/coding-agent/src/capability/ssh.ts` — canonical host shape\n - `packages/coding-agent/src/session/streaming-output.ts` — tail streaming, truncation, artifacts\n - `packages/coding-agent/src/tools/tool-timeouts.ts` — timeout clamp rules\n - `packages/utils/src/dirs.ts` — user/project ssh config paths\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `host` | `string` | Yes | Host name key from discovered SSH config entries, not an arbitrary hostname/IP. |\n| `command` | `string` | Yes | Remote command string passed to `ssh` as the remote command. |\n| `cwd` | `string` | No | Remote working directory. The tool prepends a shell-specific `cd`/`Set-Location` wrapper. |\n| `timeout` | `number` | No | Timeout in seconds. Default `60`; clamped to `1..3600`. 
|\n\n## Outputs\nThe tool returns a standard text tool result built in `packages/coding-agent/src/tools/ssh.ts`:\n\n- `content`: one text block containing combined remote stdout+stderr, or `\"(no output)\"` when empty.\n- `details.meta.truncation`: present when output exceeded the in-memory tail window; derived from the executor summary.\n\nStreaming behavior:\n\n- While the command runs, `onUpdate` receives tail-only text snapshots built from `TailBuffer` in `packages/coding-agent/src/session/streaming-output.ts`.\n- Final output is single-shot after process exit.\n\nSide-channel artifacts:\n\n- When session artifact allocation is available and output exceeds the spill threshold, full output is written to a session artifact file and the returned summary carries its `artifactId` internally.\n- The ssh tool itself does not print the `artifact://...` URI into the result text.\n\nFailure behavior:\n\n- Unknown host, missing host config, timeout, cancellation, SSH startup failure, key validation failure, or non-zero remote exit all surface as thrown `ToolError`s.\n- Non-zero remote exit includes captured output plus `Command exited with code N`.\n\n## Flow\n1. `loadSshTool()` in `packages/coding-agent/src/tools/ssh.ts` calls `loadCapability(sshCapability.id, { cwd: session.cwd })` to discover hosts.\n2. `packages/coding-agent/src/discovery/ssh.ts` loads host entries from, in this order: project managed ssh config, user managed ssh config, `ssh.json` in the repo root, `.ssh.json` in the repo root.\n3. `getSSHConfigPath(\"project\")` and `getSSHConfigPath(\"user\")` in `packages/utils/src/dirs.ts` resolve those managed files to `.omp/ssh.json` in the project and `~/.omp/agent/ssh.json` in the user config dir. This tool does not read `~/.ssh/config`.\n4. Capability loading deduplicates by host name with first item winning; provider order is priority-sorted and the SSH JSON provider registers at priority `5`.\n5. 
`loadHosts()` in `packages/coding-agent/src/tools/ssh.ts` builds `hostsByName` and drops later duplicates again with `if (!hostsByName.has(host.name))`.\n6. Tool description text is built from `packages/coding-agent/src/prompts/tools/ssh.md` plus an `Available hosts:` list. Each host entry calls `getHostInfoForHost()` to show detected shell/OS when cached; otherwise it renders `detecting...`.\n7. On execute, `SshTool.execute()` rejects any `host` not in the discovered host-name set.\n8. `ensureHostInfo()` in `packages/coding-agent/src/ssh/connection-manager.ts` ensures an SSH master connection exists, loads cached host info from disk if present, and probes remote OS/shell when cache is missing or stale.\n9. `buildRemoteCommand()` in `packages/coding-agent/src/tools/ssh.ts` prepends a cwd change when `cwd` is provided:\n - Unix-like or Windows compat shells: `cd -- '<cwd>' && <command>`\n - Windows PowerShell: `Set-Location -Path '<cwd>'; <command>`\n - Windows cmd: `cd /d \"<cwd>\" && <command>`\n10. `clampTimeout(\"ssh\", rawTimeout)` applies the `1..3600` second clamp from `packages/coding-agent/src/tools/tool-timeouts.ts`.\n11. `executeSSH()` in `packages/coding-agent/src/ssh/ssh-executor.ts` calls `ensureConnection(host)` again, opportunistically mounts the remote host root with `sshfs` if available, optionally wraps the command in `bash -c` or `sh -c` for Windows compat mode, then spawns `ssh` with `ptree.spawn`.\n12. Output from both stdout and stderr is piped into one `OutputSink`; chunks are sanitized and forwarded to streaming updates through `streamTailUpdates()`.\n13. On normal exit, the sink returns combined output plus truncation counters. On timeout or abort, `executeSSH()` returns `cancelled: true` and prefixes the output with a notice line such as `[SSH: ...]` or `[Command aborted: ...]`.\n14. 
`SshTool.execute()` converts `cancelled: true` into `ToolError`, converts non-zero exit codes into `ToolError`, otherwise returns the text result with truncation metadata.\n\n## Modes / Variants\n- **Tool unavailable**: `loadSshTool()` returns `null` when discovery finds no hosts, so the tool is not registered for that session.\n- **Unix-like target**: remote command is passed through directly, with optional `cd -- ... &&` prefix.\n- **Windows native shell**: cwd wrapper uses PowerShell `Set-Location` or cmd `cd /d`; command otherwise runs in the remote default Windows shell.\n- **Windows compat shell**: if host probing finds `bash` or `sh` on Windows, `executeSSH()` wraps the remote command as `bash -c '...'` or `sh -c '...'`. Host config can force compat on/off with `compat`.\n- **Cached vs probed host info**: shell/OS detection comes from in-memory cache, persisted JSON under the remote-host dir, or a fresh probe over SSH.\n- **Truncated vs untruncated output**: small output stays in memory; large output keeps only the last 50 KiB in memory and may spill full output to an artifact file.\n\n## Side Effects\n- Filesystem\n - Reads managed SSH config JSON plus legacy `ssh.json` / `.ssh.json`.\n - Validates private-key path existence and permissions before connecting.\n - Persists probed host info as JSON under the remote-host cache dir via `persistHostInfo()`.\n - May create the SSH control socket dir and, when `sshfs` exists, remote mount dirs.\n - May write full command output to a session artifact file.\n- Network\n - Opens SSH connections to the selected host.\n - May issue extra probe commands to detect OS/shell and compat shells.\n- Subprocesses / native bindings\n - Requires `ssh` on `PATH`; spawns it for connection checks, master startup, probing, and command execution.\n - May call `sshfs`, `mountpoint`, `fusermount`/`fusermount3`, or `umount`.\n - Sanitizes streamed text with `@oh-my-pi/pi-natives` text sanitization.\n- Session state (transcript, memory, 
jobs, checkpoints, registries)\n - Uses session artifact allocation when available.\n - Registers postmortem cleanup hooks for SSH master connections and sshfs mounts.\n - Tool concurrency is `exclusive`, so the agent scheduler should not run multiple ssh tool calls concurrently.\n- Background work / cancellation\n - Process spawn receives the tool `AbortSignal`.\n - Cancellation/timeout ends the running ssh process and returns a cancelled result that the tool turns into an error.\n\n## Limits & Caps\n- Timeout defaults/clamps: `default=60`, `min=1`, `max=3600` in `packages/coding-agent/src/tools/tool-timeouts.ts`.\n- Output tail window: `DEFAULT_MAX_BYTES = 50 * 1024` in `packages/coding-agent/src/session/streaming-output.ts`.\n- Output sink spill threshold defaults to the same `50 KiB`; once exceeded, only the tail remains in memory.\n- SSH master reuse persistence: `ControlPersist=3600` in `packages/coding-agent/src/ssh/connection-manager.ts` and `packages/coding-agent/src/ssh/sshfs-mount.ts`.\n- SSH host info schema version: `HOST_INFO_VERSION = 2` in `packages/coding-agent/src/ssh/connection-manager.ts`; stale cache entries are reprobed.\n- Streaming tail buffer compacts after more than `10` pending chunks (`MAX_PENDING`) before trimming.\n\n## Errors\n- `Unknown SSH host: ... 
Available hosts: ...` when the model passes a host name not present in discovery.\n- `SSH host not loaded: ...` if the discovered-name set and `hostsByName` map diverge.\n- `ssh binary not found on PATH` when `ssh` is unavailable.\n- `SSH key not found: ...`, `SSH key is not a file: ...`, or `SSH key permissions must be 600 or stricter: ...` from key validation.\n- `Failed to start SSH master for <target>: <stderr>` when control-master startup fails.\n- Non-zero remote command exit becomes `ToolError` with captured output and `Command exited with code N`.\n- Timeout becomes a cancelled result with output notice `[SSH: <timeout message>]`, then `ToolError`.\n- Abort becomes a cancelled result with output notice `[Command aborted: <message>]`, then `ToolError`.\n- `sshfs` mount failures are logged and ignored in `executeSSH()`; they do not fail the tool call.\n- Discovery parse problems do not fail tool loading; they become capability warnings. If all sources are empty/invalid, the tool simply does not load.\n\n## Notes\n- Host discovery is JSON-based only. The tool does not parse OpenSSH config files.\n- Discovery expands environment variables recursively in the parsed JSON and expands `~` in `key`/`keyPath`.\n- Host names are capability keys; the model must pass the config key, not the raw hostname.\n- Commands run without a PTY. 
`executeSSH()` uses `ptree.spawn(..., { stdin: \"pipe\", stderr: \"full\" })` and does not request an interactive terminal.\n- The tool exposes `cwd` but no `env`, `pty`, upload, download, or explicit file-transfer fields.\n- Lower layers support an `artifactId` for full output and a `remotePath` mount target, but `SshTool.execute()` does not expose those knobs.\n- Both stdout and stderr are merged into one output stream; ordering is whatever arrives through the two streams.\n- `StrictHostKeyChecking=accept-new` and `BatchMode=yes` are always set for connection checks, master startup, and command runs.\n- Connection reuse is keyed by discovered host name, not by raw target tuple alone.\n- `closeAllConnections()` and sshfs unmount cleanup run through postmortem hooks, not per-call teardown.\n",
89
88
  "tools/task.md": "# task\n\n> Launch subagents for parallel, optionally isolated work.\n\n## Source\n- Entry: `packages/coding-agent/src/task/index.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/task.md`\n- Key collaborators:\n - `packages/coding-agent/src/task/types.ts` — dynamic schema, progress/result types, output caps.\n - `packages/coding-agent/src/task/discovery.ts` — discover project/user/plugin/bundled agents.\n - `packages/coding-agent/src/task/agents.ts` — bundled agent definitions and frontmatter parsing.\n - `packages/coding-agent/src/task/executor.ts` — create child sessions, run subagents, collect output.\n - `packages/coding-agent/src/task/parallel.ts` — concurrency-limited scheduling and async semaphore.\n - `packages/coding-agent/src/task/isolation-backend.ts` — isolation backend resolution and platform fallback.\n - `packages/coding-agent/src/task/worktree.ts` — worktree / FUSE / ProjFS setup, patch capture, branch merge.\n - `packages/coding-agent/src/task/output-manager.ts` — session-scoped `agent://` id allocation.\n - `packages/coding-agent/src/task/simple-mode.ts` — `default` / `schema-free` / `independent` field gating.\n - `packages/coding-agent/src/internal-urls/agent-protocol.ts` — resolve `agent://<id>` to saved subagent output.\n - `packages/coding-agent/src/tools/index.ts` — tool registration and recursion-depth gating.\n - `packages/coding-agent/src/sdk.ts` — child-session router/tool wiring and per-subagent `AgentOutputManager`.\n - `docs/task-agent-discovery.md` — deeper discovery and precedence notes.\n - `docs/handoff-generation-pipeline.md` — session artifact/handoff persistence patterns used by the wider session layer.\n\n## Inputs\n\n### Default mode (`task.simple = \"default\"`)\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `agent` | `string` | Yes | Exact agent name for every task item. Resolved at execution time through `discoverAgents(...)`. 
|\n| `tasks` | `Array<{ id: string; description: string; assignment: string }>` | Yes | Batch of small, self-contained task items. `id` max length 48 in schema; duplicate ids are rejected case-insensitively at runtime. |\n| `context` | `string` | No | Shared background prepended to every subagent system prompt. Trimmed before use. |\n| `schema` | `string` | No | JSON-encoded JTD schema. Overrides agent/session output schema when this mode allows task-level schemas. |\n| `isolated` | `boolean` | No | Only present when the tool is created with isolation enabled. Requests isolated execution for the whole batch. |\n\n`tasks[].description` is UI-only. `tasks[].assignment` is the actual per-task instruction.\n\n### Schema-free mode (`task.simple = \"schema-free\"`)\n\nSame as default, except `schema` is rejected by `validateTaskModeParams(...)` in `packages/coding-agent/src/task/index.ts`.\n\n### Independent mode (`task.simple = \"independent\"`)\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `agent` | `string` | Yes | Exact agent name. |\n| `tasks` | `Array<{ id: string; description: string; assignment: string }>` | Yes | Same item shape, but each `assignment` must carry all required background because shared `context` is disabled. |\n| `isolated` | `boolean` | No | Same conditional field as above. 
|\n\nIn this mode both `context` and `schema` are rejected.\n\n## Outputs\nThe tool returns one text block plus `details: TaskToolDetails`.\n\n`details` fields:\n- `projectAgentsDir: string | null` — nearest discovered project `agents/` dir.\n- `results: SingleResult[]` — one entry per task in input order for synchronous execution; empty for async-launch responses.\n- `totalDurationMs: number`\n- `usage?: Usage` — sum of per-subagent assistant-message usage.\n- `outputPaths?: string[]` — written `.md` artifact paths for completed subagent outputs.\n- `progress?: AgentProgress[]` — live or final per-task progress snapshots.\n- `async?: { state: \"running\" | \"completed\" | \"failed\"; jobId: string; type: \"task\" }` — present for background execution updates/results.\n\n`SingleResult` includes:\n- identity: `index`, `id`, `agent`, `agentSource`, `description`, optional `assignment`\n- status: `exitCode`, optional `error`, optional `aborted`, optional `abortReason`\n- output: `output`, `stderr`, `truncated`, `durationMs`, `tokens`\n- artifact metadata: `outputPath?`, `patchPath?`, `branchName?`, `nestedPatches?`, `outputMeta?`\n- extracted tool data: `extractedToolData?` from registered subprocess tool handlers such as `yield` and `report_finding`\n\nArtifacts and side channels:\n- Every subagent with an artifacts dir writes `<id>.md`; `agent://<id>` resolves to that file.\n- If the output file is JSON, `agent://<id>/<path>` and `agent://<id>?q=<query>` perform JSON extraction in `packages/coding-agent/src/internal-urls/agent-protocol.ts`.\n- When the parent session persists artifacts, each subagent also gets `<id>.jsonl` session history.\n- Isolated patch mode writes `<id>.patch` per successful task before merge.\n- Async mode returns immediately after job registration, then emits `onUpdate(...)` progress snapshots and later hands completion to the session async-job pipeline.\n\n## Flow\n1. 
`TaskTool.create(...)` in `packages/coding-agent/src/task/index.ts` calls `discoverAgents(session.cwd)` once to build the dynamic prompt description from current agents and `task.simple` capabilities.\n2. `execute(...)` validates mode-gated fields with `validateTaskModeParams(...)`.\n3. It decides async vs sync:\n - sync when `async.enabled` is false\n - sync when the selected cached agent has `blocking === true`\n - sync when `tasks.length === 0`\n - otherwise async job scheduling\n4. Async path:\n - allocate unique output ids with `AgentOutputManager.allocateBatch(...)`\n - create one async job per task through `session.asyncJobManager.register(...)`\n - limit concurrent job bodies with `Semaphore(task.maxConcurrency)` from `packages/coding-agent/src/task/parallel.ts`\n - each job body calls `#executeSync(...)` with a one-task batch and the preallocated id\n - `onUpdate(...)` emits aggregate `progress` snapshots and `details.async`\n5. Sync path (`#executeSync(...)`) rediscovers agents from disk via `discoverAgents(...)`, so runtime resolution can differ from the earlier prompt description.\n6. It resolves the requested agent with `getAgent(...)`, rejects unknown or disabled agents, and enforces parent spawn policy plus `PI_BLOCKED_AGENT` self-recursion prevention.\n7. It derives the effective output schema in priority order: task call `schema` (if allowed) → agent frontmatter `output` → inherited parent session schema.\n8. It validates task ids: missing ids and case-insensitive duplicates are immediate errors.\n9. If `isolated` was requested, it requires a git repo (`getRepoRoot(...)` / `captureBaseline(...)`) and resolves the actual backend through `resolveIsolationBackendForTaskExecution(...)`.\n10. It chooses an artifacts dir from the parent session when available, otherwise a temp dir, and writes `context.md` there when `session.getCompactContext?.()` returns content.\n11. 
It allocates unique ids again if the caller did not preallocate them, then builds `tasksWithUniqueIds`.\n12. For each task, it seeds an `AgentProgress` entry and runs `runTask(...)` through `mapWithConcurrencyLimit(...)` using `task.maxConcurrency`.\n13. Non-isolated `runTask(...)` calls `runSubprocess(...)` directly with parent cwd.\n14. Isolated `runTask(...)`:\n - creates an isolation workspace (`ensureWorktree(...)`, `ensureFuseOverlay(...)`, or `ensureProjfsOverlay(...)`)\n - applies the captured baseline for worktrees\n - runs `runSubprocess(...)` inside that workspace\n - on success, either commits to a per-task branch (`mergeMode === \"branch\"`) or captures a patch with `captureDeltaPatch(...)`\n - always cleans up the isolation workspace/backend\n15. `runSubprocess(...)` in `packages/coding-agent/src/task/executor.ts` creates a child agent session with:\n - isolated settings snapshot via `Settings.isolated(...)`, forcing `async.enabled = false` and `bash.autoBackground.enabled = false`\n - child `agentId` / `parentTaskPrefix` equal to the allocated task id\n - child internal URL router and `AgentOutputManager` from `packages/coding-agent/src/sdk.ts`\n - the shared `context`, optional `context.md` reference, optional isolation worktree path, output schema, and IRC peer roster in the system prompt template\n16. Child tool availability is derived from the agent definition plus runtime guards:\n - explicit `agent.tools` if provided\n - auto-add `task` when the agent has `spawns` and recursion depth allows it\n - remove `task` at or past `task.maxRecursionDepth`\n - expand `exec` to `eval` and `bash`\n - strip parent-owned `todo_write` after session creation\n17. `runSubprocess(...)` subscribes to child agent events, coalesces progress updates every 150 ms, forwards lifecycle/progress events on the parent event bus, and extracts tool data through `subprocessToolRegistry`.\n18. The child must finish through the hidden `yield` tool. 
If it does not, `runSubprocess(...)` sends up to 3 reminder prompts; the last reminder forces `toolChoice = yield` when supported.\n19. Finalization uses `finalizeSubprocessOutput(...)` to reconcile raw assistant text, `yield` payloads, structured schemas, `report_finding` data, and abort states. Output is truncated with `MAX_OUTPUT_BYTES` / `MAX_OUTPUT_LINES` before returning to the parent, but the full raw output is still written to `<id>.md`.\n20. After all sync tasks finish, `#executeSync(...)` aggregates usage, collects artifact paths, and if isolation was used merges results back:\n - branch mode: cherry-pick per-task branches with `mergeTaskBranches(...)`, then delete merged branches with `cleanupTaskBranches(...)`\n - patch mode: combine non-empty patch artifacts, dry-check with `git.patch.canApplyText(...)`, then apply or leave manual artifacts\n - nested repo patches are applied separately with `applyNestedPatches(...)`\n21. The final text summary is rendered from `packages/coding-agent/src/prompts/tools/task-summary.md` and includes `agent://<id>` handles for outputs that exist.\n\n## Modes / Variants\n- Execution mode\n - Sync inline execution — default path.\n - Async background execution — one async job per task item when `async.enabled` is on and the chosen agent is not marked `blocking`.\n- Simple mode\n - `default` — accepts shared `context` and per-call `schema`.\n - `schema-free` — accepts `context`, rejects `schema`.\n - `independent` — rejects `context` and `schema`; each assignment stands alone.\n- Isolation backend\n - `none` — no isolation.\n - `worktree` — detached git worktree plus baseline replay.\n - `fuse-overlay` — Unix FUSE overlay mount.\n - `fuse-projfs` — Windows ProjFS overlay.\n- Isolation merge strategy\n - Patch mode — capture/apply root patches, keep patch artifacts when application fails.\n - Branch mode — commit each task onto `omp/task/<id>` branch, cherry-pick into parent, preserve failed branches for manual resolution.\n- 
Agent source\n - Project custom agents — nearest project config/plugin agent directories, first by source-family precedence.\n - User custom agents — user config/plugin agent directories after project dirs of the same source family.\n - Bundled agents — appended last from `packages/coding-agent/src/task/agents.ts`.\n- Bundled agent types\n - `explore` — read-only scout with structured handoff output.\n - `plan` — architecture/planning agent; may spawn `explore`.\n - `designer` — UI/UX specialist.\n - `reviewer` — review agent with `report_finding` extraction.\n - `task` — general-purpose worker with full capabilities.\n - `quick_task` — low-reasoning mechanical worker using the same task prompt body.\n - `librarian` — source-grounded external API/library researcher.\n\n## Side Effects\n- Filesystem\n - Writes `context.md`, `<id>.jsonl`, and `<id>.md` under the session artifacts dir or a temp task dir.\n - In isolated patch mode writes `<id>.patch` artifacts.\n - Creates/removes worktrees or overlay mount directories.\n - In branch mode creates temporary worktrees and task branches.\n- Network\n - Child sessions may use whichever networked tools/models their active tool set permits.\n - MCP proxy tools can call existing parent MCP connections with a 60_000 ms timeout.\n- Subprocesses / native bindings\n - `fuse-overlayfs` and `fusermount`/`fusermount3` for FUSE isolation.\n - ProjFS native bindings via `@oh-my-pi/pi-natives` on Windows.\n - Git operations for baseline capture, patch apply, worktrees, branches, stash, cherry-pick, commits.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Creates child `AgentSession` instances with isolated settings snapshots.\n - Registers async jobs in `session.asyncJobManager` for background task mode.\n - Emits `task:subagent:event`, `task:subagent:progress`, and `task:subagent:lifecycle` on the parent event bus.\n - Allocates session-scoped output ids through `AgentOutputManager` so `agent://` remains 
unique across invocations and resumes.\n - Shares the parent `local://` root with subagents by passing `localProtocolOptions` through `createAgentSession(...)`.\n- User-visible prompts / interactive UI\n - Async mode streams aggregate progress updates.\n - Missing-`yield` recovery sends up to three internal reminder prompts to the child session.\n - Final summaries include `<system-notification>` blocks for isolation fallbacks or merge failures.\n- Background work / cancellation\n - Parent abort stops scheduling new work, aborts active child sessions, and marks unscheduled tasks as skipped.\n - Async jobs keep their own cancellation via `AsyncJobManager`.\n\n## Limits & Caps\n- Per-subagent output truncation: `MAX_OUTPUT_BYTES = 500_000` and `MAX_OUTPUT_LINES = 5000` in `packages/coding-agent/src/task/types.ts`. Full raw output is still written to `<id>.md` before truncation is returned to the caller.\n- Progress coalescing in child execution: `PROGRESS_COALESCE_MS = 150` in `packages/coding-agent/src/task/executor.ts`.\n- Recent output tail for progress: `RECENT_OUTPUT_TAIL_BYTES = 8 * 1024` and `recentOutput` keeps the last 8 non-empty lines in `packages/coding-agent/src/task/executor.ts`.\n- Missing-`yield` reminder retries: `MAX_YIELD_RETRIES = 3` in `packages/coding-agent/src/task/executor.ts`.\n- MCP proxy timeout: `MCP_CALL_TIMEOUT_MS = 60_000` in `packages/coding-agent/src/task/executor.ts`.\n- Task id schema cap: `tasks[].id` `maxLength: 48` in `packages/coding-agent/src/task/types.ts`.\n- Prompt text says ids should be `≤32` chars, but the runtime schema allows 48; this mismatch is real.\n- Async/full sync parallelism both use `task.maxConcurrency` from settings:\n - sync path: `mapWithConcurrencyLimit(...)`\n - async path: `Semaphore(...)` around job bodies\n- Recursion depth gate: `task.maxRecursionDepth` from settings; `packages/coding-agent/src/tools/index.ts` hides the `task` tool at or beyond the limit, and `runSubprocess(...)` also strips child 
`task` access at max depth.\n- Final inline summary preview per task uses `fullOutputThreshold = 5000` chars in `packages/coding-agent/src/task/index.ts`; longer outputs are summarized while `agent://<id>` points to the full artifact.\n\n## Errors\n- Most validation failures are returned as normal tool text with empty `results`, not thrown:\n - invalid simple-mode fields\n - unknown/disabled agent\n - missing tasks\n - missing/duplicate task ids\n - spawn-policy denial\n - requesting `isolated` while isolation mode is `none`\n- Isolated execution without a git repo returns `Isolated task execution requires a git repository. ...`.\n- Backend resolution can return a hard error (`ProjFS isolation initialization failed...`) or a non-fatal warning with fallback to `worktree`.\n- `mapWithConcurrencyLimit(...)` fails fast on non-abort worker exceptions; already completed results are preserved only in the thrown path’s local state, not surfaced unless the caller catches and converts them.\n- Child-session failures surface as `SingleResult.exitCode = 1` with `stderr`/`error` populated.\n- If the child omits `yield`, `finalizeSubprocessOutput(...)` injects warnings such as `SYSTEM WARNING: Subagent exited without calling yield tool after 3 reminders.`\n- Async scheduling failures are accumulated per task; if no jobs start, the tool returns `Failed to start background task jobs: ...`.\n- `agent://<id>` resolution errors are model-visible when another tool reads them: no session, no artifacts dir, missing id, conflicting extraction syntax, or invalid JSON for extraction.\n\n## Notes\n- Agent discovery precedence is first-wins by exact name: project dirs before user dirs within a source family, plugin agent dirs after config dirs, bundled agents last. See `packages/coding-agent/src/task/discovery.ts` and `docs/task-agent-discovery.md`.\n- `TaskTool.create(...)` caches discovered agents only for description rendering and the async blocking-agent decision. 
`#executeSync(...)` rediscovers agents each call.\n- Custom agent frontmatter can override bundled agents by name. Bundled definitions are embedded at build time in `packages/coding-agent/src/task/agents.ts`.\n- Child sessions do not inherit conversation history automatically. The only built-in carry-over is shared `context`, optional `context.md`, workspace tree/skills/context files, and shared `local://` root.\n- `Settings.isolated(...)` gives each child a session-isolated settings snapshot; tool enablement is recomputed inside the child session rather than sharing mutable parent tool state.\n- When the parent passes `mcpManager`, child sessions disable standalone MCP discovery and instead get proxy tools that reuse the parent connections.\n- Plan mode mutates an `effectiveAgent` with a read-only tool subset and plan-mode prompt text, but `runSubprocess(...)` is still invoked with `agent` rather than `effectiveAgent`. Model/thinking/schema overrides use the effective agent; prompt/tool/spawn restrictions do not fully flow through this call path.\n- Branch-mode merge temporarily stashes the parent repo before cherry-picking task branches. A stash-pop conflict is treated as merge failure and leaves recovery state behind.\n- Patch-mode only applies combined root patches if every successful task produced a patch and `git.patch.canApplyText(...)` succeeds.\n- Nested git repos are handled separately from the root repo. They are copied into isolated worktrees, diffed independently, and merged later with `applyNestedPatches(...)` because parent git cannot track their file-level changes.\n- `agent://` ids are numeric-prefixed (`0-Task`, `1-Task`, nested like `0-Parent.0-Child`) by `AgentOutputManager`; this is what prevents artifact collisions across repeated or nested task invocations.\n",
90
89
  "tools/todo_write.md": "# todo_write\n\n> Applies ordered mutations to the session todo list and returns a text summary plus the full phase/task state.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/todo-write.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/todo-write.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/index.ts` — registers tool, exposes session hooks, gates availability.\n - `packages/coding-agent/src/modes/controllers/event-controller.ts` — updates the visible todo UI on tool completion.\n - `packages/coding-agent/src/session/agent-session.ts` — stores cached phases, auto-clears done/dropped tasks, emits failure reminders.\n - `packages/coding-agent/src/modes/controllers/todo-command-controller.ts` — `/todo` command path, custom-entry persistence, transcript reminder injection.\n - `packages/coding-agent/src/tools/render-utils.ts` — collapsed-preview cap for renderer trees.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `ops` | `TodoOpEntry[]` | Yes | Ordered operations to apply. `minItems: 1`.\n\n### `TodoOpEntry`\n\n| Op | Required fields | Optional fields | Effect |\n| --- | --- | --- | --- |\n| `init` | `list` | None of the other fields are used | Replaces the entire list with `list`; every new task starts `pending` before normalization. |\n| `start` | `task` | None | Marks one task `in_progress`; any other `in_progress` task is demoted to `pending`. |\n| `done` | `task` or `phase` or neither | None | Marks the target task, phase, or all tasks `completed`. |\n| `drop` | `task` or `phase` or neither | None | Marks the target task, phase, or all tasks `abandoned`. |\n| `rm` | `task` or `phase` or neither | None | Removes the target task, clears the phase's task list, or clears all task lists. |\n| `append` | `phase`, `items` | None | Appends new `pending` tasks to a phase; creates the phase if missing. 
|\n| `note` | `task`, `text` | None | Appends one trimmed note string to the task's `notes` array. |\n\n### Fields used inside ops\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `op` | `\"init\" \\| \"start\" \\| \"done\" \\| \"rm\" \\| \"drop\" \\| \"append\" \\| \"note\"` | Yes | Operation discriminator. |\n| `list` | `{ phase: string; items: string[] }[]` | For `init` | Full replacement payload. Each `items` array has `minItems: 1`. |\n| `task` | `string` | For `start`; for task-targeted `done`/`drop`/`rm`/`note` | Exact task content match. |\n| `phase` | `string` | For `append`; for phase-targeted `done`/`drop`/`rm` | Exact phase name match, except `append` lazily creates a missing phase. |\n| `items` | `string[]` | For `append` | Tasks to append. `minItems: 1`. |\n| `text` | `string` | For `note` | Note text; trailing whitespace is stripped before storing. Empty-after-trim is rejected. |\n\n## Outputs\nThe tool returns a single-shot `AgentToolResult`:\n\n- `content`: one text part containing the summary from `formatSummary(...)`.\n - Empty final state with no errors: `Todo list cleared.`\n - Non-empty final state: remaining-item list, current phase progress, then a per-phase tree.\n - If the active `in_progress` task has notes, the summary includes the note bodies inline.\n - If any op produced validation/runtime errors, the summary starts with `Errors: ...` but still returns the mutated state.\n- `details`:\n - `phases: TodoPhase[]`\n - `storage: \"session\" | \"memory\"`\n\n`TodoPhase` / `TodoItem` state model:\n\n- `TodoPhase`: `{ name: string, tasks: TodoItem[] }`\n- `TodoItem`: `{ content: string, status: \"pending\" | \"in_progress\" | \"completed\" | \"abandoned\", notes?: string[] }`\n\nThe TUI renderer (`todoWriteToolRenderer`) merges call and result into one transcript block, renders phases as a tree, shows note counts as superscripts, and renders the note bodies only for the current `in_progress` task. 
Collapsed transcript previews cap tree items at `PREVIEW_LIMITS.COLLAPSED_ITEMS` (`8`).\n\n## Flow\n1. `TodoWriteTool.execute(...)` clones the current cached phases from `session.getTodoPhases?.() ?? []` (`packages/coding-agent/src/tools/todo-write.ts`).\n2. `applyParams(...)` walks `params.ops` in order and applies each entry with `applyEntry(...)`.\n3. Each op mutates the working phase array:\n - `initPhases(...)` rebuilds the list from scratch.\n - `start` resolves a task by exact `content`, demotes every other `in_progress` task to `pending`, then marks the target `in_progress`.\n - `done` / `drop` use `getTaskTargets(...)` to target one task, one phase, or every task.\n - `rm` removes one task, clears one phase's `tasks`, or clears all phases' task arrays.\n - `appendItems(...)` resolves or creates the target phase and pushes new `pending` tasks unless the same task content already exists anywhere.\n - `note` trims trailing whitespace, rejects empty text, and appends the note to `task.notes`.\n4. Missing task/phase references are recorded in an `errors` array by `resolveTaskOrError(...)` / `resolvePhaseOrError(...)`; execution continues through the rest of the batch.\n5. After the full batch, `normalizeInProgressTask(...)` enforces the single-active-task invariant:\n - if multiple tasks are `in_progress`, only the first stays active and the rest become `pending`;\n - if none are `in_progress`, the first `pending` task in phase/task order is auto-promoted to `in_progress`.\n6. `execute(...)` stores the normalized phases with `session.setTodoPhases?.(...)` and reports `storage` as `\"session\"` when `session.getSessionFile()` exists, else `\"memory\"`.\n7. The agent runtime also watches `todo_write` tool results in `packages/coding-agent/src/session/agent-session.ts`; successful results refresh cached todos, failed results inject a hidden next-turn reminder telling the model that todo progress is not visible until it retries.\n8. 
The event controller updates the visible todo UI from `result.details.phases` on success, or shows a warning on error (`packages/coding-agent/src/modes/controllers/event-controller.ts`).\n\n## Modes / Variants\n### State transitions\n\n| Current status | `start` | `done` | `drop` | `rm` | `append` | `note` |\n| --- | --- | --- | --- | --- | --- | --- |\n| `pending` | `in_progress` on target | `completed` | `abandoned` | Removed | No status change; new tasks enter as `pending` | No status change |\n| `in_progress` | Target stays `in_progress`; non-target active tasks become `pending` | `completed` | `abandoned` | Removed | No status change | No status change |\n| `completed` | Can be set back to `in_progress` if targeted | Stays `completed` | Becomes `abandoned` if targeted | Removed | No status change | No status change |\n| `abandoned` | Can be set back to `in_progress` if targeted | Becomes `completed` if targeted | Stays `abandoned` | Removed | No status change | No status change |\n\nNormalization then re-applies the single-active-task rule after the full op batch.\n\n### Op targeting rules\n- `done`, `drop`, `rm`:\n - `task` set: affect one exact-content task.\n - else `phase` set: affect every task in that exact-name phase.\n - else: affect every task in every phase.\n- `append` is the only op that creates a missing phase.\n- `note` only targets a single task.\n- `init` discards previous phases entirely.\n\n### Markdown round-trip helpers\nThe same file also exposes non-tool helpers used by `/todo`:\n- `phasesToMarkdown(...)` serializes phases as headings plus checklist items (`[ ]`, `[/]`, `[x]`, `[-]`) with blockquote note bodies.\n- `markdownToPhases(...)` parses that format, defaults orphan tasks into a `Todos` phase, accepts `>` as an `in_progress` marker and `~` as `abandoned`, and runs the same normalization step.\n\n## Side Effects\n- Filesystem\n - None in the tool itself.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Mutates the session 
todo cache through `setTodoPhases`.\n - `storage` reports whether the session has a backing session file, but the tool does not append a custom session entry itself.\n - Successful tool-result messages carry `details.phases`; `getLatestTodoPhasesFromEntries(...)` can reconstruct state later from those transcript entries.\n - Failed `todo_write` results cause `agent-session` to enqueue a hidden next-turn reminder (`customType: \"todo-write-error-reminder\"`).\n- User-visible prompts / interactive UI\n - Transcript block is rendered by `todoWriteToolRenderer` and merged with the call line.\n - `event-controller` updates the visible todo panel from successful results.\n - On error, `event-controller` shows `Todo update failed...`; the visible panel may stay stale until a later successful call.\n- Background work / cancellation\n - `AgentSession.setTodoPhases(...)` schedules auto-clear timers for `completed` / `abandoned` tasks via `tasks.todoClearDelay`.\n\n## Limits & Caps\n- `ops` array: `minItems: 1` (`todoWriteSchema`).\n- `init.list[*].items`: `minItems: 1`.\n- `append.items`: `minItems: 1`.\n- Renderer collapsed preview: `PREVIEW_LIMITS.COLLAPSED_ITEMS = 8` (`packages/coding-agent/src/tools/render-utils.ts`).\n- Auto-clear delay: `tasks.todoClearDelay` default `60` seconds; `< 0` disables auto-clear, `0` clears on the next microtask (`packages/coding-agent/src/session/agent-session.ts`).\n- Tool execution mode: `concurrency = \"exclusive\"`, `strict = true`, `loadMode = \"discoverable\"`.\n\n## Errors\n- The tool does not throw for ordinary bad op payloads; it accumulates human-readable strings in `errors` and still returns success with the mutated state.\n- Error strings come from the helpers in `packages/coding-agent/src/tools/todo-write.ts`, including:\n - `Missing list for init operation`\n - `Missing task content`\n - `Task \"...\" not found` with an extra empty-list hint when applicable\n - `Missing phase name`\n - `Phase \"...\" not found`\n - `Missing 
phase name for append operation`\n - `Missing items for append operation`\n - `Task \"...\" already exists`\n - `Missing text for note operation`\n- Because ops are processed in order, earlier errors do not roll back later ops.\n- Runtime-level tool failure is handled outside the tool body: `agent-session` injects a hidden reminder and the event controller warns the user that visible progress may be stale.\n- Idempotency is op-specific:\n - `init` is a full replacement; replaying the same payload yields the same state.\n - `start`, `done`, and `drop` are effectively idempotent on an existing target state, but `start` also demotes any other active task.\n - `rm` is not idempotent for targeted removals: the second call errors because the task or phase is gone.\n - `append` is not idempotent: duplicate task content is rejected with `Task \"...\" already exists`.\n - `note` is append-only and never idempotent; replaying it adds another note entry.\n\n## Notes\n- Task lookup is exact string equality inside the tool. The model-facing prompt says task content and phase names are identifiers and should stay unique; `append` enforces task uniqueness globally, but `init` does not validate duplicate task or phase names.\n- `findTaskByContent(...)` returns the first matching task across phases. Duplicate task contents make later targeted ops ambiguous.\n- `normalizeInProgressTask(...)` runs after the whole batch, not after each op. 
A single call can intentionally build an intermediate invalid state and rely on final normalization.\n- `storage: \"session\"` means the session has a session-file backing; it does not mean this tool wrote a durable custom entry.\n- Reload persistence differs by path:\n - plain `todo_write` calls survive in transcript tool-result details;\n - `/todo` command edits additionally append `customType: \"user_todo_edit\"` entries and inject a visible-to-model `<system-reminder>` developer message describing the manual edit.\n- On session resume, `AgentSession.#syncTodoPhasesFromBranch()` strips `completed` and `abandoned` tasks before restoring the cached list. The `/todo` command works around that by reading the latest transcript/custom-entry state so historical done/dropped tasks still appear to the user.\n- Tool availability is gated by `todo.enabled`, and the registry excludes it when `includeYield` is enabled (`packages/coding-agent/src/tools/index.ts`).\n- Subagents do not inherit `todo_write`; `packages/coding-agent/src/task/executor.ts` filters it out as a parent-owned tool.\n",