@oh-my-pi/pi-coding-agent 15.0.1 → 15.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +94 -1
- package/examples/custom-tools/README.md +11 -7
- package/examples/custom-tools/hello/index.ts +2 -2
- package/examples/extensions/README.md +19 -8
- package/examples/extensions/api-demo.ts +15 -19
- package/examples/extensions/hello.ts +5 -6
- package/examples/extensions/plan-mode.ts +1 -1
- package/examples/extensions/reload-runtime.ts +4 -3
- package/examples/extensions/with-deps/index.ts +4 -3
- package/examples/sdk/06-extensions.ts +4 -2
- package/package.json +8 -18
- package/src/autoresearch/tools/init-experiment.ts +38 -41
- package/src/autoresearch/tools/log-experiment.ts +32 -41
- package/src/autoresearch/tools/run-experiment.ts +3 -3
- package/src/autoresearch/tools/update-notes.ts +11 -11
- package/src/commands/commit.ts +10 -0
- package/src/commit/agentic/tools/analyze-file.ts +4 -4
- package/src/commit/agentic/tools/git-file-diff.ts +4 -4
- package/src/commit/agentic/tools/git-hunk.ts +5 -5
- package/src/commit/agentic/tools/git-overview.ts +4 -4
- package/src/commit/agentic/tools/propose-changelog.ts +13 -13
- package/src/commit/agentic/tools/propose-commit.ts +6 -6
- package/src/commit/agentic/tools/recent-commits.ts +3 -3
- package/src/commit/agentic/tools/schemas.ts +28 -28
- package/src/commit/agentic/tools/split-commit.ts +22 -21
- package/src/commit/analysis/summary.ts +4 -4
- package/src/commit/changelog/generate.ts +7 -11
- package/src/commit/shared-llm.ts +22 -34
- package/src/config/config-file.ts +35 -13
- package/src/config/model-registry.ts +40 -191
- package/src/config/models-config-schema.ts +166 -0
- package/src/config/settings-schema.ts +29 -0
- package/src/discovery/claude-plugins.ts +19 -7
- package/src/edit/index.ts +2 -2
- package/src/edit/modes/apply-patch.ts +7 -6
- package/src/edit/modes/patch.ts +18 -25
- package/src/edit/modes/replace.ts +18 -20
- package/src/eval/js/shared/rewrite-imports.ts +131 -10
- package/src/eval/py/executor.ts +233 -623
- package/src/eval/py/kernel.ts +27 -2
- package/src/eval/py/runner.py +42 -11
- package/src/eval/py/runtime.ts +1 -0
- package/src/exa/factory.ts +5 -4
- package/src/exa/mcp-client.ts +1 -1
- package/src/exa/researcher.ts +9 -20
- package/src/exa/search.ts +26 -52
- package/src/exa/types.ts +1 -1
- package/src/exa/websets.ts +54 -53
- package/src/exec/bash-executor.ts +2 -1
- package/src/extensibility/custom-commands/loader.ts +5 -3
- package/src/extensibility/custom-commands/types.ts +4 -2
- package/src/extensibility/custom-tools/loader.ts +5 -3
- package/src/extensibility/custom-tools/types.ts +7 -6
- package/src/extensibility/custom-tools/wrapper.ts +1 -1
- package/src/extensibility/extensions/get-commands-handler.ts +77 -0
- package/src/extensibility/extensions/loader.ts +7 -3
- package/src/extensibility/extensions/types.ts +9 -5
- package/src/extensibility/extensions/wrapper.ts +1 -2
- package/src/extensibility/hooks/loader.ts +3 -1
- package/src/extensibility/hooks/tool-wrapper.ts +1 -1
- package/src/extensibility/hooks/types.ts +4 -2
- package/src/extensibility/plugins/legacy-pi-compat.ts +78 -31
- package/src/extensibility/shared-events.ts +1 -1
- package/src/extensibility/typebox.ts +391 -0
- package/src/goals/tools/goal-tool.ts +6 -12
- package/src/hashline/input.ts +2 -1
- package/src/hashline/parser.ts +27 -3
- package/src/hashline/types.ts +4 -4
- package/src/hindsight/state.ts +2 -2
- package/src/index.ts +0 -2
- package/src/internal-urls/docs-index.generated.ts +15 -15
- package/src/internal-urls/router.ts +8 -0
- package/src/internal-urls/types.ts +21 -0
- package/src/lsp/config.ts +15 -6
- package/src/lsp/defaults.json +6 -2
- package/src/lsp/types.ts +30 -38
- package/src/mcp/manager.ts +1 -1
- package/src/mcp/tool-bridge.ts +1 -1
- package/src/modes/acp/acp-agent.ts +248 -50
- package/src/modes/components/session-observer-overlay.ts +12 -1
- package/src/modes/components/status-line/segments.ts +39 -4
- package/src/modes/controllers/command-controller.ts +27 -2
- package/src/modes/controllers/event-controller.ts +3 -4
- package/src/modes/controllers/extension-ui-controller.ts +3 -2
- package/src/modes/interactive-mode.ts +1 -1
- package/src/modes/rpc/host-tools.ts +1 -1
- package/src/modes/rpc/host-uris.ts +235 -0
- package/src/modes/rpc/rpc-client.ts +1 -1
- package/src/modes/rpc/rpc-mode.ts +27 -1
- package/src/modes/rpc/rpc-types.ts +58 -1
- package/src/modes/runtime-init.ts +2 -1
- package/src/modes/theme/defaults/dark-poimandres.json +1 -0
- package/src/modes/theme/defaults/light-poimandres.json +1 -0
- package/src/modes/theme/theme.ts +117 -117
- package/src/modes/types.ts +1 -1
- package/src/modes/utils/context-usage.ts +2 -2
- package/src/prompts/tools/github.md +4 -4
- package/src/prompts/tools/hashline.md +22 -26
- package/src/prompts/tools/read.md +55 -37
- package/src/sdk.ts +31 -8
- package/src/session/agent-session.ts +74 -104
- package/src/session/messages.ts +16 -51
- package/src/session/session-manager.ts +22 -2
- package/src/session/streaming-output.ts +16 -6
- package/src/task/discovery.ts +5 -2
- package/src/task/executor.ts +210 -87
- package/src/task/index.ts +15 -11
- package/src/task/render.ts +32 -5
- package/src/task/types.ts +54 -39
- package/src/tools/ask.ts +12 -12
- package/src/tools/ast-edit.ts +11 -15
- package/src/tools/ast-grep.ts +9 -10
- package/src/tools/bash-command-fixup.ts +47 -0
- package/src/tools/bash.ts +48 -38
- package/src/tools/browser/render.ts +2 -2
- package/src/tools/browser.ts +39 -53
- package/src/tools/calculator.ts +12 -11
- package/src/tools/checkpoint.ts +7 -7
- package/src/tools/debug.ts +40 -43
- package/src/tools/eval.ts +16 -10
- package/src/tools/find.ts +10 -13
- package/src/tools/gh.ts +108 -132
- package/src/tools/hindsight-recall.ts +4 -6
- package/src/tools/hindsight-reflect.ts +5 -5
- package/src/tools/hindsight-retain.ts +15 -17
- package/src/tools/image-gen.ts +31 -81
- package/src/tools/index.ts +4 -1
- package/src/tools/inspect-image.ts +8 -9
- package/src/tools/irc.ts +15 -27
- package/src/tools/job.ts +30 -28
- package/src/tools/output-meta.ts +26 -0
- package/src/tools/read.ts +39 -12
- package/src/tools/recipe/index.ts +7 -9
- package/src/tools/render-mermaid.ts +12 -12
- package/src/tools/report-tool-issue.ts +4 -4
- package/src/tools/resolve.ts +11 -11
- package/src/tools/review.ts +14 -26
- package/src/tools/search-tool-bm25.ts +7 -9
- package/src/tools/search.ts +19 -22
- package/src/tools/ssh.ts +10 -9
- package/src/tools/todo-write.ts +26 -34
- package/src/tools/vim.ts +10 -26
- package/src/tools/write.ts +25 -5
- package/src/tools/yield.ts +100 -54
- package/src/web/search/index.ts +9 -24
- package/src/web/search/providers/anthropic.ts +5 -0
- package/src/web/search/providers/exa.ts +3 -0
- package/src/web/search/providers/gemini.ts +5 -0
- package/src/web/search/providers/jina.ts +5 -2
- package/src/web/search/providers/zai.ts +5 -2
- package/src/prompts/compaction/branch-summary-context.md +0 -5
- package/src/prompts/compaction/branch-summary-preamble.md +0 -2
- package/src/prompts/compaction/branch-summary.md +0 -30
- package/src/prompts/compaction/compaction-short-summary.md +0 -9
- package/src/prompts/compaction/compaction-summary-context.md +0 -5
- package/src/prompts/compaction/compaction-summary.md +0 -38
- package/src/prompts/compaction/compaction-turn-prefix.md +0 -17
- package/src/prompts/compaction/compaction-update-summary.md +0 -45
- package/src/prompts/system/auto-handoff-threshold-focus.md +0 -1
- package/src/prompts/system/file-operations.md +0 -10
- package/src/prompts/system/handoff-document.md +0 -49
- package/src/prompts/system/summarization-system.md +0 -3
- package/src/session/compaction/branch-summarization.ts +0 -324
- package/src/session/compaction/compaction.ts +0 -1420
- package/src/session/compaction/errors.ts +0 -31
- package/src/session/compaction/index.ts +0 -8
- package/src/session/compaction/pruning.ts +0 -91
- package/src/session/compaction/utils.ts +0 -184
|
@@ -6,15 +6,15 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
|
|
|
6
6
|
"ERRATA-GPT5-HARMONY.md": "# ERRATA — GPT-5 Harmony-Header Leakage\n\n## 1. The problem\n\nOpenAI frames tool calls in the Harmony chat protocol:\n\n```\n<|start|>assistant<|channel|>commentary to=functions.<NAME><|message|>{ARGS}<|call|>\n```\n\n`<|channel|>commentary to=functions.NAME` is the **routing header** —\ncontrol tokens consumed by the runtime to dispatch the call. These\ntokens never appear as content under normal operation; the runtime\nstrips them.\n\nThe defect: gpt-5 models occasionally emit, **as ordinary content\ninside `{ARGS}`**, the **plain-text shadow** of these routing tokens —\nthe same characters without the `<|…|>` brackets — and continue\nproducing more pseudo-routing structure (channel name, body marker,\nmultilingual spam, fake tool-result framing). The contamination lives\ninside the visible tool argument and is dispatched to the tool as if it\nwere intended content.\n\n**Critical detail.** The actual `<|start|>` / `<|channel|>` /\n`<|message|>` / `<|call|>` special tokens almost never appear in tool\nargs. What leaks is the bracket-less spelling — `analysis to=functions.X\ncode …` — because OpenAI applies a logit mask suppressing the\ncontrol-token IDs inside the args region. The mass that would have gone\nto those special tokens redistributes onto the un-bracketed plain-text\nrepresentation the model also learned. This makes the leak structurally\ninvisible to the routing parser and lands it in the tool input verbatim.\n\nManifestation in tool args (real corpus example):\n\n```\n~ add_function(iso, ctx, ns, \"installSystemChangeObserver\",\n os_install_system_change_observer);】【\"】【analysis to=functions.edit\n code above เงินไทยฟรีuser to=functions.edit code …\n```\n\nThe leading code is real and intended. Everything after the first\nnon-Latin token through the next clean structural boundary is corruption.\n\n---\n\n## 2. Observed statistics & failure modes\n\nSource: `~/.omp/stats.db` (`ss_tool_calls`, `ss_assistant_msgs`), through\n2026-05-10. 1.05M tool calls scanned.\n\n### 2.1 Rate\n\n| Model | Leaks in tool args | Calls | per million |\n|------------------|-------------------:|--------:|------------:|\n| gpt-5.4 | 37 | 226,957 | 163 |\n| gpt-5.3-codex | 17 | 112,243 | 151 |\n| gpt-5.5 | 2 | 80,750 | 25 |\n| gpt-5.2-codex | 0 | — | — |\n\nPlus 15 hits in assistant visible text / thinking blobs.\n\n### 2.2 Tool distribution\n\n| Tool | Hits |\n|---------------------|-----:|\n| `edit` | 38 |\n| `eval` | 11 |\n| `report_tool_issue` | 3 |\n| `grep`/`read`/`search`/`yield` | 1 each |\n\nConcentrated in tools with free-form (non-JSON-schema) argument formats.\n\n### 2.3 Leak shape (deterministic)\n\n```\nLEAK ::= JUNK_PREFIX MARKER CHANNEL_BODY (LEAK)?\nMARKER ::= \"to=functions.\" TOOL_NAME\nCHANNEL_BODY ::= \" code \" (SPAM | reasoning_prose | fake_tool_output)*\nJUNK_PREFIX ::= (GLITCH_TOKEN | CHANNEL_WORD | NON_LATIN_RUN | \"}\" | \"】【\")+\n```\n\n**Cascading is common.** Of 96 marker occurrences across 71 contaminated\nrecords, 39 contain ≥2 markers and 7 contain ≥3 — the model emits\nmultiple fake `to=functions.X code …` blocks back-to-back, often with\nfake `code_output\\nCell N:\\n…` framing between them. Once the\nplain-text scaffolding is in the residual stream, the prefix now *looks\nlike* a fresh tool envelope start, so the macro prior over continuations\nkeeps voting for more scaffolding. Self-amplifying.\n\n### 2.4 Glitch tokens\n\nSingle-token identifiers in `o200k_base` whose embeddings appear to be\nnear-init from underrepresentation in post-training. ASCII residue\nimmediately before the marker in the natural corpus:\n\n| Surface string | Single-token | Token ID | Hits in corpus |\n|-------------------|:-:|---------:|---:|\n| `Japgolly` | ✅ | 199,745 | 1 |\n| `Jsii` | ✅ | 114,318 | (subtoken of `Jsii_commentary`) |\n| `Jsii_commentary` | — (3 toks) | — | 2 |\n| `changedFiles` | — (2 toks) | — | 8 |\n| `RTLU` | — (2 toks) | — | 3 |\n\n`Japgolly` is in the last 0.13% of the vocabulary — the same family of\nGitHub-corpus residue that produced `SolidGoldMagikarp` in the 2023\nGPT-2 vocabulary (Rumbelow & Watkins). `SolidGoldMagikarp` itself\ntokenizes to 5 tokens in `o200k_base` — that specific token was retired,\nbut the class wasn't.\n\nFor the multi-token entries, the corpus-level signature is the surface\nstring; the underlying glitch trigger is a sub-token (e.g. `Jsii` inside\n`Jsii_commentary`). The detector list (`G` signal) keys on the surface\nstrings.\n\nStable across unrelated sessions. Treated as a high-precision detector\nsignal.\n\n### 2.5 Channel-word leakage\n\n`analysis` (5), `assistant` (5), `commentary` (3), `user` (1) appear\ndirectly preceding `to=`. Always bare words; never `<|channel|>analysis`\nor any other bracketed form. Consistent with §1 — the brackets are\nmasked, the words are not.\n\n### 2.6 Non-Latin spam residue\n\n96 marker hits, by script: CJK 40, Cyrillic 12, Telugu/Kannada/Malayalam\n18, Thai 8, Georgian 7, Armenian 7, Arabic 1. Recurring fragments are\nChinese gambling SEO (`大发时时彩`, `天天中彩票`), Georgian/Abkhaz junk,\nand Thai casino spam — well-known low-quality crawl residue.\n\nThis is the same script distribution observed in the controlled\nreproduction (§7.3), independent of the prompt's natural language.\n\n### 2.7 Failure-mode breakdown for the `edit` tool\n\nThe `edit` tool exists in two variants in the corpus:\n\n| Variant | Calls | Recovery |\n|--------------------------|------:|----------|\n| Patch-DSL (`@PATH`/anchor/`~payload`) | 27 | **Recoverable** by op-truncation (§3.3) |\n| JSON-schema (`{path,edits:[…]}`) | 11 | **Not recoverable** — contamination is escaped *inside* JSON strings, parser accepts it cleanly, content would be written verbatim into source files |\n\nFor Patch-DSL leaks specifically:\n\n- 20/27 cases: contamination on the last input line; nothing follows.\n- 7/27 cases: contamination mid-input; what follows is one of: a\n duplicate replay of an earlier file/anchor, intended content for a\n *different* tool call (the model started its next call inline), or\n pure hallucination. Post-contamination content is never trustworthy.\n\n### 2.8 Mechanism (confirmed)\n\n**Prior collapse from null-embedding glitch tokens, into a\ncontrol-token-masked basin whose mass redistributes onto the\nplain-text shadow of the Harmony protocol.**\n\nStep by step:\n\n1. The model is mid-`{ARGS}` of a Harmony tool call. The runtime applies\n a logit mask suppressing structural control tokens (`<|channel|>`,\n `<|message|>`, `<|call|>`, `<|start|>`, `<|end|>`) inside the args\n region. Without this mask, normal generation would constantly\n hallucinate envelope-closes; with it, those token IDs have logit\n `-∞` in args.\n2. A glitch token `g` is sampled. By construction `g` was in the BPE\n merge corpus but barely in LM/RL training, so its **input embedding\n `e_g` ≈ near-init noise of small norm**.\n3. At position t+1, the residual update `h_{t+1} ≈ LN(h_t + e_g + Attn +\n MLP)` is dominated by the prefix-derived terms; the just-emitted-token\n signal is effectively absent. Generation diversity normally comes\n from `e_x` steering the residual into different sub-regions —\n stripped here.\n4. The next-token distribution therefore collapses onto the **conditional\n prior over continuations of the prefix, with local conditioning\n removed**. In a tool-calling rollout context, that prior is sharply\n peaked on Harmony scaffolding (control tokens + routing tokens) —\n that's what RL trained.\n5. The mask zeros the control-token IDs. Mass redistributes onto the\n **next-best continuation**: the un-bracketed surface-form spelling of\n the same protocol (`analysis`, `commentary`, ` to=functions.X`,\n ` code `). This spelling is unmasked because those characters are\n ordinary tokens.\n6. Once a few tokens of plain-text scaffolding land in the residual\n stream, the prefix now resembles a fresh envelope start. The macro\n prior keeps voting for more scaffolding. Cascading (§2.3) follows.\n7. Multilingual spam after the marker is the same prior-collapse\n continuation, drawn from the training neighborhood of the glitch\n token (often ESL/auto-generated multilingual web junk — exactly the\n crawl residue in §2.6).\n\n**Two corollaries the corpus data demanded but only the experiment\nexplained:**\n\n- **The brackets never appear** (§1, §2.5). The mask is what makes the\n leak land in plain text instead of as a real envelope-close.\n- **Counterintuitive grammar dependency** (§7.4). The leak is *worse* in\n formats closest to OpenAI's training distribution. Off-distribution\n custom grammars dampen the macro-prior basin; the official\n `*** Begin Patch` format is the strongest collapse target.\n\nThe 2023 SolidGoldMagikarp paper documented mechanism (1)+(2)+(4). The\nnew piece is (5): when constrained decoding masks the natural collapse\ntarget, the mass laundered through the un-masked plain-text shadow\nbecomes a structurally-invisible exfiltration channel.",
|
|
7
7
|
"bash-tool-runtime.md": "# Bash tool runtime\n\nThis document describes the **`bash` tool** runtime path used by agent tool calls, from command normalization to execution, truncation/artifacts, and rendering.\n\nIt also calls out where behavior diverges in interactive TUI, print mode, RPC mode, and user-initiated bang (`!`) shell execution.\n\n## Scope and runtime surfaces\n\nThere are two different bash execution surfaces in coding-agent:\n\n1. **Tool-call surface** (`toolName: \"bash\"`): used when the model calls the bash tool.\n - Entry point: `BashTool.execute()`.\n - Parameters include `command`, optional `env`, `timeout`, `cwd`, `head`, `tail`, `pty`, and, when `async.enabled` is true, `async`.\n2. **User bang-command surface** (`!cmd` from interactive input or RPC `bash` command): session-level helper path.\n - Entry point: `AgentSession.executeBash()`.\n\nBoth eventually use `executeBash()` in `src/exec/bash-executor.ts` for non-PTY execution, but only the tool-call path runs normalization/interception, optional managed background-job handling, and tool renderer logic.\n\n## End-to-end tool-call pipeline\n\n## 1) Input handling and parameter merge\n\n`BashTool.execute()` currently handles input before execution as follows:\n\n- validates optional `env` names against shell-variable syntax,\n- extracts a leading `cd <path> && ...` into `cwd` when `cwd` was not supplied,\n- rejects `async: true` when `async.enabled` is false,\n- uses only explicit `head`/`tail` tool args for post-run filtering.\n\n`normalizeBashCommand()` still exists in `src/tools/bash-normalize.ts`, but `BashTool.execute()` does not call it in the current source. Trailing shell pipes such as `| head -n 50` remain part of the shell command unless the caller uses the structured `head`/`tail` args.\n\n## 2) Optional interception (blocked-command path)\n\nIf `bashInterceptor.enabled` is true, `BashTool` loads rules from settings and runs `checkBashInterception()` against the normalized command.\n\nInterception behavior:\n\n- command is blocked **only** when:\n - regex rule matches, and\n - the suggested tool is present in `ctx.toolNames`.\n- invalid regex rules are silently skipped.\n- on block, `BashTool` throws `ToolError` with message:\n - `Blocked: ...`\n - original command included.\n\nDefault rule patterns (defined in code) target common misuses:\n\n- file readers (`cat`, `head`, `tail`, ...)\n- search tools (`grep`, `rg`, ...)\n- file finders (`find`, `fd`, ...)\n- in-place editors (`sed -i`, `perl -i`, `awk -i inplace`)\n- shell redirection writes (`echo ... > file`, heredoc redirection)\n\n### Caveat\n\n`InterceptionResult` includes `suggestedTool`, but `BashTool` currently surfaces only the message text (no structured suggested-tool field in `details`).\n\n## 3) CWD validation and timeout clamping\n\n`cwd` is resolved relative to session cwd (`resolveToCwd`), then validated via `stat`:\n\n- missing path -> `ToolError(\"Working directory does not exist: ...\")`\n- non-directory -> `ToolError(\"Working directory is not a directory: ...\")`\n\nTimeout is clamped to `[1, 3600]` seconds and converted to milliseconds.\n\n## 4) Artifact allocation\n\nBefore execution, the tool allocates an artifact path/id (best-effort) for truncated output storage.\n\n- artifact allocation failure is non-fatal (execution continues without artifact spill file),\n- artifact id/path are passed into execution path for full-output persistence on truncation.\n\n## 5) PTY vs non-PTY execution selection\n\n`BashTool` chooses PTY execution only when all are true:\n\n- tool input `pty === true`\n- `PI_NO_PTY !== \"1\"`\n- tool context has UI (`ctx.hasUI === true` and `ctx.ui` set)\n\nOtherwise it uses non-interactive `executeBash()`.\n\nThat means print mode and non-UI RPC/tool contexts always use non-PTY.\n\n## Non-interactive execution engine (`executeBash`)\n\n## Shell session reuse model\n\n`executeBash()` caches native `Shell` instances in a process-global map keyed by:\n\n- shell path,\n- configured command prefix,\n- snapshot path,\n- serialized shell env,\n- optional agent session key.\n\nSession-level bang-command executions pass `sessionKey: this.sessionId`.\n\nTool-call executions pass `sessionKey: this.session.getSessionId?.()`, when available. In both surfaces, a session key isolates shell reuse per session; without one, reuse falls back to shell config/snapshot/env.\n\n## Shell config and snapshot behavior\n\nAt each call, executor loads settings shell config (`shell`, `env`, optional `prefix`).\n\nIf selected shell includes `bash`, it attempts `getOrCreateSnapshot()`:\n\n- snapshot captures aliases/functions/options from user rc,\n- snapshot creation is best-effort,\n- failure falls back to no snapshot.\n\nIf `prefix` is configured, command becomes:\n\n```text\n<prefix> <command>\n```\n\n## Streaming and cancellation\n\n`Shell.run()` streams chunks to `OutputSink` and optional `onChunk` callback.\n\nCancellation:\n\n- aborted signal triggers `shellSession.abort(...)`,\n- timeout from native result is mapped to `cancelled: true` + annotation text,\n- explicit cancellation similarly returns `cancelled: true` + annotation.\n\nNo exception is thrown inside executor for timeout/cancel; it returns structured `BashResult` and lets caller map error semantics.\n\n## Interactive PTY path (`runInteractiveBashPty`)\n\nWhen PTY is enabled, tool runs `runInteractiveBashPty()` which opens an overlay console component and drives a native `PtySession`.\n\nBehavior highlights:\n\n- xterm-headless virtual terminal renders viewport in overlay,\n- keyboard input is normalized (including Kitty sequences and application cursor mode handling),\n- `esc` while running kills the PTY session,\n- terminal resize propagates to PTY (`session.resize(cols, rows)`).\n\nEnvironment hardening defaults are injected for unattended runs:\n\n- pagers disabled (`PAGER=cat`, `GIT_PAGER=cat`, etc.),\n- editor prompts disabled (`GIT_EDITOR=true`, `EDITOR=true`, ...),\n- terminal/auth prompts reduced (`GIT_TERMINAL_PROMPT=0`, `SSH_ASKPASS=/usr/bin/false`, `CI=1`),\n- package-manager/tool automation flags for non-interactive behavior.\n\nPTY output is normalized (`CRLF`/`CR` to `LF`, `sanitizeText`) and written into `OutputSink`, including artifact spill support.\n\nOn PTY startup/runtime error, sink receives `PTY error: ...` line and command finalizes with undefined exit code.\n\n## Output handling: streaming, truncation, artifact spill\n\nBoth PTY and non-PTY paths use `OutputSink`.\n\n## OutputSink semantics\n\n- keeps an in-memory UTF-8-safe tail buffer (`DEFAULT_MAX_BYTES`, currently 50KB),\n- tracks total bytes/lines seen,\n- if artifact path exists and output overflows (or file already active), writes full stream to artifact file,\n- when memory threshold overflows, trims in-memory buffer to tail (UTF-8 boundary safe),\n- marks `truncated` when overflow/file spill occurs.\n\n`dump()` returns:\n\n- `output` (possibly annotated prefix),\n- `truncated`,\n- `totalLines/totalBytes`,\n- `outputLines/outputBytes`,\n- `artifactId` if artifact file was active.\n\n### Long-output caveat\n\nRuntime truncation is byte-threshold based in `OutputSink` (50KB default). It does not enforce a hard 2000-line cap in this code path.\n\n## Live tool updates and async jobs\n\nFor non-PTY foreground execution, `BashTool` uses a separate `TailBuffer` for partial updates and emits `onUpdate` snapshots while command is running.\n\nFor PTY execution, live rendering is handled by custom UI overlay, not by `onUpdate` text chunks.\n\nWhen `async.enabled` is true and the call passes `async: true`, `BashTool` starts a managed bash job, returns a running job result with a job id, and stores completion through the session managed-job path. Auto-backgrounding can also start this path after `bash.autoBackground.thresholdMs`.\n\n## Result shaping, metadata, and error mapping\n\nAfter execution:\n\n1. `cancelled` handling:\n - if abort signal is aborted -> throw `ToolAbortError` (abort semantics),\n - else -> throw `ToolError` (treated as tool failure).\n2. PTY `timedOut` -> throw `ToolError`.\n3. apply head/tail filters to final output text (`applyHeadTail`, head then tail).\n4. empty output becomes `(no output)`.\n5. attach truncation metadata via `toolResult(...).truncationFromSummary(result, { direction: \"tail\" })`.\n6. exit-code mapping:\n - missing exit code -> `ToolError(\"... missing exit status\")`\n - non-zero exit -> `ToolError(\"... Command exited with code N\")`\n - zero exit -> success result.\n\nSuccess payload structure:\n\n- `content`: text output,\n- `details.meta.truncation` when truncated, including:\n - `direction`, `truncatedBy`, total/output line+byte counts,\n - `shownRange`,\n - `artifactId` when available.\n\nBecause built-in tools are wrapped with `wrapToolWithMetaNotice()`, truncation notice text is appended to final text content automatically (for example: `Full: artifact://<id>`).\n\n## Rendering paths\n\n## Tool-call renderer (`bashToolRenderer`)\n\n`bashToolRenderer` is used for tool-call messages (`toolCall` / `toolResult`):\n\n- collapsed mode shows visual-line-truncated preview,\n- expanded mode shows all currently available output text,\n- warning line includes truncation reason and `artifact://<id>` when truncated,\n- timeout value (from args) is shown in footer metadata line.\n\n### Caveat: full artifact expansion\n\n`BashRenderContext` has `isFullOutput`, but current renderer context builder does not set it for bash tool results. Expanded view still uses the text already in result content (tail/truncated output) unless another caller provides full artifact content.\n\n## User bang-command component (`BashExecutionComponent`)\n\n`BashExecutionComponent` is for user `!` commands in interactive mode (not model tool calls):\n\n- streams chunks live,\n- collapsed preview keeps last 20 logical lines,\n- line clamp at 4000 chars per line,\n- shows truncation + artifact warnings when metadata is present,\n- marks cancelled/error/exit state separately.\n\nThis component is wired by `CommandController.handleBashCommand()` and fed from `AgentSession.executeBash()`.\n\n## Mode-specific behavior differences\n\n| Surface | Entry path | PTY eligible | Live output UX | Error surfacing |\n| ------------------------------ | ----------------------------------------------------- | -------------------------------------------------------------------- | ------------------------------------------------------------------------ | ------------------------------------------------ |\n| Interactive tool call | `BashTool.execute` | Yes, when `pty=true` and UI exists and `PI_NO_PTY!=1` | PTY overlay (interactive) or streamed tail updates | Tool errors become `toolResult.isError` |\n| Print mode tool call | `BashTool.execute` | No (no UI context) | No TUI overlay; output appears in event stream/final assistant text flow | Same tool error mapping |\n| RPC tool call (agent tooling) | `BashTool.execute` | Usually no UI -> non-PTY | Structured tool events/results | Same tool error mapping |\n| Interactive bang command (`!`) | `AgentSession.executeBash` + `BashExecutionComponent` | No (uses executor directly) | Dedicated bash execution component | Controller catches exceptions and shows UI error |\n| RPC `bash` command | `rpc-mode` -> `session.executeBash` | No | Returns `BashResult` directly | Consumer handles returned fields |\n\n## Operational caveats\n\n- Interceptor only blocks commands when suggested tool is currently available in context.\n- If artifact allocation fails, truncation still occurs but no `artifact://` back-reference is available.\n- Shell session cache has no explicit eviction in this module; lifetime is process-scoped.\n- PTY and non-PTY timeout surfaces differ:\n - PTY exposes explicit `timedOut` result field,\n - non-PTY maps timeout into `cancelled + annotation` summary.\n\n## Implementation files\n\n- [`src/tools/bash.ts`](../packages/coding-agent/src/tools/bash.ts) — tool entrypoint, input handling/interception, async and PTY/non-PTY selection, result/error mapping, bash tool renderer.\n- [`src/tools/bash-normalize.ts`](../packages/coding-agent/src/tools/bash-normalize.ts) — post-run head/tail filtering; also contains an unused command-normalization helper.\n- [`src/tools/bash-interceptor.ts`](../packages/coding-agent/src/tools/bash-interceptor.ts) — interceptor rule matching and blocked-command messages.\n- [`src/exec/bash-executor.ts`](../packages/coding-agent/src/exec/bash-executor.ts) — non-PTY executor, shell session reuse, cancellation wiring, output sink integration.\n- [`src/tools/bash-interactive.ts`](../packages/coding-agent/src/tools/bash-interactive.ts) — PTY runtime, overlay UI, input normalization, non-interactive env defaults.\n- [`src/session/streaming-output.ts`](../packages/coding-agent/src/session/streaming-output.ts) — `OutputSink`, `TailBuffer`, truncation/artifact spill, and summary metadata.\n- [`src/tools/output-meta.ts`](../packages/coding-agent/src/tools/output-meta.ts) — truncation metadata shape + notice injection wrapper.\n- [`src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts) — session-level `executeBash`, message recording, abort lifecycle.\n- [`src/modes/components/bash-execution.ts`](../packages/coding-agent/src/modes/components/bash-execution.ts) — interactive `!` command execution component.\n- [`src/modes/controllers/command-controller.ts`](../packages/coding-agent/src/modes/controllers/command-controller.ts) — wiring for interactive `!` command UI stream/update completion.\n- [`src/modes/rpc/rpc-mode.ts`](../packages/coding-agent/src/modes/rpc/rpc-mode.ts) — RPC `bash` and `abort_bash` command surface.\n- [`src/internal-urls/artifact-protocol.ts`](../packages/coding-agent/src/internal-urls/artifact-protocol.ts) — `artifact://<id>` resolution.\n",
|
|
8
8
|
"blob-artifact-architecture.md": "# Blob and artifact storage architecture\n\nThis document describes how coding-agent stores large/binary payloads outside session JSONL, how truncated tool output is persisted, and how internal URLs (`artifact://`, `agent://`) resolve back to stored data.\n\n## Why two storage systems exist\n\nThe runtime uses two different persistence mechanisms for different data shapes:\n\n- **Content-addressed blobs** (`blob:sha256:<hash>`): global storage used to externalize large image base64 payloads and provider image data URLs from persisted session entries.\n- **Session-scoped artifacts** (files under `<sessionFile-without-.jsonl>/`): per-session text files used for full tool outputs and subagent outputs.\n\nThey are intentionally separate:\n\n- blob storage optimizes deduplication and stable references by content hash,\n- artifact storage optimizes append-only session tooling and human/tool retrieval by local IDs.\n\n## Storage boundaries and on-disk layout\n\n## Blob store boundary (global)\n\n`SessionManager` constructs `BlobStore(getBlobsDir())`, so blob files live in a shared global blob directory (not in a session folder).\n\nBlob file naming:\n\n- file path: `<blobsDir>/<sha256-hex>`\n- no extension\n- reference string stored in entries: `blob:sha256:<sha256-hex>`\n\nImplications:\n\n- same binary content across sessions resolves to the same hash/path,\n- writes are idempotent at the content level,\n- blobs can outlive any individual session file.\n\n## Artifact boundary (session-local)\n\n`ArtifactManager` derives artifact directory from session file path:\n\n- session file: `.../<timestamp>_<sessionId>.jsonl`\n- artifacts directory: `.../<timestamp>_<sessionId>/` (strip `.jsonl`)\n\nArtifact types share this directory:\n\n- truncated tool output files: `<numericId>.<toolType>.log` (for `artifact://`)\n- subagent output files: `<outputId>.md` (for `agent://`)\n\n## ID and name allocation schemes\n\n## Blob IDs: content hash\n\n`BlobStore.put()` computes SHA-256 over the bytes it is given and returns:\n\n- `hash`: hex digest,\n- `path`: `<blobsDir>/<hash>`,\n- `ref`: `blob:sha256:<hash>`.\n\nNo session-local counter is used.\n\n## Artifact IDs: session-local monotonic integer\n\n`ArtifactManager` scans existing `*.log` artifact files on first use to find max existing numeric ID and sets `nextId = max + 1`.\n\nAllocation behavior:\n\n- file format: `{id}.{toolType}.log`\n- IDs are sequential strings (`\"0\"`, `\"1\"`, ...)\n- resume does not overwrite existing artifacts because scan happens before allocation.\n\nIf artifact directory is missing, scanning yields empty list and allocation starts from `0`.\n\n## Agent output IDs (`agent://`)\n\n`AgentOutputManager` allocates IDs for subagent outputs as `<index>-<requestedId>` (optionally nested under parent prefix, e.g. `0-Parent.1-Child`). It scans existing `.md` files on initialization to continue from the next index on resume.\n\n## Persistence dataflow\n\n## 1) Session entry persistence rewrite path\n\nBefore session entries are written (`#rewriteFile` / incremental persist), `SessionManager` calls `prepareEntryForPersistence()` (via `truncateForPersistence`).\n\nKey behaviors:\n\n1. **Large string truncation**: oversized strings are cut and suffixed with `\"[Session persistence truncated large content]\"`; signature fields (`thinkingSignature`, `thoughtSignature`, `textSignature`) are cleared instead of truncated.\n2. **Transient field stripping**: `partialJson` and `jsonlEvents` are removed from persisted entries.\n3. **Image externalization to blobs**:\n - image blocks in `content` arrays are externalized when `data` is not already a blob ref and base64 length is at least threshold (`BLOB_EXTERNALIZE_THRESHOLD = 1024`),\n - provider-style `image_url` data URLs are externalized when they start with `data:image/` and contain `;base64,`,\n - image block `data` is stored as decoded binary bytes,\n - provider data URLs are stored as the original UTF-8 data URL string,\n - persisted values are replaced with `blob:sha256:<hash>`.\n\nThis keeps session JSONL compact while preserving recoverability.\n\n## 2) Session load rehydration path\n\nWhen opening a session (`setSessionFile`), after migrations, `SessionManager` runs `resolveBlobRefsInEntries()`.\n\nFor message/custom-message image blocks with `blob:sha256:<hash>` and for persisted provider `image_url` fields with blob refs:\n\n- reads blob bytes from blob store,\n- converts image-block bytes back to base64,\n- converts provider `image_url` blobs back to the original string,\n- mutates in-memory entry fields for runtime consumers.\n\nIf blob is missing:\n\n- `resolveImageData()` logs warning,\n- returns original ref string unchanged,\n- load continues (no hard crash).\n\n## 3) Tool output spill/truncation path\n\n`OutputSink` powers streaming output in bash/python/ssh and related executors.\n\nBehavior:\n\n1. Every chunk is sanitized and appended to in-memory tail buffer.\n2. When in-memory bytes exceed spill threshold (`DEFAULT_MAX_BYTES`, 50KB), sink marks output truncated.\n3. If an artifact path is available, sink opens a file writer and writes:\n - existing buffered content once,\n - all subsequent chunks.\n4. In-memory buffer is always trimmed to tail window for display.\n5. `dump()` returns summary including `artifactId` only when file sink was successfully created.\n\nPractical effect:\n\n- UI/tool return shows truncated tail,\n- full output is preserved in artifact file and referenced as `artifact://<id>`.\n\nIf file sink creation fails (I/O error, missing path, etc.), sink silently falls back to in-memory truncation only; full output is not persisted.\n\n## URL access model\n\n## `blob:` references\n\n`blob:sha256:<hash>` is a persistence reference inside session entry payloads, not an internal URL scheme handled by the router. Resolution is done by `SessionManager` during session load.\n\n## `artifact://<id>`\n\nHandled by `ArtifactProtocolHandler`:\n\n- requires active session artifact directory,\n- ID must be numeric,\n- resolves by matching filename prefix `<id>.`,\n- returns raw text (`text/plain`) from the matched `.log` file,\n- when missing, error includes list of available artifact IDs.\n\nMissing directory behavior:\n\n- if artifacts directory does not exist, throws `No artifacts directory found`.\n\n## `agent://<id>`\n\nHandled by `AgentProtocolHandler` over `<artifactsDir>/<id>.md`:\n\n- plain form returns markdown text,\n- `/path` or `?q=` forms perform JSON extraction,\n- path and query extraction cannot be combined,\n- if extraction requested, file content must parse as JSON.\n\nMissing directory behavior:\n\n- throws `No artifacts directory found`.\n\nMissing output behavior:\n\n- throws `Not found: <id>` with available IDs from existing `.md` files.\n\nRead tool integration:\n\n- `read` supports offset/limit pagination for non-extraction internal URL reads,\n- rejects `offset/limit` when `agent://` extraction is used.\n\n## Resume, fork, and move semantics\n\n## Resume\n\n- `ArtifactManager` scans existing `{id}.*.log` files on first allocation and continues numbering.\n- `AgentOutputManager` scans existing `.md` output IDs and continues numbering.\n- `SessionManager` rehydrates blob refs to base64 on load.\n\n## Fork\n\n`SessionManager.fork()` creates a new session file with new session ID and `parentSession` link, then returns old/new file paths. Artifact copying is handled by `AgentSession.fork()`:\n\n- attempts recursive copy of old artifact directory to new artifact directory,\n- missing old directory is tolerated,\n- non-ENOENT copy errors are logged as warnings and fork still completes.\n\nID implications after fork:\n\n- if copy succeeded, artifact counters in new session continue after max copied ID,\n- if copy failed/skipped, new session artifact IDs start from `0`.\n\nBlob implications after fork:\n\n- blobs are global and content-addressed, so no blob directory copy is required.\n\n## Move to new cwd\n\n`SessionManager.moveTo()` renames both session file and artifact directory to the new default session directory, with rollback logic if a later step fails. This preserves artifact identity while relocating session scope.\n\n## Failure handling and fallback paths\n\n| Case | Behavior |\n| -------------------------------------------------------- | --------------------------------------------------------------------- |\n| Blob file missing during rehydration | Warn and keep `blob:sha256:` ref string in-memory |\n| Blob read ENOENT via `BlobStore.get` | Returns `null` |\n| Artifact directory missing (`ArtifactManager.listFiles`) | Returns empty list (allocation can start fresh) |\n| Artifact directory missing (`artifact://` / `agent://`) | Throws explicit `No artifacts directory found` |\n| Artifact ID not found | Throws with available IDs listing |\n| OutputSink artifact writer init fails | Continues with tail-only truncation (no full-output artifact) |\n| No session file (some task paths) | Task tool falls back to temp artifacts directory for subagent outputs |\n\n## Binary blob externalization vs text-output artifacts\n\n- **Blob externalization** is for image payloads inside persisted session entry content and provider image data URLs; it replaces inline payload strings in JSONL with stable content refs.\n- **Artifacts** are plain text files for execution output and subagent output; they are addressable by session-local IDs through internal URLs.\n\nThe two systems intersect only indirectly (both reduce session JSONL bloat) but have different identity, lifetime, and retrieval paths.\n\n## Implementation files\n\n- [`src/session/blob-store.ts`](../packages/coding-agent/src/session/blob-store.ts) — blob reference format, hashing, put/get, externalize/resolve helpers.\n- [`src/session/artifacts.ts`](../packages/coding-agent/src/session/artifacts.ts) — session artifact directory model and numeric artifact ID/path allocation.\n- [`src/session/streaming-output.ts`](../packages/coding-agent/src/session/streaming-output.ts) — `OutputSink` truncation/spill-to-file behavior and summary metadata.\n- [`src/session/session-manager.ts`](../packages/coding-agent/src/session/session-manager.ts) — persistence transforms, blob rehydration on load, session fork/move interactions.\n- [`src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts) — artifact directory copy during interactive fork.\n- [`src/internal-urls/artifact-protocol.ts`](../packages/coding-agent/src/internal-urls/artifact-protocol.ts) — `artifact://` resolver.\n- [`src/internal-urls/agent-protocol.ts`](../packages/coding-agent/src/internal-urls/agent-protocol.ts) — `agent://` resolver + JSON extraction.\n- [`src/sdk.ts`](../packages/coding-agent/src/sdk.ts) — internal URL router wiring and artifacts-dir resolver.\n- [`src/task/output-manager.ts`](../packages/coding-agent/src/task/output-manager.ts) — session-scoped agent output ID allocation for `agent://`.\n- [`src/task/executor.ts`](../packages/coding-agent/src/task/executor.ts) — subagent output artifact writes (`<id>.md`) and temp artifact directory fallback.\n",
|
|
9
|
-
"compaction.md": "# Compaction and Branch Summaries\n\nCompaction and branch summaries are the two mechanisms that keep long sessions usable without losing prior work context.\n\n- **Compaction** rewrites old history into a summary on the current branch.\n- **Branch summary** captures abandoned branch context during `/tree` navigation.\n\nBoth are persisted as session entries and converted back into user-context messages when rebuilding LLM input.\n\n## Key implementation files\n\n- `src/session/compaction/compaction.ts`\n- `src/session/compaction/branch-summarization.ts`\n- `src/session/compaction/pruning.ts`\n- `src/session/compaction/utils.ts`\n- `src/session/session-manager.ts`\n- `src/session/agent-session.ts`\n- `src/session/messages.ts`\n- `src/extensibility/hooks/types.ts`\n- `src/config/settings-schema.ts`\n\n## Session entry model\n\nCompaction and branch summaries are first-class session entries, not plain assistant/user messages.\n\n- `CompactionEntry`\n - `type: \"compaction\"`\n - `summary`, optional `shortSummary`\n - `firstKeptEntryId` (compaction boundary)\n - `tokensBefore`\n - optional `details`, `preserveData`, `fromExtension`\n- `BranchSummaryEntry`\n - `type: \"branch_summary\"`\n - `fromId`, `summary`\n - optional `details`, `fromExtension`\n\nWhen context is rebuilt (`buildSessionContext`):\n\n1. Latest compaction on the active path is converted to one `compactionSummary` message.\n2. Kept entries from `firstKeptEntryId` to the compaction point are re-included.\n3. Later entries on the path are appended.\n4. `branch_summary` entries are converted to `branchSummary` messages.\n5. `custom_message` entries are converted to `custom` messages.\n\nThose custom roles are then transformed into LLM-facing user messages in `convertToLlm()` using the static templates:\n\n- `prompts/compaction/compaction-summary-context.md`\n- `prompts/compaction/branch-summary-context.md`\n\n## Compaction pipeline\n\n### Triggers\n\nCompaction/context maintenance can run in four ways:\n\n1. **Manual context compaction**: `/compact [instructions]` calls `AgentSession.compact(...)`.\n2. **Automatic overflow recovery**: after a same-model assistant error that matches context overflow.\n3. **Automatic threshold maintenance**: after a successful turn when context exceeds the resolved threshold.\n4. **Idle maintenance**: `runIdleCompaction()` can invoke the same auto-maintenance path with reason `\"idle\"`.\n\n### Compaction shape (visual)\n\n```text\nBefore compaction:\n\n entry: 0 1 2 3 4 5 6 7 8 9\n ┌─────┬─────┬─────┬──────┬─────┬─────┬──────┬──────┬─────┬──────┐\n │ hdr │ usr │ ass │ tool │ usr │ ass │ tool │ tool │ ass │ tool │\n └─────┴─────┴─────┴──────┴─────┴─────┴──────┴──────┴─────┴──────┘\n └────────┬───────┘ └──────────────┬──────────────┘\n messagesToSummarize kept messages\n ↑\n firstKeptEntryId (entry 4)\n\nAfter compaction (new entry appended):\n\n entry: 0 1 2 3 4 5 6 7 8 9 10\n ┌─────┬─────┬─────┬──────┬─────┬─────┬──────┬──────┬─────┬──────┬─────┐\n │ hdr │ usr │ ass │ tool │ usr │ ass │ tool │ tool │ ass │ tool │ cmp │\n └─────┴─────┴─────┴──────┴─────┴─────┴──────┴──────┴─────┴──────┴─────┘\n └──────────┬──────┘ └──────────────────────┬───────────────────┘\n not sent to LLM sent to LLM\n ↑\n starts from firstKeptEntryId\n\nWhat the LLM sees:\n\n ┌────────┬─────────┬─────┬─────┬──────┬──────┬─────┬──────┐\n │ system │ summary │ usr │ ass │ tool │ tool │ ass │ tool │\n └────────┴─────────┴─────┴─────┴──────┴──────┴─────┴──────┘\n ↑ ↑ └─────────────────┬────────────────┘\n prompt from cmp messages from firstKeptEntryId\n```\n\n### Overflow-retry vs threshold/idle maintenance\n\nThe automatic paths are intentionally different:\n\n- **Overflow recovery**\n - Trigger: current-model assistant error is detected as context overflow and the error is not older than the latest compaction.\n - The failing assistant error message is removed from active agent state before retry.\n - Context promotion is tried first; if a configured larger model is available, the agent switches model and retries without compacting.\n - If promotion is unavailable and compaction is enabled, context-full compaction runs with `reason: \"overflow\"` and `willRetry: true`; handoff strategy is not used for overflow.\n - On success, agent auto-continues (`agent.continue()`) after compaction.\n\n- **Threshold maintenance**\n - Trigger: successful, non-error assistant message whose adjusted context tokens exceed `resolveThresholdTokens(...)`.\n - Tool-output pruning can reduce the measured token count before threshold comparison.\n - Context promotion is tried before compaction.\n - If promotion is unavailable, auto maintenance runs with `reason: \"threshold\"` and `willRetry: false`.\n - With `compaction.strategy: \"handoff\"`, threshold maintenance starts a new handoff session instead of writing a compaction entry; if handoff returns no document without aborting, it falls back to context-full compaction.\n - On success, if `compaction.autoContinue !== false`, schedules an agent-authored developer auto-continue prompt from `prompts/system/auto-continue.md`.\n\n- **Idle maintenance**\n - Trigger: `runIdleCompaction()` when not streaming or already compacting.\n - Uses `reason: \"idle\"` and does not auto-continue afterward.\n\n### Pre-compaction pruning\n\nBefore compaction checks, tool-result pruning may run (`pruneToolOutputs`).\n\nDefault prune policy:\n\n- Protect newest `40_000` tool-output tokens.\n- Require at least `20_000` total estimated savings.\n- Never prune tool results from `skill` or `read`.\n\nPruned tool results are replaced with:\n\n- `[Output truncated - N tokens]`\n\nIf pruning changes entries, session storage is rewritten and agent message state is refreshed before compaction decisions.\n\n### Boundary and cut-point logic\n\n`prepareCompaction()` only considers entries since the last compaction entry (if any).\n\n1. Find previous compaction index.\n2. Compute `boundaryStart = prevCompactionIndex + 1`.\n3. Adapt `keepRecentTokens` using measured usage ratio when available.\n4. Run `findCutPoint()` over the boundary window.\n\nValid cut points include:\n\n- message entries with roles: `user`, `assistant`, `bashExecution`, `hookMessage`, `branchSummary`, `compactionSummary`\n- `custom_message` entries\n- `branch_summary` entries\n\nHard rule: never cut at `toolResult`.\n\nIf there are non-message metadata entries immediately before the cut point (`model_change`, `thinking_level_change`, labels, etc.), they are pulled into the kept region by moving cut index backward until a message or compaction boundary is hit.\n\n### Split-turn handling\n\nIf cut point is not at a user-turn start, compaction treats it as a split turn.\n\nTurn start detection treats these as user-turn boundaries:\n\n- `message.role === \"user\"`\n- `message.role === \"bashExecution\"`\n- `custom_message` entry\n- `branch_summary` entry\n\nSplit-turn compaction generates two summaries:\n\n1. History summary (`messagesToSummarize`)\n2. Turn-prefix summary (`turnPrefixMessages`)\n\nFinal stored summary is merged as:\n\n```markdown\n<history summary>\n\n---\n\n**Turn Context (split turn):**\n\n<turn prefix summary>\n```\n\n### Summary generation\n\n`compact(...)` builds summaries from serialized conversation text:\n\n1. Convert messages via `convertToLlm()`.\n2. Serialize with `serializeConversation()`.\n3. Wrap in `<conversation>...</conversation>`.\n4. Optionally include `<previous-summary>...</previous-summary>`.\n5. Optionally inject hook context as `<additional-context>` list.\n6. Execute summarization prompt with `SUMMARIZATION_SYSTEM_PROMPT`.\n\nPrompt selection:\n\n- first compaction: `compaction-summary.md`\n- iterative compaction with prior summary: `compaction-update-summary.md`\n- split-turn second pass: `compaction-turn-prefix.md`\n- short UI summary: `compaction-short-summary.md`\n\nRemote summarization modes:\n\n- If `compaction.remoteEndpoint` is set and remote compaction is enabled, local summary generation POSTs:\n - `{ systemPrompt, prompt }`\n- Expects JSON containing at least `{ summary }`.\n- For OpenAI/OpenAI Codex models, compaction first tries the provider-native `/responses/compact` endpoint when remote compaction is enabled. It preserves provider replacement history in `preserveData.openaiRemoteCompaction` and falls back to local summarization if that native request fails.\n\n### File-operation context in summaries\n\nCompaction tracks cumulative file activity using assistant tool calls:\n\n- `read(path)` → read set\n- `write(path)` → modified set\n- `edit(path)` → modified set\n\nCumulative behavior:\n\n- Includes prior compaction details only when prior entry is pi-generated (`fromExtension !== true`).\n- In split turns, includes turn-prefix file ops too.\n- `readFiles` excludes files also modified.\n\nSummary text gets file tags appended via prompt template:\n\n```xml\n<read-files>\n...\n</read-files>\n<modified-files>\n...\n</modified-files>\n```\n\n### Persist and reload\n\nAfter summary generation (or hook-provided summary), agent session:\n\n1. Appends `CompactionEntry` with `appendCompaction(...)` for context-full maintenance; handoff strategy creates a new session and injects a handoff `custom_message` instead.\n2. Rebuilds display context from the active leaf via `buildDisplaySessionContext()`.\n3. Replaces live agent messages with rebuilt context.\n4. Emits `session_compact` hook event.\n\n## Branch summarization pipeline\n\nBranch summarization is tied to tree navigation, not token overflow.\n\n### Trigger\n\nDuring `navigateTree(...)`:\n\n1. Compute abandoned entries from old leaf to common ancestor using `collectEntriesForBranchSummary(...)`.\n2. If caller requested summary (`options.summarize`), generate summary before switching leaf.\n3. If summary exists, attach it at the navigation target using `branchWithSummary(...)`.\n\nOperationally this is commonly driven by `/tree` flow when `branchSummary.enabled` is enabled.\n\n### Branch switch shape (visual)\n\n```text\nTree before navigation:\n\n ┌─ B ─ C ─ D (old leaf, being abandoned)\n A ───┤\n └─ E ─ F (target)\n\nCommon ancestor: A\nEntries to summarize: B, C, D\n\nAfter navigation with summary:\n\n ┌─ B ─ C ─ D ─ [summary of B,C,D]\n A ───┤\n └─ E ─ F (new leaf)\n```\n\n### Preparation and token budget\n\n`generateBranchSummary(...)` computes budget as:\n\n- `tokenBudget = model.contextWindow - branchSummary.reserveTokens`\n\n`prepareBranchEntries(...)` then:\n\n1. First pass: collect cumulative file ops from all summarized entries, including prior pi-generated `branch_summary` details.\n2. Second pass: walk newest → oldest, adding messages until token budget is reached.\n3. Prefer preserving recent context.\n4. May still include large summary entries near budget edge for continuity.\n\nCompaction entries are included as messages (`compactionSummary`) during branch summarization input.\n\n### Summary generation and persistence\n\nBranch summarization:\n\n1. Converts and serializes selected messages.\n2. Wraps in `<conversation>`.\n3. Uses custom instructions if supplied, otherwise `branch-summary.md`.\n4. Calls summarization model with `SUMMARIZATION_SYSTEM_PROMPT`.\n5. Prepends `branch-summary-preamble.md`.\n6. Appends file-operation tags.\n\nResult is stored as `BranchSummaryEntry` with optional details (`readFiles`, `modifiedFiles`).\n\n## Extension and hook touchpoints\n\n### `session_before_compact`\n\nPre-compaction hook.\n\nCan:\n\n- cancel compaction (`{ cancel: true }`)\n- provide full custom compaction payload (`{ compaction: CompactionResult }`)\n\n### `session.compacting`\n\nPrompt/context customization hook for default compaction.\n\nCan return:\n\n- `prompt` (override base summary prompt)\n- `context` (extra context lines injected into `<additional-context>`)\n- `preserveData` (stored on compaction entry)\n\n### `session_compact`\n\nPost-compaction notification with saved `compactionEntry` and `fromExtension` flag.\n\n### `session_before_tree`\n\nRuns on tree navigation before default branch summary generation.\n\nCan:\n\n- cancel navigation\n- provide custom `{ summary: { summary, details } }` used when user requested summarization\n\n### `session_tree`\n\nPost-navigation event exposing new/old leaf and optional summary entry.\n\n## Runtime behavior and failure semantics\n\n- Manual compaction aborts current agent operation first.\n- `abortCompaction()` cancels both manual and auto-compaction controllers.\n- Auto compaction emits start/end session events for UI/state updates.\n- Auto compaction can try multiple model candidates and retry transient failures; long retry delays prefer the next candidate when one is available.\n- Overflow errors are excluded from generic retry path because they are handled by context promotion/compaction.\n- If auto-compaction fails:\n - overflow path emits `Context overflow recovery failed: ...`\n - threshold path emits `Auto-compaction failed: ...`\n- Branch summarization can be cancelled via abort signal (e.g., Escape), returning canceled/aborted navigation result.\n\n## Settings and defaults\n\nFrom `settings-schema.ts`:\n\n- `compaction.enabled` = `true`\n- `compaction.strategy` = `\"context-full\"` (`\"handoff\"` and `\"off\"` are also supported)\n- `compaction.reserveTokens` = `16384`\n- `compaction.keepRecentTokens` = `20000`\n- `compaction.autoContinue` = `true`\n- `compaction.remoteEnabled` = `true`\n- `compaction.remoteEndpoint` = `undefined`\n- `compaction.thresholdPercent` = `-1` and `compaction.thresholdTokens` = `-1`; when no positive override is set, the threshold is `contextWindow - max(15% of contextWindow, reserveTokens)`\n- `compaction.idleEnabled` = `true`\n- `branchSummary.enabled` = `false`\n- `branchSummary.reserveTokens` = `16384`\n\nThese values are consumed at runtime by `AgentSession` and compaction/branch summarization modules.\n",
|
|
9
|
+
"compaction.md": "# Compaction and Branch Summaries\n\nCompaction and branch summaries are the two mechanisms that keep long sessions usable without losing prior work context.\n\n- **Compaction** rewrites old history into a summary on the current branch.\n- **Branch summary** captures abandoned branch context during `/tree` navigation.\n\nBoth are persisted as session entries and converted back into user-context messages when rebuilding LLM input.\n\n## Key implementation files\n\n- `packages/agent/src/compaction/compaction.ts` (context-full summarization and handoff generation)\n- `packages/agent/src/compaction/branch-summarization.ts`\n- `packages/agent/src/compaction/pruning.ts`\n- `packages/agent/src/compaction/utils.ts`\n- `packages/agent/src/compaction/openai.ts`\n- `packages/coding-agent/src/session/session-manager.ts`\n- `packages/coding-agent/src/session/agent-session.ts`\n- `packages/coding-agent/src/session/messages.ts`\n- `packages/coding-agent/src/extensibility/hooks/types.ts`\n- `packages/coding-agent/src/config/settings-schema.ts`\n\n## Session entry model\n\nCompaction and branch summaries are first-class session entries, not plain assistant/user messages.\n\n- `CompactionEntry`\n - `type: \"compaction\"`\n - `summary`, optional `shortSummary`\n - `firstKeptEntryId` (compaction boundary)\n - `tokensBefore`\n - optional `details`, `preserveData`, `fromExtension`\n- `BranchSummaryEntry`\n - `type: \"branch_summary\"`\n - `fromId`, `summary`\n - optional `details`, `fromExtension`\n\nWhen context is rebuilt (`buildSessionContext`):\n\n1. Latest compaction on the active path is converted to one `compactionSummary` message.\n2. Kept entries from `firstKeptEntryId` to the compaction point are re-included.\n3. Later entries on the path are appended.\n4. `branch_summary` entries are converted to `branchSummary` messages.\n5. `custom_message` entries are converted to `custom` messages.\n\nThose custom roles are then transformed into LLM-facing user messages in `convertToLlm()` using the static templates:\n\n- `packages/agent/src/compaction/prompts/compaction-summary-context.md`\n- `packages/agent/src/compaction/prompts/branch-summary-context.md`\n- `packages/agent/src/compaction/prompts/handoff-document.md`\n\n## Compaction pipeline\n\n### Triggers\n\nCompaction/context maintenance can run in four ways:\n\n1. **Manual context compaction**: `/compact [instructions]` calls `AgentSession.compact(...)`.\n2. **Automatic overflow recovery**: after a same-model assistant error that matches context overflow.\n3. **Automatic threshold maintenance**: after a successful turn when context exceeds the resolved threshold.\n4. **Idle maintenance**: `runIdleCompaction()` can invoke the same auto-maintenance path with reason `\"idle\"`.\n\n### Compaction shape (visual)\n\n```text\nBefore compaction:\n\n entry: 0 1 2 3 4 5 6 7 8 9\n ┌─────┬─────┬─────┬──────┬─────┬─────┬──────┬──────┬─────┬──────┐\n │ hdr │ usr │ ass │ tool │ usr │ ass │ tool │ tool │ ass │ tool │\n └─────┴─────┴─────┴──────┴─────┴─────┴──────┴──────┴─────┴──────┘\n └────────┬───────┘ └──────────────┬──────────────┘\n messagesToSummarize kept messages\n ↑\n firstKeptEntryId (entry 4)\n\nAfter compaction (new entry appended):\n\n entry: 0 1 2 3 4 5 6 7 8 9 10\n ┌─────┬─────┬─────┬──────┬─────┬─────┬──────┬──────┬─────┬──────┬─────┐\n │ hdr │ usr │ ass │ tool │ usr │ ass │ tool │ tool │ ass │ tool │ cmp │\n └─────┴─────┴─────┴──────┴─────┴─────┴──────┴──────┴─────┴──────┴─────┘\n └──────────┬──────┘ └──────────────────────┬───────────────────┘\n not sent to LLM sent to LLM\n ↑\n starts from firstKeptEntryId\n\nWhat the LLM sees:\n\n ┌────────┬─────────┬─────┬─────┬──────┬──────┬─────┬──────┐\n │ system │ summary │ usr │ ass │ tool │ tool │ ass │ tool │\n └────────┴─────────┴─────┴─────┴──────┴──────┴─────┴──────┘\n ↑ ↑ └─────────────────┬────────────────┘\n prompt from cmp messages from firstKeptEntryId\n```\n\n### Overflow-retry vs threshold/idle maintenance\n\nThe automatic paths are intentionally different:\n\n- **Overflow recovery**\n - Trigger: current-model assistant error is detected as context overflow and the error is not older than the latest compaction.\n - The failing assistant error message is removed from active agent state before retry.\n - Context promotion is tried first; if a configured larger model is available, the agent switches model and retries without compacting.\n - If promotion is unavailable and compaction is enabled, context-full compaction runs with `reason: \"overflow\"` and `willRetry: true`; handoff strategy is not used for overflow.\n - On success, agent auto-continues (`agent.continue()`) after compaction.\n\n- **Threshold maintenance**\n - Trigger: successful, non-error assistant message whose adjusted context tokens exceed `resolveThresholdTokens(...)`.\n - Tool-output pruning can reduce the measured token count before threshold comparison.\n - Context promotion is tried before compaction.\n - If promotion is unavailable, auto maintenance runs with `reason: \"threshold\"` and `willRetry: false`.\n - With `compaction.strategy: \"handoff\"`, threshold maintenance starts a new handoff session instead of writing a compaction entry; if handoff returns no document without aborting, it falls back to context-full compaction.\n - On success, if `compaction.autoContinue !== false`, schedules an agent-authored developer auto-continue prompt from `prompts/system/auto-continue.md`.\n\n- **Idle maintenance**\n - Trigger: `runIdleCompaction()` when not streaming or already compacting.\n - Uses `reason: \"idle\"` and does not auto-continue afterward.\n\n### Pre-compaction pruning\n\nBefore compaction checks, tool-result pruning may run (`pruneToolOutputs`).\n\nDefault prune policy:\n\n- Protect newest `40_000` tool-output tokens.\n- Require at least `20_000` total estimated savings.\n- Never prune tool results from `skill` or `read`.\n\nPruned tool results are replaced with:\n\n- `[Output truncated - N tokens]`\n\nIf pruning changes entries, session storage is rewritten and agent message state is refreshed before compaction decisions.\n\n### Boundary and cut-point logic\n\n`prepareCompaction()` only considers entries since the last compaction entry (if any).\n\n1. Find previous compaction index.\n2. Compute `boundaryStart = prevCompactionIndex + 1`.\n3. Adapt `keepRecentTokens` using measured usage ratio when available.\n4. Run `findCutPoint()` over the boundary window.\n\nValid cut points include:\n\n- message entries with roles: `user`, `assistant`, `bashExecution`, `hookMessage`, `branchSummary`, `compactionSummary`\n- `custom_message` entries\n- `branch_summary` entries\n\nHard rule: never cut at `toolResult`.\n\nIf there are non-message metadata entries immediately before the cut point (`model_change`, `thinking_level_change`, labels, etc.), they are pulled into the kept region by moving cut index backward until a message or compaction boundary is hit.\n\n### Split-turn handling\n\nIf cut point is not at a user-turn start, compaction treats it as a split turn.\n\nTurn start detection treats these as user-turn boundaries:\n\n- `message.role === \"user\"`\n- `message.role === \"bashExecution\"`\n- `custom_message` entry\n- `branch_summary` entry\n\nSplit-turn compaction generates two summaries:\n\n1. History summary (`messagesToSummarize`)\n2. Turn-prefix summary (`turnPrefixMessages`)\n\nFinal stored summary is merged as:\n\n```markdown\n<history summary>\n\n---\n\n**Turn Context (split turn):**\n\n<turn prefix summary>\n```\n\n### Summary generation\n\n`compact(...)` builds summaries from serialized conversation text:\n\n1. Convert messages via `convertToLlm()`.\n2. Serialize with `serializeConversation()`.\n3. Wrap in `<conversation>...</conversation>`.\n4. Optionally include `<previous-summary>...</previous-summary>`.\n5. Optionally inject hook context as `<additional-context>` list.\n6. Execute summarization prompt with `SUMMARIZATION_SYSTEM_PROMPT`.\n\nPrompt selection:\n\n- first compaction: `compaction-summary.md`\n- iterative compaction with prior summary: `compaction-update-summary.md`\n- split-turn second pass: `compaction-turn-prefix.md`\n- short UI summary: `compaction-short-summary.md`\n- handoff document: `handoff-document.md` (used by `generateHandoff(...)`, not serialized compaction)\n\nRemote summarization modes:\n\n- If `compaction.remoteEndpoint` is set and remote compaction is enabled, local summary generation POSTs:\n - `{ systemPrompt, prompt }`\n- Expects JSON containing at least `{ summary }`.\n- For OpenAI/OpenAI Codex models, compaction first tries the provider-native `/responses/compact` endpoint when remote compaction is enabled. It preserves provider replacement history in `preserveData.openaiRemoteCompaction` and falls back to local summarization if that native request fails.\n\n### Handoff generation\n\n`packages/agent/src/compaction/compaction.ts` also exports `generateHandoff(...)`. Handoff generation uses the same `completeSimple(...)` oneshot style as summarization, but it preserves the live agent cache prefix by sending the active system prompt, tool array, and real LLM message history, then appending one agent-attributed `user` message containing the handoff prompt. It forces `toolChoice: \"none\"` and returns joined text blocks directly.\n\nHandoff does not write a `CompactionEntry`. `AgentSession.handoff()` owns the session transition: it starts a new session, injects the generated document as a visible `custom_message` with `customType: \"handoff\"`, and rebuilds agent messages from that new session.\n\n### File-operation context in summaries\n\nCompaction tracks cumulative file activity using assistant tool calls:\n\n- `read(path)` → read set\n- `write(path)` → modified set\n- `edit(path)` → modified set\n\nCumulative behavior:\n\n- Includes prior compaction details only when prior entry is pi-generated (`fromExtension !== true`).\n- In split turns, includes turn-prefix file ops too.\n- `readFiles` excludes files also modified.\n\nSummary text gets file tags appended via prompt template:\n\n```xml\n<read-files>\n...\n</read-files>\n<modified-files>\n...\n</modified-files>\n```\n\n### Persist and reload\n\nAfter summary generation (or hook-provided summary), agent session:\n\n1. Appends `CompactionEntry` with `appendCompaction(...)` for context-full maintenance; handoff strategy creates a new session and injects a handoff `custom_message` instead.\n2. Rebuilds display context from the active leaf via `buildDisplaySessionContext()`.\n3. Replaces live agent messages with rebuilt context.\n4. Emits `session_compact` hook event.\n\n## Branch summarization pipeline\n\nBranch summarization is tied to tree navigation, not token overflow.\n\n### Trigger\n\nDuring `navigateTree(...)`:\n\n1. Compute abandoned entries from old leaf to common ancestor using `collectEntriesForBranchSummary(...)`.\n2. If caller requested summary (`options.summarize`), generate summary before switching leaf.\n3. If summary exists, attach it at the navigation target using `branchWithSummary(...)`.\n\nOperationally this is commonly driven by `/tree` flow when `branchSummary.enabled` is enabled.\n\n### Branch switch shape (visual)\n\n```text\nTree before navigation:\n\n ┌─ B ─ C ─ D (old leaf, being abandoned)\n A ───┤\n └─ E ─ F (target)\n\nCommon ancestor: A\nEntries to summarize: B, C, D\n\nAfter navigation with summary:\n\n ┌─ B ─ C ─ D ─ [summary of B,C,D]\n A ───┤\n └─ E ─ F (new leaf)\n```\n\n### Preparation and token budget\n\n`generateBranchSummary(...)` computes budget as:\n\n- `tokenBudget = model.contextWindow - branchSummary.reserveTokens`\n\n`prepareBranchEntries(...)` then:\n\n1. First pass: collect cumulative file ops from all summarized entries, including prior pi-generated `branch_summary` details.\n2. Second pass: walk newest → oldest, adding messages until token budget is reached.\n3. Prefer preserving recent context.\n4. May still include large summary entries near budget edge for continuity.\n\nCompaction entries are included as messages (`compactionSummary`) during branch summarization input.\n\n### Summary generation and persistence\n\nBranch summarization:\n\n1. Converts and serializes selected messages.\n2. Wraps in `<conversation>`.\n3. Uses custom instructions if supplied, otherwise `branch-summary.md`.\n4. Calls summarization model with `SUMMARIZATION_SYSTEM_PROMPT`.\n5. Prepends `branch-summary-preamble.md`.\n6. Appends file-operation tags.\n\nResult is stored as `BranchSummaryEntry` with optional details (`readFiles`, `modifiedFiles`).\n\n## Extension and hook touchpoints\n\n### `session_before_compact`\n\nPre-compaction hook.\n\nCan:\n\n- cancel compaction (`{ cancel: true }`)\n- provide full custom compaction payload (`{ compaction: CompactionResult }`)\n\n### `session.compacting`\n\nPrompt/context customization hook for default compaction.\n\nCan return:\n\n- `prompt` (override base summary prompt)\n- `context` (extra context lines injected into `<additional-context>`)\n- `preserveData` (stored on compaction entry)\n\n### `session_compact`\n\nPost-compaction notification with saved `compactionEntry` and `fromExtension` flag.\n\n### `session_before_tree`\n\nRuns on tree navigation before default branch summary generation.\n\nCan:\n\n- cancel navigation\n- provide custom `{ summary: { summary, details } }` used when user requested summarization\n\n### `session_tree`\n\nPost-navigation event exposing new/old leaf and optional summary entry.\n\n## Runtime behavior and failure semantics\n\n- Manual compaction aborts current agent operation first.\n- `abortCompaction()` cancels both manual and auto-compaction controllers.\n- Auto compaction emits start/end session events for UI/state updates.\n- Auto compaction can try multiple model candidates and retry transient failures; long retry delays prefer the next candidate when one is available.\n- Overflow errors are excluded from generic retry path because they are handled by context promotion/compaction.\n- If auto-compaction fails:\n - overflow path emits `Context overflow recovery failed: ...`\n - threshold path emits `Auto-compaction failed: ...`\n- Branch summarization can be cancelled via abort signal (e.g., Escape), returning canceled/aborted navigation result.\n\n## Settings and defaults\n\nFrom `settings-schema.ts`:\n\n- `compaction.enabled` = `true`\n- `compaction.strategy` = `\"context-full\"` (`\"handoff\"` and `\"off\"` are also supported)\n- `compaction.reserveTokens` = `16384`\n- `compaction.keepRecentTokens` = `20000`\n- `compaction.autoContinue` = `true`\n- `compaction.remoteEnabled` = `true`\n- `compaction.remoteEndpoint` = `undefined`\n- `compaction.thresholdPercent` = `-1` and `compaction.thresholdTokens` = `-1`; when no positive override is set, the threshold is `contextWindow - max(15% of contextWindow, reserveTokens)`\n- `compaction.idleEnabled` = `true`\n- `branchSummary.enabled` = `false`\n- `branchSummary.reserveTokens` = `16384`\n\nThese values are consumed at runtime by `AgentSession` and compaction/branch summarization modules.\n",
|
|
10
10
|
"config-usage.md": "# Configuration Discovery and Resolution\n\nThis document describes how the coding-agent resolves configuration today: which roots are scanned, how precedence works, and how resolved config is consumed by settings, skills, hooks, tools, and extensions.\n\n## Scope\n\nPrimary implementation:\n\n- `packages/coding-agent/src/config.ts`\n- `packages/coding-agent/src/config/settings.ts`\n- `packages/coding-agent/src/config/settings-schema.ts`\n- `packages/coding-agent/src/discovery/builtin.ts`\n- `packages/coding-agent/src/discovery/helpers.ts`\n\nKey integration points:\n\n- `packages/coding-agent/src/capability/index.ts`\n- `packages/coding-agent/src/discovery/index.ts`\n- `packages/coding-agent/src/extensibility/skills.ts`\n- `packages/coding-agent/src/extensibility/hooks/loader.ts`\n- `packages/coding-agent/src/extensibility/custom-tools/loader.ts`\n- `packages/coding-agent/src/extensibility/extensions/loader.ts`\n\n---\n\n## Resolution flow (visual)\n\n```text\n Generic helper order (`config.ts`)\n┌───────────────────────────────────────┐\n│ 1) ~/.omp/agent, ~/.claude, ... │\n│ 2) <cwd>/.omp, <cwd>/.claude, ... │\n└───────────────────────────────────────┘\n │\n ▼\n capability providers enumerate items\n (native provider scans project .omp before user .omp;\n other providers have their own loading rules)\n │\n ▼\n provider priority sort + capability dedup\n │\n ▼\n subsystem-specific consumption\n (settings, skills, hooks, tools, extensions)\n```\n\n## 1) Config roots and source order\n\n## Canonical roots\n\n`src/config.ts` defines a fixed source priority list:\n\n1. `.omp` (native)\n2. `.claude`\n3. `.codex`\n4. `.gemini`\n\nUser-level bases:\n\n- `~/.omp/agent`\n- `~/.claude`\n- `~/.codex`\n- `~/.gemini`\n\nProject-level bases:\n\n- `<cwd>/.omp`\n- `<cwd>/.claude`\n- `<cwd>/.codex`\n- `<cwd>/.gemini`\n\n`CONFIG_DIR_NAME` is `.omp` (`packages/utils/src/dirs.ts`).\n\n## Important constraint\n\nThe generic helpers in `src/config.ts` do **not** include `.pi` in source discovery order.\n\n---\n\n## 2) Core discovery helpers (`src/config.ts`)\n\n## `getConfigDirs(subpath, options)`\n\nReturns ordered entries:\n\n- User-level entries first (by source priority)\n- Then project-level entries (by same source priority)\n\nOptions:\n\n- `user` (default `true`)\n- `project` (default `true`)\n- `cwd` (default `getProjectDir()`)\n- `existingOnly` (default `false`)\n\nThis API is used for directory-based config lookups (commands, hooks, tools, agents, etc.).\n\n## `findConfigFile(subpath, options)` / `findConfigFileWithMeta(...)`\n\nSearches for the first existing file across ordered bases, returns first match (path-only or path+metadata).\n\n## `findAllNearestProjectConfigDirs(subpath, cwd)`\n\nWalks parent directories upward and returns the **nearest existing directory per source base** (`.omp`, `.claude`, `.codex`, `.gemini`), then sorts results by source priority.\n\nUse this when project config should be inherited from ancestor directories (monorepo/nested workspace behavior).\n\n---\n\n## 3) File config wrapper (`ConfigFile<T>` in `src/config.ts`)\n\n`ConfigFile<T>` is the schema-validated loader for single config files.\n\nSupported formats:\n\n- `.yml` / `.yaml`\n- `.json` / `.jsonc`\n\nBehavior:\n\n- Validates parsed data with AJV against a provided TypeBox schema.\n- Caches load result until `invalidate()`.\n- Returns tri-state result via `tryLoad()`:\n - `ok`\n - `not-found`\n - `error` (`ConfigError` with schema/parse context)\n\nLegacy migration still supported:\n\n- If target path is `.yml`/`.yaml`, a sibling `.json` is auto-migrated once (`migrateJsonToYml`).\n\n---\n\n## 4) Settings resolution model (`src/config/settings.ts`)\n\nThe runtime settings model is layered:\n\n1. Global settings: `~/.omp/agent/config.yml`\n2. Project settings: discovered via settings capability (`settings.json` from providers)\n3. Runtime overrides: in-memory, non-persistent\n4. Schema defaults: from `SETTINGS_SCHEMA`\n\nEffective read path:\n\n`defaults <- global <- project <- overrides`\n\nWrite behavior:\n\n- `settings.set(...)` writes to the **global** layer (`config.yml`) and queues background save.\n- Project settings are read-only from capability discovery.\n\n## Migration behavior still active\n\nOn startup, if `config.yml` is missing:\n\n1. Migrate from `~/.omp/agent/settings.json` (renamed to `.bak` on success)\n2. Merge with legacy DB settings from `agent.db`\n3. Write merged result to `config.yml`\n\nField-level migrations in `#migrateRawSettings`:\n\n- `queueMode` -> `steeringMode`\n- `ask.timeout` milliseconds -> seconds when old value looks like ms (`> 1000`)\n- Legacy flat `theme: \"...\"` -> `theme.dark/theme.light` structure\n\n---\n\n## 5) Capability/discovery integration\n\nMost non-core config loading flows through the capability registry (`src/capability/index.ts` + `src/discovery/index.ts`).\n\n## Provider ordering\n\nProviders are sorted by numeric priority (higher first). Example priorities:\n\n- Native OMP (`builtin.ts`): `100`\n- Claude: `80`\n- Codex / agents / Claude marketplace: `70`\n- Gemini: `60`\n\n```text\nProvider precedence (higher wins)\n\nnative (.omp) priority 100\nclaude priority 80\ncodex / agents / ... priority 70\ngemini priority 60\n```\n\n## Dedup semantics\n\nCapabilities define a `key(item)`:\n\n- same key => first item wins (higher-priority/earlier-loaded item)\n- no key (`undefined`) => no dedup, all items retained\n\nRelevant keys:\n\n- skills: `name`\n- tools: `name`\n- hooks: `${type}:${tool}:${name}`\n- extension modules: `name`\n- extensions: `name`\n- settings: no dedup (all items preserved)\n\n---\n\n## 6) Native `.omp` provider behavior (`packages/coding-agent/src/discovery/builtin.ts`)\n\nNative provider (`id: native`) reads native config from:\n\n- project: `<cwd>/.omp/...`\n- user: `~/.omp/agent/...`\n\n### Directory admission rules\n\n- Slash commands, rules, prompts, instructions, hooks, tools, extensions, extension modules, and settings use a project/user root only when the root directory exists and is non-empty.\n- Skills scan `<ancestor>/.omp/skills` for each ancestor from the current working directory up to the repo root/home boundary, plus `~/.omp/agent/skills`, without requiring the root `.omp` directory itself to be non-empty.\n- `SYSTEM.md` and `AGENTS.md` read user-level files directly and use nearest-ancestor project `.omp` lookup for project files, but the project `.omp` directory must be non-empty.\n\n### Scope-specific loading\n\n- Skills: `<ancestor>/.omp/skills/*/SKILL.md` and `~/.omp/agent/skills/*/SKILL.md`\n- Slash commands: `commands/*.md`\n- Rules: `rules/*.{md,mdc}`\n- Prompts: `prompts/*.md`\n- Instructions: `instructions/*.md`\n- Hooks: `hooks/pre/*`, `hooks/post/*`\n- Tools: `tools/*.{json,md,ts,js,sh,bash,py}` and `tools/<name>/index.ts`\n- Extension modules: discovered under `extensions/` (+ legacy `settings.json.extensions` string array)\n- Extensions: `extensions/<name>/gemini-extension.json`\n- Settings capability: `settings.json`\n\n### Nearest-project lookup nuance\n\n## For `SYSTEM.md` and `AGENTS.md`, native provider uses nearest-ancestor project `.omp` directory search (walk-up) and still requires the project `.omp` dir to be non-empty.\n\n## 7) How major subsystems consume config\n\n## Settings subsystem\n\n- `Settings.init()` loads global `config.yml` + discovered project `settings.json` capability items.\n- Only capability items with `level === \"project\"` are merged into project layer.\n\n## Skills subsystem\n\n- `extensibility/skills.ts` loads via `loadCapability(skillCapability.id, { cwd })`.\n- Applies source toggles and filters (`ignoredSkills`, `includeSkills`, custom dirs).\n- Legacy-named toggles still exist (`skills.enablePiUser`, `skills.enablePiProject`) but they gate the native provider (`provider === \"native\"`).\n\n## Hooks subsystem\n\n- `discoverAndLoadHooks()` resolves hook paths from hook capability + explicit configured paths.\n- Then loads modules via Bun import.\n\n## Tools subsystem\n\n- `discoverAndLoadCustomTools()` resolves tool paths from tool capability + plugin tool paths + explicit configured paths.\n- Declarative `.md/.json` tool files are metadata only; executable loading expects code modules.\n\n## Extensions subsystem\n\n- `discoverAndLoadExtensions()` resolves extension modules from extension-module capability plus explicit paths.\n- Current implementation intentionally keeps only capability items with `_source.provider === \"native\"` before loading.\n\n---\n\n## 8) Precedence rules to rely on\n\nUse this mental model:\n\n1. Source directory ordering from `config.ts` determines candidate path order.\n2. Capability provider priority determines cross-provider precedence.\n3. Capability key dedup determines collision behavior (first wins for keyed capabilities).\n4. Subsystem-specific merge logic can further change effective precedence (especially settings).\n\n### Settings-specific caveat\n\nSettings capability items are not deduplicated; `Settings.#loadProjectSettings()` deep-merges project items in returned order. Because merge applies later item values over earlier values, effective override behavior depends on provider emission order, not just capability key semantics.\n\n---\n\n## 9) Legacy/compatibility behaviors still present\n\n- `ConfigFile` JSON -> YAML migration for YAML-targeted files.\n- Settings migration from `settings.json` and `agent.db` to `config.yml`.\n- Settings key migrations (`queueMode`, `ask.timeout`, flat `theme`, `task.isolation.enabled`, `statusLine.plan_mode`).\n- Legacy setting names `skills.enablePiUser` / `skills.enablePiProject` are still active gates for native skill source.\n\nIf these compatibility paths are removed in code, update this document immediately; several runtime behaviors still depend on them today.\n",
|
|
11
|
-
"custom-tools.md": "# Custom Tools\n\nCustom tools are model-callable functions that plug into the same tool execution pipeline as built-in tools.\n\nA custom tool is a TypeScript/JavaScript module that exports a factory. The factory receives a host API (`CustomToolAPI`) and returns one tool or an array of tools.\n\n## What this is (and is not)\n\n- **Custom tool**: callable by the model during a turn (`execute` + TypeBox
|
|
12
|
-
"environment-variables.md": "# Environment Variables (Current Runtime Reference)\n\nThis reference is derived from current code paths in:\n\n- `packages/coding-agent/src/**`\n- `packages/ai/src/**` (provider/auth resolution used by coding-agent)\n- `packages/utils/src/**` and `packages/tui/src/**` where those vars directly affect coding-agent runtime\n\nIt documents only active behavior.\n\n## Resolution model and precedence\n\nMost runtime lookups use `$env` from `@oh-my-pi/pi-utils` (`packages/utils/src/env.ts`).\n\n`$env` loading order:\n\n1. Existing process environment (`Bun.env`)\n2. Project `.env` (`$PWD/.env`) for keys not already set\n3. Agent `.env` (`~/.omp/agent/.env`, respecting `PI_CONFIG_DIR` / `PI_CODING_AGENT_DIR`) for keys not already set\n4. Config-root `.env` (`~/.omp/.env`, respecting `PI_CONFIG_DIR`) for keys not already set\n5. Home `.env` (`~/.env`) for keys not already set\n\nAdditional rule inside each `.env` file: `OMP_*` keys are mirrored to `PI_*` keys in that parsed file.\n\n---\n\n## 1) Model/provider authentication\n\nThese are consumed via `getEnvApiKey()` (`packages/ai/src/stream.ts`) unless noted otherwise.\n\n### Core provider credentials\n\n| Variable | Used for | Required when | Notes / precedence |\n| ------------------------------- | ------------------------------------------------ | -------------------------------------------------------------- | --------------------------------------------------------------------------------------------------- |\n| `ANTHROPIC_OAUTH_TOKEN` | Anthropic API auth | Using Anthropic with OAuth token auth | Takes precedence over `ANTHROPIC_API_KEY` for provider auth resolution |\n| `ANTHROPIC_API_KEY` | Anthropic API auth | Using Anthropic without OAuth token | Fallback after `ANTHROPIC_OAUTH_TOKEN` |\n| `ANTHROPIC_FOUNDRY_API_KEY` | Anthropic via Azure Foundry / enterprise gateway | `CLAUDE_CODE_USE_FOUNDRY` enabled | Takes precedence over `ANTHROPIC_OAUTH_TOKEN` and `ANTHROPIC_API_KEY` when Foundry mode is enabled |\n| `OPENAI_API_KEY` | OpenAI auth | Using OpenAI-family providers without explicit apiKey argument | Used by OpenAI Completions/Responses providers |\n| `GEMINI_API_KEY` | Google Gemini auth | Using `google` provider models | Primary key for Gemini provider mapping |\n| `GOOGLE_API_KEY` | Gemini image tool auth fallback | Using `gemini_image` tool without `GEMINI_API_KEY` | Used by coding-agent image tool fallback path |\n| `GROQ_API_KEY` | Groq auth | Using Groq models | |\n| `CEREBRAS_API_KEY` | Cerebras auth | Using Cerebras models | |\n| `FIREWORKS_API_KEY` | Fireworks auth | Using Fireworks models | |\n| `TOGETHER_API_KEY` | Together auth | Using `together` provider | |\n| `HUGGINGFACE_HUB_TOKEN` | Hugging Face auth | Using `huggingface` provider | Primary Hugging Face token env var |\n| `HF_TOKEN` | Hugging Face auth | Using `huggingface` provider | Fallback when `HUGGINGFACE_HUB_TOKEN` is unset |\n| `SYNTHETIC_API_KEY` | Synthetic auth | Using Synthetic models | |\n| `NVIDIA_API_KEY` | NVIDIA auth | Using `nvidia` provider | |\n| `NANO_GPT_API_KEY` | NanoGPT auth | Using `nanogpt` provider | |\n| `VENICE_API_KEY` | Venice auth | Using `venice` provider | |\n| `LITELLM_API_KEY` | LiteLLM auth | Using `litellm` provider | OpenAI-compatible LiteLLM proxy key |\n| `LM_STUDIO_API_KEY` | LM Studio auth (optional) | Using `lm-studio` provider with authenticated hosts | Local LM Studio usually runs without auth; any non-empty token works when a key is required |\n| `OLLAMA_API_KEY` | Ollama auth (optional) | Using `ollama` provider with authenticated hosts | Local Ollama usually runs without auth; any non-empty token works when a key is required |\n| `LLAMA_CPP_API_KEY` | llama.cpp auth (optional) | Using `llama.cpp` provider with authenticated hosts | Local llama.cpp usually runs without auth; any non-empty token works when a key is configured |\n| `XIAOMI_API_KEY` | Xiaomi MiMo auth | Using `xiaomi` provider | |\n| `MOONSHOT_API_KEY` | Moonshot auth | Using `moonshot` provider | |\n| `XAI_API_KEY` | xAI auth | Using xAI models | |\n| `OPENROUTER_API_KEY` | OpenRouter auth | Using OpenRouter models | Also used by image tool when preferred/auto provider is OpenRouter |\n| `MISTRAL_API_KEY` | Mistral auth | Using Mistral models | |\n| `ZAI_API_KEY` | z.ai auth | Using z.ai models | Also used by z.ai web search provider |\n| `MINIMAX_API_KEY` | MiniMax auth | Using `minimax` provider | |\n| `MINIMAX_CODE_API_KEY` | MiniMax Code auth | Using `minimax-code` provider | |\n| `MINIMAX_CODE_CN_API_KEY` | MiniMax Code CN auth | Using `minimax-code-cn` provider | |\n| `OPENCODE_API_KEY` | OpenCode auth | Using `opencode-go` / `opencode-zen` models | |\n| `QIANFAN_API_KEY` | Qianfan auth | Using `qianfan` provider | |\n| `QWEN_OAUTH_TOKEN` | Qwen Portal auth | Using `qwen-portal` with OAuth token | Takes precedence over `QWEN_PORTAL_API_KEY` |\n| `QWEN_PORTAL_API_KEY` | Qwen Portal auth | Using `qwen-portal` with API key | Fallback after `QWEN_OAUTH_TOKEN` |\n| `ZENMUX_API_KEY` | ZenMux auth | Using `zenmux` provider | Used for ZenMux OpenAI and Anthropic-compatible routes |\n| `VLLM_API_KEY` | vLLM auth/discovery opt-in | Using `vllm` provider (local OpenAI-compatible servers) | Any non-empty value works for no-auth local servers |\n| `CURSOR_ACCESS_TOKEN` | Cursor provider auth | Using Cursor provider | |\n| `AI_GATEWAY_API_KEY` | Vercel AI Gateway auth | Using `vercel-ai-gateway` provider | |\n| `CLOUDFLARE_AI_GATEWAY_API_KEY` | Cloudflare AI Gateway auth | Using `cloudflare-ai-gateway` provider | Base URL must be configured as `https://gateway.ai.cloudflare.com/v1/<account>/<gateway>/anthropic` |\n| `ALIBABA_CODING_PLAN_API_KEY` | Alibaba Coding Plan auth | Using `alibaba-coding-plan` provider | |\n| `DEEPSEEK_API_KEY` | DeepSeek auth | Using DeepSeek models | |\n| `KILO_API_KEY` | Kilo auth | Using Kilo models | |\n| `OLLAMA_CLOUD_API_KEY` | Ollama Cloud auth | Using `ollama-cloud` provider | |\n| `GITLAB_TOKEN` | GitLab Duo auth | Using `gitlab-duo` provider | |\n\n### GitHub/Copilot token chains\n\n| Variable | Used for | Chain |\n| ---------------------- | ------------------------------------------------ | ---------------------------------------------------- |\n| `COPILOT_GITHUB_TOKEN` | GitHub Copilot provider auth | `COPILOT_GITHUB_TOKEN` → `GH_TOKEN` → `GITHUB_TOKEN` |\n| `GH_TOKEN` | Copilot fallback; GitHub API auth in web scraper | In web scraper: `GITHUB_TOKEN` → `GH_TOKEN` |\n| `GITHUB_TOKEN` | Copilot fallback; GitHub API auth in web scraper | In web scraper: checked before `GH_TOKEN` |\n\n---\n\n## 2) Provider-specific runtime configuration\n\n### Anthropic Foundry Gateway (Azure / enterprise proxy)\n\nWhen `CLAUDE_CODE_USE_FOUNDRY` is enabled, Anthropic requests switch to Foundry mode:\n\n- Base URL resolves from `FOUNDRY_BASE_URL` (fallback remains model/default base URL if unset).\n- API key resolution for provider `anthropic` becomes:\n `ANTHROPIC_FOUNDRY_API_KEY` → `ANTHROPIC_OAUTH_TOKEN` → `ANTHROPIC_API_KEY`.\n- `ANTHROPIC_CUSTOM_HEADERS` is parsed as comma/newline-separated `key: value` pairs and merged into request headers.\n- TLS client/server material can be injected from env values:\n `NODE_EXTRA_CA_CERTS`, `CLAUDE_CODE_CLIENT_CERT`, `CLAUDE_CODE_CLIENT_KEY`.\n Each accepts either:\n - a filesystem path to PEM content, or\n - inline PEM (including escaped `\\n` sequences).\n\n| Variable | Value type | Behavior |\n| --------------------------- | ---------------------------------------------- | ----------------------------------------------------------------------------- |\n| `CLAUDE_CODE_USE_FOUNDRY` | Boolean-like string (`1`, `true`, `yes`, `on`) | Enables Foundry mode for Anthropic provider |\n| `FOUNDRY_BASE_URL` | URL string | Anthropic endpoint base URL in Foundry mode |\n| `ANTHROPIC_FOUNDRY_API_KEY` | Token string | Used for `Authorization: Bearer <token>` |\n| `ANTHROPIC_CUSTOM_HEADERS` | Header list string | Extra headers; format `header-a: value, header-b: value` or newline-separated |\n| `NODE_EXTRA_CA_CERTS` | PEM path or inline PEM | Extra CA chain for server certificate validation |\n| `CLAUDE_CODE_CLIENT_CERT` | PEM path or inline PEM | mTLS client certificate |\n| `CLAUDE_CODE_CLIENT_KEY` | PEM path or inline PEM | mTLS client private key (must be paired with cert) |\n\n### Amazon Bedrock\n\n| Variable | Default / behavior |\n| ------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------- |\n| `AWS_REGION` | Primary region source |\n| `AWS_DEFAULT_REGION` | Fallback if `AWS_REGION` unset |\n| `AWS_PROFILE` | Enables named profile auth path |\n| `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY` | Enables IAM key auth path |\n| `AWS_BEARER_TOKEN_BEDROCK` | Enables bearer token auth path |\n| `AWS_CONTAINER_CREDENTIALS_RELATIVE_URI` / `AWS_CONTAINER_CREDENTIALS_FULL_URI` | Enables ECS task credential path |\n| `AWS_WEB_IDENTITY_TOKEN_FILE` + `AWS_ROLE_ARN` | Enables web identity auth path |\n| `AWS_BEDROCK_SKIP_AUTH` | If `1`, injects dummy credentials (proxy/non-auth scenarios) |\n| `AWS_BEDROCK_FORCE_HTTP1` | If `1`, forces Node HTTP/1 request handler |\n| `HTTPS_PROXY` / `HTTP_PROXY` / `ALL_PROXY` | Routes Bedrock runtime and AWS SSO credential calls through the configured proxy using HTTP/1 |\n| `NO_PROXY` | Excludes matching hosts from proxy routing when a proxy variable is configured |\n\nRegion fallback in provider code: `options.region` → `AWS_REGION` → `AWS_DEFAULT_REGION` → `us-east-1`.\n\n### Azure OpenAI Responses\n\n| Variable | Default / behavior |\n| ---------------------------------- | --------------------------------------------------------------------------- |\n| `AZURE_OPENAI_API_KEY` | Required unless API key passed as option |\n| `AZURE_OPENAI_API_VERSION` | Default `v1` |\n| `AZURE_OPENAI_BASE_URL` | Direct base URL override |\n| `AZURE_OPENAI_RESOURCE_NAME` | Used to construct base URL: `https://<resource>.openai.azure.com/openai/v1` |\n| `AZURE_OPENAI_DEPLOYMENT_NAME_MAP` | Optional mapping string: `modelId=deploymentName,model2=deployment2` |\n\nBase URL resolution: option `azureBaseUrl` → env `AZURE_OPENAI_BASE_URL` → option/env resource name → `model.baseUrl`.\n\n### Google Vertex AI\n\n| Variable | Required? | Notes |\n| -------------------------------- | ------------------------------ | ------------------------------------------------------------------------------------------------------------------------- |\n| `GOOGLE_CLOUD_PROJECT` | Yes (unless passed in options) | Fallback: `GCLOUD_PROJECT` |\n| `GCLOUD_PROJECT` | Fallback | Used as alternate project ID source |\n| `GOOGLE_CLOUD_PROJECT_ID` | OAuth login helper only | Used by Gemini CLI OAuth project discovery |\n| `GOOGLE_CLOUD_LOCATION` | Yes (unless passed in options) | No default in provider |\n| `GOOGLE_CLOUD_API_KEY` | Conditional | Direct Vertex API-key auth; otherwise ADC fallback can authenticate when project and location are set |\n| `GOOGLE_APPLICATION_CREDENTIALS` | Conditional | If set, file must exist; otherwise ADC fallback path is checked (`~/.config/gcloud/application_default_credentials.json`) |\n\n### Kimi\n\n| Variable | Default / behavior |\n| ---------------------- | -------------------------------------------------------- |\n| `KIMI_CODE_OAUTH_HOST` | Primary OAuth host override |\n| `KIMI_OAUTH_HOST` | Fallback OAuth host override |\n| `KIMI_CODE_BASE_URL` | Overrides Kimi usage endpoint base URL (`usage/kimi.ts`) |\n\nOAuth host chain: `KIMI_CODE_OAUTH_HOST` → `KIMI_OAUTH_HOST` → `https://auth.kimi.com`.\n\n### Gemini CLI compatibility\n\n| Variable | Default / behavior |\n| -------------------------- | --------------------------------------------------------------- |\n| `PI_AI_GEMINI_CLI_VERSION` | Overrides Gemini CLI user-agent version tag (`0.35.3` if unset) |\n\n### OpenAI Codex responses (feature/debug controls)\n\n| Variable | Behavior |\n| ------------------------------------ | ---------------------------------------------------- |\n| `PI_CODEX_DEBUG` | `1`/`true` enables Codex provider debug logging |\n| `PI_CODEX_WEBSOCKET` | `1`/`true` enables websocket transport preference |\n| `PI_CODEX_WEBSOCKET_V2` | `1`/`true` enables websocket v2 path |\n| `PI_CODEX_WEBSOCKET_IDLE_TIMEOUT_MS` | Positive integer override (default 300000) |\n| `PI_CODEX_WEBSOCKET_RETRY_BUDGET` | Non-negative integer override (default 5) |\n| `PI_CODEX_WEBSOCKET_RETRY_DELAY_MS` | Positive integer base backoff override (default 500) |\n| `PI_OPENAI_STREAM_IDLE_TIMEOUT_MS` | Positive integer OpenAI stream idle timeout override |\n\n### Cursor provider debug\n\n| Variable | Behavior |\n| ------------------ | ------------------------------------------------------------------------ |\n| `DEBUG_CURSOR` | Enables provider debug logs; `2`/`verbose` for detailed payload snippets |\n| `DEBUG_CURSOR_LOG` | Optional file path for JSONL debug log output |\n\n### Prompt cache compatibility switch\n\n| Variable | Behavior |\n| -------------------- | ----------------------------------------------------------------------------------------------------------------- |\n| `PI_CACHE_RETENTION` | If `long`, enables long retention where supported (`anthropic`, `openai-responses`, Bedrock retention resolution) |\n\n---\n\n## 3) Web search subsystem\n\n### Search provider credentials\n\n| Variable | Used by |\n| --------------------------------------------------- | ------------------------------------------------------------- |\n| `EXA_API_KEY` | Exa search provider and Exa MCP tools |\n| `BRAVE_API_KEY` | Brave search provider |\n| `PERPLEXITY_API_KEY` | Perplexity search provider API-key mode |\n| `PERPLEXITY_COOKIES` | Perplexity cookie-auth search mode |\n| `TAVILY_API_KEY` | Tavily search provider |\n| `ZAI_API_KEY` | z.ai search provider (also checks stored OAuth in `agent.db`) |\n| `OPENAI_API_KEY` / Codex OAuth in DB | Codex search provider availability/auth |\n| `PI_CODEX_WEB_SEARCH_MODEL` | Codex search provider model override |\n| `MOONSHOT_SEARCH_API_KEY` / `KIMI_SEARCH_API_KEY` | Kimi/Moonshot search provider env auth |\n| `MOONSHOT_SEARCH_BASE_URL` / `KIMI_SEARCH_BASE_URL` | Kimi/Moonshot search endpoint override |\n| `KAGI_API_KEY` | Kagi search provider |\n| `JINA_API_KEY` | Jina search provider |\n| `PARALLEL_API_KEY` | Parallel search provider |\n| `SEARXNG_ENDPOINT`, `SEARXNG_TOKEN` | SearXNG endpoint and optional bearer token |\n| `SEARXNG_BASIC_USERNAME`, `SEARXNG_BASIC_PASSWORD` | SearXNG HTTP Basic Auth credentials |\n\nSearXNG also reads the equivalent `searxng.endpoint`, `searxng.token`, `searxng.basicUsername`, and `searxng.basicPassword` settings from `~/.omp/agent/config.yml`; environment variables are fallbacks.\n\n### Anthropic web search auth chain\n\nAnthropic web search uses `findAnthropicAuth()` from `packages/ai/src/utils/anthropic-auth.ts` in this order:\n\n1. `ANTHROPIC_SEARCH_API_KEY` (+ optional `ANTHROPIC_SEARCH_BASE_URL`)\n2. `ANTHROPIC_FOUNDRY_API_KEY` when `CLAUDE_CODE_USE_FOUNDRY` is enabled\n3. Anthropic OAuth credentials from `agent.db` (must not expire within 5-minute buffer)\n4. Anthropic API-key credentials from `agent.db`\n5. Generic Anthropic env fallback: provider key (`ANTHROPIC_FOUNDRY_API_KEY` in Foundry mode, otherwise `ANTHROPIC_OAUTH_TOKEN`/`ANTHROPIC_API_KEY`) + optional `ANTHROPIC_BASE_URL` (`FOUNDRY_BASE_URL` when Foundry mode is enabled)\n\nRelated vars:\n\n| Variable | Default / behavior |\n| --------------------------- | ---------------------------------------------------- |\n| `ANTHROPIC_SEARCH_API_KEY` | Highest-priority explicit search key |\n| `ANTHROPIC_SEARCH_BASE_URL` | Defaults to `https://api.anthropic.com` when omitted |\n| `ANTHROPIC_SEARCH_MODEL` | Defaults to `claude-haiku-4-5` |\n| `ANTHROPIC_BASE_URL` | Generic fallback base URL for tier-4 auth path |\n\n### Perplexity OAuth flow behavior flag\n\n| Variable | Behavior |\n| ------------------- | ------------------------------------------------------------------------------- |\n| `PI_AUTH_NO_BORROW` | If set, disables macOS native-app token borrowing path in Perplexity login flow |\n\n---\n\n## 4) Python tooling and kernel runtime\n\n| Variable | Default / behavior |\n| ------------------------- | ------------------------------------------------------------------------------------------------------------------- |\n| `PI_PY` | Eval backend override: `0`/`bash`=JavaScript only, `1`/`py`=Python only, `mix`/`both`=both; invalid values ignored |\n| `PI_PYTHON_SKIP_CHECK` | If `1`, skips Python interpreter availability checks (subprocess runner still starts on demand) |\n| `PI_PYTHON_INTEGRATION` | If `1`, opts gated integration tests in (e.g. `python-runner.integration.test.ts`) into running against real Python |\n| `PI_PYTHON_IPC_TRACE` | If `1`, logs NDJSON frames exchanged with the Python runner subprocess |\n| `VIRTUAL_ENV` | Highest-priority venv path for Python runtime resolution |\n\nExtra conditional behavior:\n\n- If `BUN_ENV=test` or `NODE_ENV=test`, Python availability checks are treated as OK and warming is skipped.\n- Python env filtering denies common API keys and allows safe base vars + `LC_`, `XDG_`, `PI_` prefixes.\n\n---\n\n## 5) Agent/runtime behavior toggles\n\n| Variable | Default / behavior |\n| ---------------------------- | -------------------------------------------------------------------------------------------------- |\n| `PI_SMOL_MODEL` | Ephemeral model-role override for `smol` (CLI `--smol` takes precedence) |\n| `PI_SLOW_MODEL` | Ephemeral model-role override for `slow` (CLI `--slow` takes precedence) |\n| `PI_PLAN_MODEL` | Ephemeral model-role override for `plan` (CLI `--plan` takes precedence) |\n| `PI_NO_TITLE` | If set (any non-empty value), disables auto session title generation on first user message |\n| `NULL_PROMPT` | If `true`, system prompt builder returns empty string |\n| `PI_BLOCKED_AGENT` | Blocks a specific subagent type in task tool |\n| `PI_SUBPROCESS_CMD` | Overrides subagent spawn command (`omp` / `omp.cmd` resolution bypass) |\n| `PI_TASK_MAX_OUTPUT_BYTES` | Max captured output bytes per subagent (default `500000`) |\n| `PI_TASK_MAX_OUTPUT_LINES` | Max captured output lines per subagent (default `5000`) |\n| `PI_TIMING` | If `1`, enables startup/tool timing instrumentation logs |\n| `PI_DEBUG_STARTUP` | Enables startup stage debug prints to stderr in multiple startup paths |\n| `PI_PACKAGE_DIR` | Overrides package asset base dir resolution (docs/examples/changelog path lookup) |\n| `PI_DISABLE_LSPMUX` | If `1`, disables lspmux detection/integration and forces direct LSP server spawning |\n| `PI_RPC_EMIT_TITLE` | Boolean-like flag enabling title events in RPC mode |\n| `SMITHERY_URL` | Smithery web URL override (default `https://smithery.ai`) |\n| `SMITHERY_API_URL` | Smithery API base URL override (default `https://api.smithery.ai`) |\n| `PUPPETEER_EXECUTABLE_PATH` | Browser tool Chromium executable override |\n| `LM_STUDIO_BASE_URL` | Default implicit LM Studio discovery base URL override (`http://127.0.0.1:1234/v1` if unset) |\n| `OLLAMA_BASE_URL` | Default implicit Ollama discovery base URL override (`http://127.0.0.1:11434` if unset) |\n| `LLAMA_CPP_BASE_URL` | Default implicit Llama.cpp discovery base URL override (`http://127.0.0.1:8080` if unset) |\n| `PI_EDIT_VARIANT` | Forces edit tool variant when valid (`patch`, `replace`, `hashline`, `atom`, `vim`, `apply_patch`) |\n| `PI_FORCE_IMAGE_PROTOCOL` | Forces supported image protocol (`kitty`, `iterm2`/`iterm`, `sixel`, `none`) where used |\n| `PI_ALLOW_SIXEL_PASSTHROUGH` | Allows SIXEL passthrough when `PI_FORCE_IMAGE_PROTOCOL=sixel` |\n| `PI_NO_PTY` | If `1`, disables interactive PTY path for bash tool |\n\n`PI_NO_PTY` is also set internally when CLI `--no-pty` is used.\n\n---\n\n## 6) Storage and config root paths\n\nThese are consumed via `@oh-my-pi/pi-utils/dirs` and affect where coding-agent stores data.\n\n| Variable | Default / behavior |\n| --------------------- | ----------------------------------------------------------------------------- |\n| `PI_CONFIG_DIR` | Config root dirname under home (default `.omp`) |\n| `PI_CODING_AGENT_DIR` | Full override for agent directory (default `~/<PI_CONFIG_DIR or .omp>/agent`) |\n| `PWD` | Used when matching canonical current working directory in path helpers |\n\n---\n\n## 7) Shell/tool execution environment\n\n(From `packages/utils/src/procmgr.ts` and coding-agent bash tool integration.)\n\n| Variable | Behavior |\n| -------------------------- | ------------------------------------------------------------------------------ |\n| `PI_BASH_NO_CI` | Suppresses automatic `CI=true` injection into spawned shell env |\n| `CLAUDE_BASH_NO_CI` | Legacy alias fallback for `PI_BASH_NO_CI` |\n| `PI_BASH_NO_LOGIN` | Disables login-shell mode; shell args become `['-c']` instead of `['-l','-c']` |\n| `CLAUDE_BASH_NO_LOGIN` | Legacy alias fallback for `PI_BASH_NO_LOGIN` |\n| `PI_SHELL_PREFIX` | Optional command prefix wrapper |\n| `CLAUDE_CODE_SHELL_PREFIX` | Legacy alias fallback for `PI_SHELL_PREFIX` |\n| `VISUAL` | Preferred external editor command |\n| `EDITOR` | Fallback external editor command |\n\nCurrent implementation: `PI_BASH_NO_LOGIN`/`CLAUDE_BASH_NO_LOGIN` are active; when either is set, `getShellArgs()` returns `['-c']`.\n\n---\n\n## 8) UI/theme/session detection (auto-detected env)\n\nThese are read as runtime signals; they are usually set by the terminal/OS rather than manually configured.\n\n| Variable | Used for |\n| ------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------- |\n| `COLORTERM`, `TERM`, `WT_SESSION` | Color capability detection (theme color mode) |\n| `COLORFGBG` | Terminal background light/dark auto-detection |\n| `TERM_PROGRAM`, `TERM_PROGRAM_VERSION`, `TERMINAL_EMULATOR` | Terminal identity in system prompt/context |\n| `KDE_FULL_SESSION`, `XDG_CURRENT_DESKTOP`, `DESKTOP_SESSION`, `XDG_SESSION_DESKTOP`, `GDMSESSION`, `WINDOWMANAGER` | Desktop/window-manager detection in system prompt/context |\n| `KITTY_WINDOW_ID`, `TMUX_PANE`, `TERM_SESSION_ID`, `WT_SESSION` | Stable per-terminal session breadcrumb IDs |\n| `SHELL`, `ComSpec`, `TERM_PROGRAM`, `TERM` | System info diagnostics |\n| `APPDATA`, `XDG_CONFIG_HOME` | lspmux config path resolution |\n| `HOME` | Path shortening in MCP command UI |\n\n---\n\n## 9) TUI runtime flags (shared package, affects coding-agent UX)\n\n| Variable | Behavior |\n| ------------------------- | ------------------------------------------------------------------------------------- |\n| `PI_NOTIFICATIONS` | `off` / `0` / `false` suppress desktop notifications |\n| `PI_TUI_WRITE_LOG` | If set, logs TUI writes to file |\n| `PI_HARDWARE_CURSOR` | If `1`, enables hardware cursor mode |\n| `PI_CLEAR_ON_SHRINK` | If `1`, clears empty rows when content shrinks |\n| `PI_DEBUG_REDRAW` | If `1`, enables redraw debug logging |\n| `PI_TUI_DEBUG` | If `1`, enables deep TUI debug dump path |\n| `PI_FORCE_IMAGE_PROTOCOL` | Forces terminal image protocol detection (`kitty`, `iterm2`/`iterm`, `sixel`, `none`) |\n\n---\n\n## 10) Commit generation controls\n\n| Variable | Behavior |\n| ------------------------- | ------------------------------------------------------------------- |\n| `PI_COMMIT_TEST_FALLBACK` | If `true` (case-insensitive), force commit fallback generation path |\n| `PI_COMMIT_NO_FALLBACK` | If `true`, disables fallback when agent returns no proposal |\n| `PI_COMMIT_MAP_REDUCE` | If `false`, disables map-reduce commit analysis path |\n| `DEBUG` | If set, commit agent error stack traces are printed |\n\n---\n\n## Security-sensitive variables\n\nTreat these as secrets; do not log or commit them:\n\n- Provider/API keys and OAuth/bearer credentials (all `*_API_KEY`, `*_TOKEN`, OAuth access/refresh tokens)\n- Cloud credentials (`AWS_*`, `GOOGLE_APPLICATION_CREDENTIALS` path may expose service-account material)\n- Search/provider auth vars (`EXA_API_KEY`, `BRAVE_API_KEY`, `PERPLEXITY_API_KEY`, Anthropic search keys)\n- Foundry mTLS material (`CLAUDE_CODE_CLIENT_CERT`, `CLAUDE_CODE_CLIENT_KEY`, `NODE_EXTRA_CA_CERTS` when it points to private CA bundles)\n\nPython runtime also explicitly strips many common key vars before spawning kernel subprocesses (`packages/coding-agent/src/eval/py/runtime.ts`).\n",
|
|
11
|
+
"custom-tools.md": "# Custom Tools\n\nCustom tools are model-callable functions that plug into the same tool execution pipeline as built-in tools.\n\nA custom tool is a TypeScript/JavaScript module that exports a factory. The factory receives a host API (`CustomToolAPI`) and returns one tool or an array of tools.\n\n## What this is (and is not)\n\n- **Custom tool**: callable by the model during a turn (`execute` + Zod parameter schema; legacy TypeBox is still accepted and lifted to Zod at registration).\n- **Extension**: lifecycle/event framework that can register tools and intercept/modify events.\n- **Hook**: external pre/post command scripts.\n- **Skill**: static guidance/context package, not executable tool code.\n\nIf you need the model to call code directly, use a custom tool.\n\n## Integration paths in current code\n\nThere are two active integration styles:\n\n1. **SDK-provided custom tools** (`options.customTools`)\n - Wrapped into agent tools via `CustomToolAdapter` or extension wrappers.\n - Always included in the initial active tool set in SDK bootstrap.\n\n2. **Filesystem-discovered modules via loader API** (`discoverAndLoadCustomTools` / `loadCustomTools`)\n - Exposed as library APIs in `src/extensibility/custom-tools/loader.ts`.\n - Host code can call these to discover and load tool modules from config/provider/plugin paths.\n\n```text\nModel tool call flow\n\nLLM tool call\n │\n ▼\nTool registry (built-ins + custom tool adapters)\n │\n ▼\nCustomTool.execute(toolCallId, params, onUpdate, ctx, signal)\n │\n ├─ onUpdate(...) -> streamed partial result\n └─ return result -> final tool content/details\n```\n\n## Discovery locations (loader API)\n\n`discoverAndLoadCustomTools(configuredPaths, cwd, builtInToolNames)` merges:\n\n1. Capability providers (`toolCapability`), including:\n - Native OMP config (`~/.omp/agent/tools`, `.omp/tools`)\n - Claude config (`~/.claude/tools`, `.claude/tools`)\n - Codex config (`~/.codex/tools`, `.codex/tools`)\n - Claude marketplace plugin cache provider\n2. Installed plugin manifests (`~/.omp/plugins/node_modules/*` via plugin loader)\n3. Explicit configured paths passed to the loader\n\n### Important behavior\n\n- Duplicate resolved paths are deduplicated.\n- Tool name conflicts are rejected against built-ins and already-loaded custom tools.\n- `.md` and `.json` files are discovered as tool metadata by some providers, but the executable module loader rejects them as runnable tools.\n- Relative configured paths are resolved from `cwd`; `~` is expanded.\n\n## Module contract\n\nA custom tool module must export a function (default export preferred):\n\n```ts\nimport type { CustomToolFactory } from \"@oh-my-pi/pi-coding-agent\";\n\nconst factory: CustomToolFactory = (pi) => ({\n\tname: \"repo_stats\",\n\tlabel: \"Repo Stats\",\n\tdescription: \"Counts tracked TypeScript files\",\n\tparameters: pi.zod.object({\n\t\tglob: pi.zod.string().optional().default(\"**/*.ts\"),\n\t}),\n\n\tasync execute(toolCallId, params, onUpdate, ctx, signal) {\n\t\tonUpdate?.({\n\t\t\tcontent: [{ type: \"text\", text: \"Scanning files...\" }],\n\t\t\tdetails: { phase: \"scan\" },\n\t\t});\n\n\t\tconst result = await pi.exec(\"git\", [\"ls-files\", params.glob ?? \"**/*.ts\"], { signal, cwd: pi.cwd });\n\t\tif (result.killed) {\n\t\t\tthrow new Error(\"Scan was cancelled\");\n\t\t}\n\t\tif (result.code !== 0) {\n\t\t\tthrow new Error(result.stderr || \"git ls-files failed\");\n\t\t}\n\n\t\tconst files = result.stdout.split(\"\\n\").filter(Boolean);\n\t\treturn {\n\t\t\tcontent: [{ type: \"text\", text: `Found ${files.length} files` }],\n\t\t\tdetails: { count: files.length, sample: files.slice(0, 10) },\n\t\t};\n\t},\n\n\tonSession(event) {\n\t\tif (event.reason === \"shutdown\") {\n\t\t\t// cleanup resources if needed\n\t\t}\n\t},\n});\n\nexport default factory;\n```\n\nLegacy TypeBox-authored factories can still call `pi.typebox` — it's now a small Zod-backed shim (`Type.Object`, `Type.String`, etc.) baked into the host, not the real `@sinclair/typebox` package. Schemas flow through the same Zod pipeline as `pi.zod` and need no separate normalization.\n\nFactory return type:\n\n- `CustomTool`\n- `CustomTool[]`\n- `Promise<CustomTool | CustomTool[]>`\n\n## API surface passed to factories (`CustomToolAPI`)\n\nFrom `types.ts` and `loader.ts`:\n\n- `cwd`: host working directory\n- `exec(command, args, options?)`: process execution helper\n- `ui`: UI context (can be no-op in headless modes)\n- `hasUI`: `false` in non-interactive flows\n- `logger`: shared file logger\n- `zod`: injected `zod` module (**preferred** for new tool schemas; use `pi.zod.object`, `pi.zod.string`, …)\n- `typebox`: injected zod-backed `Type.*` shim (legacy extension compatibility)\n- `pi`: injected `@oh-my-pi/pi-coding-agent` exports\n- `pushPendingAction(action)`: register a preview action for hidden `resolve` tool (`docs/resolve-tool-runtime.md`)\n\nLoader starts with a no-op UI context and requires host code to call `setUIContext(...)` when real UI is ready.\n\n## Execution contract and typing\n\n`CustomTool.execute` signature:\n\n```ts\nexecute(toolCallId, params, onUpdate, ctx, signal);\n```\n\n- `params` is statically typed from your Zod schema via `z.infer<typeof schema>` (`Static<TParams>` in API types). Legacy TypeBox schemas are lifted to Zod internally.\n- Runtime argument validation happens before execution in the agent loop.\n- `onUpdate` emits partial results for UI streaming.\n- `ctx` includes session/model state and an `abort()` helper.\n- `signal` carries cancellation.\n\n`CustomToolAdapter` bridges this to the agent tool interface and forwards calls in the correct argument order.\n\n## How tools are exposed to the model\n\n- Tools are wrapped into `AgentTool` instances (`CustomToolAdapter` or extension wrappers).\n- They are inserted into the session tool registry by name.\n- In SDK bootstrap, custom and extension-registered tools are force-included in the initial active set.\n- CLI `--tools` currently validates only built-in tool names; custom tool inclusion is handled through discovery/registration paths and SDK options.\n\n## Rendering hooks\n\nOptional rendering hooks:\n\n- `renderCall(args, options, theme)`\n- `renderResult(result, options, theme, args?)`\n\nRuntime behavior in TUI:\n\n- If hooks exist, tool output is rendered inside a `Box` container.\n- `renderResult` receives `{ expanded, isPartial, spinnerFrame? }`.\n- Renderer errors are caught and logged; UI falls back to default text rendering.\n\n## Session/state handling\n\nOptional `onSession(event, ctx)` receives session lifecycle events, including:\n\n- `start`, `switch`, `branch`, `tree`, `shutdown`\n- `auto_compaction_start`, `auto_compaction_end`\n- `auto_retry_start`, `auto_retry_end`\n- `ttsr_triggered`, `todo_reminder`\n\nUse `ctx.sessionManager` to reconstruct state from history when branch/session context changes.\n\n## Failures and cancellation semantics\n\n### Synchronous/async failures\n\n- Throwing (or rejected promises) in `execute` is treated as tool failure.\n- Agent runtime converts failures into tool result messages with `isError: true` and error text content.\n- With extension wrappers, `tool_result` handlers can further rewrite content/details and even override error status.\n\n### Cancellation\n\n- Agent abort propagates through `AbortSignal` to `execute`.\n- Forward `signal` to subprocess work (`pi.exec(..., { signal })`) for cooperative cancellation.\n- `ctx.abort()` lets a tool request abort of the current agent operation.\n\n### onSession errors\n\n- `onSession` errors are caught and logged as warnings; they do not crash the session.\n\n## Real constraints to design for\n\n- Tool names must be globally unique in the active registry.\n- Prefer deterministic, schema-shaped outputs in `details` for renderer/state reconstruction.\n- Guard UI usage with `pi.hasUI`.\n- Treat `.md`/`.json` in tool directories as metadata, not executable modules.\n",
|
|
12
|
+
"environment-variables.md": "# Environment Variables (Current Runtime Reference)\n\nThis reference is derived from current code paths in:\n\n- `packages/coding-agent/src/**`\n- `packages/ai/src/**` (provider/auth resolution used by coding-agent)\n- `packages/utils/src/**` and `packages/tui/src/**` where those vars directly affect coding-agent runtime\n\nIt documents only active behavior.\n\n## Resolution model and precedence\n\nMost runtime lookups use `$env` from `@oh-my-pi/pi-utils` (`packages/utils/src/env.ts`).\n\n`$env` loading order:\n\n1. Existing process environment (`Bun.env`)\n2. Project `.env` (`$PWD/.env`) for keys not already set\n3. Agent `.env` (`~/.omp/agent/.env`, respecting `PI_CONFIG_DIR` / `PI_CODING_AGENT_DIR`) for keys not already set\n4. Config-root `.env` (`~/.omp/.env`, respecting `PI_CONFIG_DIR`) for keys not already set\n5. Home `.env` (`~/.env`) for keys not already set\n\nAdditional rule inside each `.env` file: `OMP_*` keys are mirrored to `PI_*` keys in that parsed file.\n\n---\n\n## 1) Model/provider authentication\n\nThese are consumed via `getEnvApiKey()` (`packages/ai/src/stream.ts`) unless noted otherwise.\n\n### Core provider credentials\n\n| Variable | Used for | Required when | Notes / precedence |\n| ------------------------------- | ------------------------------------------------ | -------------------------------------------------------------- | --------------------------------------------------------------------------------------------------- |\n| `ANTHROPIC_OAUTH_TOKEN` | Anthropic API auth | Using Anthropic with OAuth token auth | Takes precedence over `ANTHROPIC_API_KEY` for provider auth resolution |\n| `ANTHROPIC_API_KEY` | Anthropic API auth | Using Anthropic without OAuth token | Fallback after `ANTHROPIC_OAUTH_TOKEN` |\n| `ANTHROPIC_FOUNDRY_API_KEY` | Anthropic via Azure Foundry / enterprise gateway | `CLAUDE_CODE_USE_FOUNDRY` enabled | Takes precedence over `ANTHROPIC_OAUTH_TOKEN` and `ANTHROPIC_API_KEY` when Foundry mode is enabled |\n| `OPENAI_API_KEY` | OpenAI auth | Using OpenAI-family providers without explicit apiKey argument | Used by OpenAI Completions/Responses providers |\n| `GEMINI_API_KEY` | Google Gemini auth | Using `google` provider models | Primary key for Gemini provider mapping |\n| `GOOGLE_API_KEY` | Gemini image tool auth fallback | Using `gemini_image` tool without `GEMINI_API_KEY` | Used by coding-agent image tool fallback path |\n| `GROQ_API_KEY` | Groq auth | Using Groq models | |\n| `CEREBRAS_API_KEY` | Cerebras auth | Using Cerebras models | |\n| `FIREWORKS_API_KEY` | Fireworks auth | Using Fireworks models | |\n| `TOGETHER_API_KEY` | Together auth | Using `together` provider | |\n| `HUGGINGFACE_HUB_TOKEN` | Hugging Face auth | Using `huggingface` provider | Primary Hugging Face token env var |\n| `HF_TOKEN` | Hugging Face auth | Using `huggingface` provider | Fallback when `HUGGINGFACE_HUB_TOKEN` is unset |\n| `SYNTHETIC_API_KEY` | Synthetic auth | Using Synthetic models | |\n| `NVIDIA_API_KEY` | NVIDIA auth | Using `nvidia` provider | |\n| `NANO_GPT_API_KEY` | NanoGPT auth | Using `nanogpt` provider | |\n| `VENICE_API_KEY` | Venice auth | Using `venice` provider | |\n| `LITELLM_API_KEY` | LiteLLM auth | Using `litellm` provider | OpenAI-compatible LiteLLM proxy key |\n| `LM_STUDIO_API_KEY` | LM Studio auth (optional) | Using `lm-studio` provider with authenticated hosts | Local LM Studio usually runs without auth; any non-empty token works when a key is required |\n| `OLLAMA_API_KEY` | Ollama auth (optional) | Using `ollama` provider with authenticated hosts | Local Ollama usually runs without auth; any non-empty token works when a key is required |\n| `LLAMA_CPP_API_KEY` | llama.cpp auth (optional) | Using `llama.cpp` provider with authenticated hosts | Local llama.cpp usually runs without auth; any non-empty token works when a key is configured |\n| `XIAOMI_API_KEY` | Xiaomi MiMo auth | Using `xiaomi` provider | |\n| `MOONSHOT_API_KEY` | Moonshot auth | Using `moonshot` provider | |\n| `XAI_API_KEY` | xAI auth | Using xAI models | |\n| `OPENROUTER_API_KEY` | OpenRouter auth | Using OpenRouter models | Also used by image tool when preferred/auto provider is OpenRouter |\n| `MISTRAL_API_KEY` | Mistral auth | Using Mistral models | |\n| `ZAI_API_KEY` | z.ai auth | Using z.ai models | Also used by z.ai web search provider |\n| `MINIMAX_API_KEY` | MiniMax auth | Using `minimax` provider | |\n| `MINIMAX_CODE_API_KEY` | MiniMax Code auth | Using `minimax-code` provider | |\n| `MINIMAX_CODE_CN_API_KEY` | MiniMax Code CN auth | Using `minimax-code-cn` provider | |\n| `OPENCODE_API_KEY` | OpenCode auth | Using `opencode-go` / `opencode-zen` models | |\n| `QIANFAN_API_KEY` | Qianfan auth | Using `qianfan` provider | |\n| `QWEN_OAUTH_TOKEN` | Qwen Portal auth | Using `qwen-portal` with OAuth token | Takes precedence over `QWEN_PORTAL_API_KEY` |\n| `QWEN_PORTAL_API_KEY` | Qwen Portal auth | Using `qwen-portal` with API key | Fallback after `QWEN_OAUTH_TOKEN` |\n| `ZENMUX_API_KEY` | ZenMux auth | Using `zenmux` provider | Used for ZenMux OpenAI and Anthropic-compatible routes |\n| `VLLM_API_KEY` | vLLM auth/discovery opt-in | Using `vllm` provider (local OpenAI-compatible servers) | Any non-empty value works for no-auth local servers |\n| `CURSOR_ACCESS_TOKEN` | Cursor provider auth | Using Cursor provider | |\n| `AI_GATEWAY_API_KEY` | Vercel AI Gateway auth | Using `vercel-ai-gateway` provider | |\n| `CLOUDFLARE_AI_GATEWAY_API_KEY` | Cloudflare AI Gateway auth | Using `cloudflare-ai-gateway` provider | Base URL must be configured as `https://gateway.ai.cloudflare.com/v1/<account>/<gateway>/anthropic` |\n| `ALIBABA_CODING_PLAN_API_KEY` | Alibaba Coding Plan auth | Using `alibaba-coding-plan` provider | |\n| `DEEPSEEK_API_KEY` | DeepSeek auth | Using DeepSeek models | |\n| `KILO_API_KEY` | Kilo auth | Using Kilo models | |\n| `OLLAMA_CLOUD_API_KEY` | Ollama Cloud auth | Using `ollama-cloud` provider | |\n| `GITLAB_TOKEN` | GitLab Duo auth | Using `gitlab-duo` provider | |\n\n### GitHub/Copilot token chains\n\n| Variable | Used for | Chain |\n| ---------------------- | ------------------------------------------------ | ---------------------------------------------------- |\n| `COPILOT_GITHUB_TOKEN` | GitHub Copilot provider auth | `COPILOT_GITHUB_TOKEN` → `GH_TOKEN` → `GITHUB_TOKEN` |\n| `GH_TOKEN` | Copilot fallback; GitHub API auth in web scraper | In web scraper: `GITHUB_TOKEN` → `GH_TOKEN` |\n| `GITHUB_TOKEN` | Copilot fallback; GitHub API auth in web scraper | In web scraper: checked before `GH_TOKEN` |\n\n---\n\n## 2) Provider-specific runtime configuration\n\n### Anthropic Foundry Gateway (Azure / enterprise proxy)\n\nWhen `CLAUDE_CODE_USE_FOUNDRY` is enabled, Anthropic requests switch to Foundry mode:\n\n- Base URL resolves from `FOUNDRY_BASE_URL` (fallback remains model/default base URL if unset).\n- API key resolution for provider `anthropic` becomes:\n `ANTHROPIC_FOUNDRY_API_KEY` → `ANTHROPIC_OAUTH_TOKEN` → `ANTHROPIC_API_KEY`.\n- `ANTHROPIC_CUSTOM_HEADERS` is parsed as comma/newline-separated `key: value` pairs and merged into request headers.\n- TLS client/server material can be injected from env values:\n `NODE_EXTRA_CA_CERTS`, `CLAUDE_CODE_CLIENT_CERT`, `CLAUDE_CODE_CLIENT_KEY`.\n Each accepts either:\n - a filesystem path to PEM content, or\n - inline PEM (including escaped `\\n` sequences).\n\n| Variable | Value type | Behavior |\n| --------------------------- | ---------------------------------------------- | ----------------------------------------------------------------------------- |\n| `CLAUDE_CODE_USE_FOUNDRY` | Boolean-like string (`1`, `true`, `yes`, `on`) | Enables Foundry mode for Anthropic provider |\n| `FOUNDRY_BASE_URL` | URL string | Anthropic endpoint base URL in Foundry mode |\n| `ANTHROPIC_FOUNDRY_API_KEY` | Token string | Used for `Authorization: Bearer <token>` |\n| `ANTHROPIC_CUSTOM_HEADERS` | Header list string | Extra headers; format `header-a: value, header-b: value` or newline-separated |\n| `NODE_EXTRA_CA_CERTS` | PEM path or inline PEM | Extra CA chain for server certificate validation |\n| `CLAUDE_CODE_CLIENT_CERT` | PEM path or inline PEM | mTLS client certificate |\n| `CLAUDE_CODE_CLIENT_KEY` | PEM path or inline PEM | mTLS client private key (must be paired with cert) |\n\n### Amazon Bedrock\n\n| Variable | Default / behavior |\n| ------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------- |\n| `AWS_REGION` | Primary region source |\n| `AWS_DEFAULT_REGION` | Fallback if `AWS_REGION` unset |\n| `AWS_PROFILE` | Enables named profile auth path |\n| `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY` | Enables IAM key auth path |\n| `AWS_BEARER_TOKEN_BEDROCK` | Enables bearer token auth path |\n| `AWS_CONTAINER_CREDENTIALS_RELATIVE_URI` / `AWS_CONTAINER_CREDENTIALS_FULL_URI` | Enables ECS task credential path |\n| `AWS_WEB_IDENTITY_TOKEN_FILE` + `AWS_ROLE_ARN` | Enables web identity auth path |\n| `AWS_BEDROCK_SKIP_AUTH` | If `1`, injects dummy credentials (proxy/non-auth scenarios) |\n| `AWS_BEDROCK_FORCE_HTTP1` | If `1`, forces Node HTTP/1 request handler |\n| `HTTPS_PROXY` / `HTTP_PROXY` / `ALL_PROXY` | Routes Bedrock runtime and AWS SSO credential calls through the configured proxy using HTTP/1 |\n| `NO_PROXY` | Excludes matching hosts from proxy routing when a proxy variable is configured |\n\nRegion fallback in provider code: `options.region` → `AWS_REGION` → `AWS_DEFAULT_REGION` → `us-east-1`.\n\n### Azure OpenAI Responses\n\n| Variable | Default / behavior |\n| ---------------------------------- | --------------------------------------------------------------------------- |\n| `AZURE_OPENAI_API_KEY` | Required unless API key passed as option |\n| `AZURE_OPENAI_API_VERSION` | Default `v1` |\n| `AZURE_OPENAI_BASE_URL` | Direct base URL override |\n| `AZURE_OPENAI_RESOURCE_NAME` | Used to construct base URL: `https://<resource>.openai.azure.com/openai/v1` |\n| `AZURE_OPENAI_DEPLOYMENT_NAME_MAP` | Optional mapping string: `modelId=deploymentName,model2=deployment2` |\n\nBase URL resolution: option `azureBaseUrl` → env `AZURE_OPENAI_BASE_URL` → option/env resource name → `model.baseUrl`.\n\n### Google Vertex AI\n\n| Variable | Required? | Notes |\n| -------------------------------- | ------------------------------ | ------------------------------------------------------------------------------------------------------------------------- |\n| `GOOGLE_CLOUD_PROJECT` | Yes (unless passed in options) | Fallback: `GCLOUD_PROJECT` |\n| `GCLOUD_PROJECT` | Fallback | Used as alternate project ID source |\n| `GOOGLE_CLOUD_PROJECT_ID` | OAuth login helper only | Used by Gemini CLI OAuth project discovery |\n| `GOOGLE_CLOUD_LOCATION` | Yes (unless passed in options) | No default in provider |\n| `GOOGLE_CLOUD_API_KEY` | Conditional | Direct Vertex API-key auth; otherwise ADC fallback can authenticate when project and location are set |\n| `GOOGLE_APPLICATION_CREDENTIALS` | Conditional | If set, file must exist; otherwise ADC fallback path is checked (`~/.config/gcloud/application_default_credentials.json`) |\n\n### Kimi\n\n| Variable | Default / behavior |\n| ---------------------- | -------------------------------------------------------- |\n| `KIMI_CODE_OAUTH_HOST` | Primary OAuth host override |\n| `KIMI_OAUTH_HOST` | Fallback OAuth host override |\n| `KIMI_CODE_BASE_URL` | Overrides Kimi usage endpoint base URL (`usage/kimi.ts`) |\n\nOAuth host chain: `KIMI_CODE_OAUTH_HOST` → `KIMI_OAUTH_HOST` → `https://auth.kimi.com`.\n\n### Gemini CLI compatibility\n\n| Variable | Default / behavior |\n| -------------------------- | --------------------------------------------------------------- |\n| `PI_AI_GEMINI_CLI_VERSION` | Overrides Gemini CLI user-agent version tag (`0.35.3` if unset) |\n\n### OpenAI Codex responses (feature/debug controls)\n\n| Variable | Behavior |\n| ------------------------------------ | ---------------------------------------------------- |\n| `PI_CODEX_DEBUG` | `1`/`true` enables Codex provider debug logging |\n| `PI_CODEX_WEBSOCKET` | `1`/`true` enables websocket transport preference |\n| `PI_CODEX_WEBSOCKET_V2` | `1`/`true` enables websocket v2 path |\n| `PI_CODEX_WEBSOCKET_IDLE_TIMEOUT_MS` | Positive integer override (default 300000) |\n| `PI_CODEX_WEBSOCKET_RETRY_BUDGET` | Non-negative integer override (default 5) |\n| `PI_CODEX_WEBSOCKET_RETRY_DELAY_MS` | Positive integer base backoff override (default 500) |\n| `PI_OPENAI_STREAM_IDLE_TIMEOUT_MS` | Positive integer OpenAI stream idle timeout override |\n\n### Cursor provider debug\n\n| Variable | Behavior |\n| ------------------ | ------------------------------------------------------------------------ |\n| `DEBUG_CURSOR` | Enables provider debug logs; `2`/`verbose` for detailed payload snippets |\n| `DEBUG_CURSOR_LOG` | Optional file path for JSONL debug log output |\n\n### Prompt cache compatibility switch\n\n| Variable | Behavior |\n| -------------------- | ----------------------------------------------------------------------------------------------------------------- |\n| `PI_CACHE_RETENTION` | If `long`, enables long retention where supported (`anthropic`, `openai-responses`, Bedrock retention resolution) |\n\n---\n\n## 3) Web search subsystem\n\n### Search provider credentials\n\n| Variable | Used by |\n| --------------------------------------------------- | ------------------------------------------------------------- |\n| `EXA_API_KEY` | Exa search provider and Exa MCP tools |\n| `BRAVE_API_KEY` | Brave search provider |\n| `PERPLEXITY_API_KEY` | Perplexity search provider API-key mode |\n| `PERPLEXITY_COOKIES` | Perplexity cookie-auth search mode |\n| `TAVILY_API_KEY` | Tavily search provider |\n| `ZAI_API_KEY` | z.ai search provider (also checks stored OAuth in `agent.db`) |\n| `OPENAI_API_KEY` / Codex OAuth in DB | Codex search provider availability/auth |\n| `PI_CODEX_WEB_SEARCH_MODEL` | Codex search provider model override |\n| `MOONSHOT_SEARCH_API_KEY` / `KIMI_SEARCH_API_KEY` | Kimi/Moonshot search provider env auth |\n| `MOONSHOT_SEARCH_BASE_URL` / `KIMI_SEARCH_BASE_URL` | Kimi/Moonshot search endpoint override |\n| `KAGI_API_KEY` | Kagi search provider |\n| `JINA_API_KEY` | Jina search provider |\n| `PARALLEL_API_KEY` | Parallel search provider |\n| `SEARXNG_ENDPOINT`, `SEARXNG_TOKEN` | SearXNG endpoint and optional bearer token |\n| `SEARXNG_BASIC_USERNAME`, `SEARXNG_BASIC_PASSWORD` | SearXNG HTTP Basic Auth credentials |\n\nSearXNG also reads the equivalent `searxng.endpoint`, `searxng.token`, `searxng.basicUsername`, and `searxng.basicPassword` settings from `~/.omp/agent/config.yml`; environment variables are fallbacks.\n\n### Anthropic web search auth chain\n\nAnthropic web search uses `findAnthropicAuth()` from `packages/ai/src/utils/anthropic-auth.ts` in this order:\n\n1. `ANTHROPIC_SEARCH_API_KEY` (+ optional `ANTHROPIC_SEARCH_BASE_URL`)\n2. `ANTHROPIC_FOUNDRY_API_KEY` when `CLAUDE_CODE_USE_FOUNDRY` is enabled\n3. Anthropic OAuth credentials from `agent.db` (must not expire within 5-minute buffer)\n4. Anthropic API-key credentials from `agent.db`\n5. Generic Anthropic env fallback: provider key (`ANTHROPIC_FOUNDRY_API_KEY` in Foundry mode, otherwise `ANTHROPIC_OAUTH_TOKEN`/`ANTHROPIC_API_KEY`) + optional `ANTHROPIC_BASE_URL` (`FOUNDRY_BASE_URL` when Foundry mode is enabled)\n\nRelated vars:\n\n| Variable | Default / behavior |\n| --------------------------- | ---------------------------------------------------- |\n| `ANTHROPIC_SEARCH_API_KEY` | Highest-priority explicit search key |\n| `ANTHROPIC_SEARCH_BASE_URL` | Defaults to `https://api.anthropic.com` when omitted |\n| `ANTHROPIC_SEARCH_MODEL` | Defaults to `claude-haiku-4-5` |\n| `ANTHROPIC_BASE_URL` | Generic fallback base URL for tier-4 auth path |\n\n### Perplexity OAuth flow behavior flag\n\n| Variable | Behavior |\n| ------------------- | ------------------------------------------------------------------------------- |\n| `PI_AUTH_NO_BORROW` | If set, disables macOS native-app token borrowing path in Perplexity login flow |\n\n---\n\n## 4) Python tooling and kernel runtime\n\n| Variable | Default / behavior |\n| ------------------------- | ------------------------------------------------------------------------------------------------------------------- |\n| `PI_PY` | Eval backend override: `0`/`bash`=JavaScript only, `1`/`py`=Python only, `mix`/`both`=both; invalid values ignored |\n| `PI_PYTHON_SKIP_CHECK` | If `1`, skips Python interpreter availability checks (subprocess runner still starts on demand) |\n| `PI_PYTHON_INTEGRATION` | If `1`, opts gated integration tests in (e.g. `python-runner.integration.test.ts`) into running against real Python |\n| `PI_PYTHON_IPC_TRACE` | If `1`, logs NDJSON frames exchanged with the Python runner subprocess |\n| `VIRTUAL_ENV` | Highest-priority venv path for Python runtime resolution |\n\nExtra conditional behavior:\n\n- If `BUN_ENV=test` or `NODE_ENV=test`, Python availability checks are treated as OK and warming is skipped.\n- Python env filtering denies common API keys and allows safe base vars + `LC_`, `XDG_`, `PI_` prefixes.\n\n---\n\n## 5) Agent/runtime behavior toggles\n\n| Variable | Default / behavior |\n| ---------------------------- | -------------------------------------------------------------------------------------------------- |\n| `PI_SMOL_MODEL` | Ephemeral model-role override for `smol` (CLI `--smol` takes precedence) |\n| `PI_SLOW_MODEL` | Ephemeral model-role override for `slow` (CLI `--slow` takes precedence) |\n| `PI_PLAN_MODEL` | Ephemeral model-role override for `plan` (CLI `--plan` takes precedence) |\n| `PI_NO_TITLE` | If set (any non-empty value), disables auto session title generation on first user message |\n| `NULL_PROMPT` | If `true`, system prompt builder returns empty string |\n| `PI_BLOCKED_AGENT` | Blocks a specific subagent type in task tool |\n| `PI_SUBPROCESS_CMD` | Overrides subagent spawn command (`omp` / `omp.cmd` resolution bypass) |\n| `PI_TASK_MAX_OUTPUT_BYTES` | Max captured output bytes per subagent (default `500000`) |\n| `PI_TASK_MAX_OUTPUT_LINES` | Max captured output lines per subagent (default `5000`) |\n| `PI_TIMING` | If `1`, enables startup/tool timing instrumentation logs |\n| `PI_PACKAGE_DIR` | Overrides package asset base dir resolution (docs/examples/changelog path lookup) |\n| `PI_DISABLE_LSPMUX` | If `1`, disables lspmux detection/integration and forces direct LSP server spawning |\n| `PI_RPC_EMIT_TITLE` | Boolean-like flag enabling title events in RPC mode |\n| `SMITHERY_URL` | Smithery web URL override (default `https://smithery.ai`) |\n| `SMITHERY_API_URL` | Smithery API base URL override (default `https://api.smithery.ai`) |\n| `PUPPETEER_EXECUTABLE_PATH` | Browser tool Chromium executable override |\n| `LM_STUDIO_BASE_URL` | Default implicit LM Studio discovery base URL override (`http://127.0.0.1:1234/v1` if unset) |\n| `OLLAMA_BASE_URL` | Default implicit Ollama discovery base URL override (`http://127.0.0.1:11434` if unset) |\n| `LLAMA_CPP_BASE_URL` | Default implicit Llama.cpp discovery base URL override (`http://127.0.0.1:8080` if unset) |\n| `PI_EDIT_VARIANT` | Forces edit tool variant when valid (`patch`, `replace`, `hashline`, `atom`, `vim`, `apply_patch`) |\n| `PI_FORCE_IMAGE_PROTOCOL` | Forces supported image protocol (`kitty`, `iterm2`/`iterm`, `sixel`, `none`) where used |\n| `PI_ALLOW_SIXEL_PASSTHROUGH` | Allows SIXEL passthrough when `PI_FORCE_IMAGE_PROTOCOL=sixel` |\n| `PI_NO_PTY` | If `1`, disables interactive PTY path for bash tool |\n\n`PI_NO_PTY` is also set internally when CLI `--no-pty` is used.\n\n---\n\n## 6) Storage and config root paths\n\nThese are consumed via `@oh-my-pi/pi-utils/dirs` and affect where coding-agent stores data.\n\n| Variable | Default / behavior |\n| --------------------- | ----------------------------------------------------------------------------- |\n| `PI_CONFIG_DIR` | Config root dirname under home (default `.omp`) |\n| `PI_CODING_AGENT_DIR` | Full override for agent directory (default `~/<PI_CONFIG_DIR or .omp>/agent`) |\n| `PWD` | Used when matching canonical current working directory in path helpers |\n\n---\n\n## 7) Shell/tool execution environment\n\n(From `packages/utils/src/procmgr.ts` and coding-agent bash tool integration.)\n\n| Variable | Behavior |\n| -------------------------- | ------------------------------------------------------------------------------ |\n| `PI_BASH_NO_CI` | Suppresses automatic `CI=true` injection into spawned shell env |\n| `CLAUDE_BASH_NO_CI` | Legacy alias fallback for `PI_BASH_NO_CI` |\n| `PI_BASH_NO_LOGIN` | Disables login-shell mode; shell args become `['-c']` instead of `['-l','-c']` |\n| `CLAUDE_BASH_NO_LOGIN` | Legacy alias fallback for `PI_BASH_NO_LOGIN` |\n| `PI_SHELL_PREFIX` | Optional command prefix wrapper |\n| `CLAUDE_CODE_SHELL_PREFIX` | Legacy alias fallback for `PI_SHELL_PREFIX` |\n| `VISUAL` | Preferred external editor command |\n| `EDITOR` | Fallback external editor command |\n\nCurrent implementation: `PI_BASH_NO_LOGIN`/`CLAUDE_BASH_NO_LOGIN` are active; when either is set, `getShellArgs()` returns `['-c']`.\n\n---\n\n## 8) UI/theme/session detection (auto-detected env)\n\nThese are read as runtime signals; they are usually set by the terminal/OS rather than manually configured.\n\n| Variable | Used for |\n| ------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------- |\n| `COLORTERM`, `TERM`, `WT_SESSION` | Color capability detection (theme color mode) |\n| `COLORFGBG` | Terminal background light/dark auto-detection |\n| `TERM_PROGRAM`, `TERM_PROGRAM_VERSION`, `TERMINAL_EMULATOR` | Terminal identity in system prompt/context |\n| `KDE_FULL_SESSION`, `XDG_CURRENT_DESKTOP`, `DESKTOP_SESSION`, `XDG_SESSION_DESKTOP`, `GDMSESSION`, `WINDOWMANAGER` | Desktop/window-manager detection in system prompt/context |\n| `KITTY_WINDOW_ID`, `TMUX_PANE`, `TERM_SESSION_ID`, `WT_SESSION` | Stable per-terminal session breadcrumb IDs |\n| `SHELL`, `ComSpec`, `TERM_PROGRAM`, `TERM` | System info diagnostics |\n| `APPDATA`, `XDG_CONFIG_HOME` | lspmux config path resolution |\n| `HOME` | Path shortening in MCP command UI |\n\n---\n\n## 9) TUI runtime flags (shared package, affects coding-agent UX)\n\n| Variable | Behavior |\n| ------------------------- | ------------------------------------------------------------------------------------- |\n| `PI_NOTIFICATIONS` | `off` / `0` / `false` suppress desktop notifications |\n| `PI_TUI_WRITE_LOG` | If set, logs TUI writes to file |\n| `PI_HARDWARE_CURSOR` | If `1`, enables hardware cursor mode |\n| `PI_CLEAR_ON_SHRINK` | If `1`, clears empty rows when content shrinks |\n| `PI_DEBUG_REDRAW` | If `1`, enables redraw debug logging |\n| `PI_TUI_DEBUG` | If `1`, enables deep TUI debug dump path |\n| `PI_FORCE_IMAGE_PROTOCOL` | Forces terminal image protocol detection (`kitty`, `iterm2`/`iterm`, `sixel`, `none`) |\n\n---\n\n## 10) Commit generation controls\n\n| Variable | Behavior |\n| ------------------------- | ------------------------------------------------------------------- |\n| `PI_COMMIT_TEST_FALLBACK` | If `true` (case-insensitive), force commit fallback generation path |\n| `PI_COMMIT_NO_FALLBACK` | If `true`, disables fallback when agent returns no proposal |\n| `PI_COMMIT_MAP_REDUCE` | If `false`, disables map-reduce commit analysis path |\n| `DEBUG` | If set, commit agent error stack traces are printed |\n\n---\n\n## Security-sensitive variables\n\nTreat these as secrets; do not log or commit them:\n\n- Provider/API keys and OAuth/bearer credentials (all `*_API_KEY`, `*_TOKEN`, OAuth access/refresh tokens)\n- Cloud credentials (`AWS_*`, `GOOGLE_APPLICATION_CREDENTIALS` path may expose service-account material)\n- Search/provider auth vars (`EXA_API_KEY`, `BRAVE_API_KEY`, `PERPLEXITY_API_KEY`, Anthropic search keys)\n- Foundry mTLS material (`CLAUDE_CODE_CLIENT_CERT`, `CLAUDE_CODE_CLIENT_KEY`, `NODE_EXTRA_CA_CERTS` when it points to private CA bundles)\n\nPython runtime also explicitly strips many common key vars before spawning kernel subprocesses (`packages/coding-agent/src/eval/py/runtime.ts`).\n",
|
|
13
13
|
"extension-loading.md": "# Extension Loading (TypeScript/JavaScript Modules)\n\nThis document covers how the coding agent discovers and loads **extension modules** (`.ts`/`.js`) at startup.\n\nIt does **not** cover `gemini-extension.json` manifest extensions (documented separately).\n\n## What this subsystem does\n\nExtension loading builds a list of module entry files, imports each module with Bun, executes its factory, and returns:\n\n- loaded extension definitions\n- per-path load errors (without aborting the whole load)\n- a shared extension runtime object used later by `ExtensionRunner`\n\n## Primary implementation files\n\n- `src/extensibility/extensions/loader.ts` — path discovery + import/execution\n- `src/extensibility/extensions/index.ts` — public exports\n- `src/extensibility/extensions/runner.ts` — runtime/event execution after load\n- `src/discovery/builtin.ts` — native auto-discovery provider for extension modules\n- `src/config/settings.ts` — loads merged `extensions` / `disabledExtensions` settings\n\n---\n\n## Inputs to extension loading\n\n### 1) Auto-discovered native extension modules\n\n`discoverAndLoadExtensions()` first asks discovery providers for `extension-module` capability items, then keeps only provider `native` items.\n\nEffective native locations:\n\n- Project: `<cwd>/.omp/extensions`\n- User: `~/.omp/agent/extensions`\n\nPath roots come from the native provider (`SOURCE_PATHS.native`).\n\nNotes:\n\n- Native auto-discovery is currently `.omp` based.\n- Legacy `.pi` is still accepted in `package.json` manifest keys (`pi.extensions`), but not as a native root here.\n\n### 2) Installed plugin extension entries\n\nAfter native auto-discovery, `discoverAndLoadExtensions()` appends extension entry points from enabled installed plugins via `getAllPluginExtensionPaths(cwd)`.\n\nPlugin extension entries come from package `omp.extensions` / `pi.extensions` manifests, including enabled feature entries.\n\n### 3) Explicitly configured paths\n\nAfter plugin extension entries, configured paths are appended and resolved.\n\nConfigured path sources in the main session startup path (`sdk.ts`):\n\n1. CLI-provided paths (`--extension/-e`, and `--hook` is also treated as an extension path)\n2. Settings `extensions` array (merged global + project settings)\n\nGlobal settings file:\n\n- `~/.omp/agent/config.yml` (or custom agent dir via `PI_CODING_AGENT_DIR`)\n\nProject settings file:\n\n- `<cwd>/.omp/settings.json`\n\nExamples:\n\n```yaml\n# ~/.omp/agent/config.yml\nextensions:\n - ~/my-exts/safety.ts\n - ./local/ext-pack\n```\n\n```json\n{\n \"extensions\": [\"./.omp/extensions/my-extra\"]\n}\n```\n\n---\n\n## Enable/disable controls\n\n### Disable discovery\n\n- CLI: `--no-extensions`\n- SDK option: `disableExtensionDiscovery`\n\nBehavior split:\n\n- SDK: when `disableExtensionDiscovery=true`, it still loads `additionalExtensionPaths` via `loadExtensions()`.\n- CLI path building (`main.ts`) currently clears CLI extension paths when `--no-extensions` is set, so explicit `-e/--hook` are not forwarded in that mode.\n\n### Disable specific extension modules\n\n`disabledExtensions` setting filters by extension id format:\n\n- `extension-module:<derivedName>`\n\n`derivedName` is based on entry path (`getExtensionNameFromPath`), for example:\n\n- `/x/foo.ts` -> `foo`\n- `/x/bar/index.ts` -> `bar`\n\nExample:\n\n```yaml\ndisabledExtensions:\n - extension-module:foo\n```\n\n---\n\n## Path and entry resolution\n\n### Path normalization\n\nFor configured paths:\n\n1. Normalize unicode spaces\n2. Expand `~`\n3. If relative, resolve against current `cwd`\n\n### If configured path is a file\n\nIt is used directly as a module entry candidate.\n\n### If configured path is a directory\n\nResolution order:\n\n1. `package.json` in that directory with `omp.extensions` (or legacy `pi.extensions`) -> use declared entries\n2. `index.ts`\n3. `index.js`\n4. Otherwise scan one level for extension entries:\n - direct `*.ts` / `*.js`\n - subdir `index.ts` / `index.js`\n - subdir `package.json` with `omp.extensions` / `pi.extensions`\n\nRules and constraints:\n\n- no recursive discovery beyond one subdirectory level\n- declared `extensions` manifest entries are resolved relative to that package directory\n- declared entries are included only if file exists/access is allowed\n- in `*/index.{ts,js}` pairs, TypeScript is preferred over JavaScript\n- symlinks are treated as eligible files/directories\n\n### Ignore behavior differs by source\n\n- Native auto-discovery (`discoverExtensionModulePaths` in discovery helpers) uses native glob with `gitignore: true` and `hidden: false`.\n- Explicit configured directory scanning in `loader.ts` uses `readdir` rules and does **not** apply gitignore filtering.\n\n---\n\n## Load order and precedence\n\n`discoverAndLoadExtensions()` builds one ordered list and then calls `loadExtensions()`.\n\nOrder:\n\n1. Native auto-discovered modules\n2. Installed plugin extension entries\n3. Explicit configured paths (in provided order)\n\nIn `sdk.ts`, configured order is:\n\n1. CLI additional paths\n2. Settings `extensions`\n\nDe-duplication:\n\n- absolute path based\n- first seen path wins\n- later duplicates are ignored\n\nImplication: if the same module path is both auto-discovered and explicitly configured, it is loaded once at the first position (auto-discovered stage).\n\n---\n\n## Module import and factory contract\n\nEach candidate path is loaded with dynamic import:\n\n- `await import(resolvedPath)`\n- factory is `module.default ?? module`\n- factory must be a function (`ExtensionFactory`)\n\nIf export is not a function, that path fails with a structured error and loading continues.\n\n---\n\n## Failure handling and isolation\n\n### During loading\n\nPer extension path, failures are captured as `{ path, error }` and do not stop other paths from loading.\n\nCommon cases:\n\n- import failure / missing file\n- invalid factory export (non-function)\n- exception thrown while executing factory\n\n### Runtime isolation model\n\n- Extensions are **not sandboxed** (same process/runtime).\n- They share one `EventBus` and one `ExtensionRuntime` instance.\n- During load, runtime action methods intentionally throw `ExtensionRuntimeNotInitializedError`; action wiring happens later in `ExtensionRunner.initialize()`.\n\n### After loading\n\nWhen events run through `ExtensionRunner`, handler exceptions are caught and emitted as extension errors instead of crashing the runner loop.\n\n---\n\n## Minimal user/project layout examples\n\n### User-level\n\n```text\n~/.omp/agent/\n config.yml\n extensions/\n guardrails.ts\n audit/\n index.ts\n```\n\n### Project-level\n\n```text\n<repo>/\n .omp/\n settings.json\n extensions/\n checks/\n package.json\n lint-gates.ts\n```\n\n`checks/package.json`:\n\n```json\n{\n \"omp\": {\n \"extensions\": [\"./src/check-a.ts\", \"./src/check-b.js\"]\n }\n}\n```\n\nLegacy manifest key still accepted:\n\n```json\n{\n \"pi\": {\n \"extensions\": [\"./index.ts\"]\n }\n}\n```\n",
|
|
14
|
-
"extensions.md": "# Extensions\n\nPrimary guide for authoring runtime extensions in `packages/coding-agent`.\n\nThis document covers the current extension runtime in:\n\n- `src/extensibility/extensions/types.ts`\n- `src/extensibility/extensions/runner.ts`\n- `src/extensibility/extensions/wrapper.ts`\n- `src/extensibility/extensions/index.ts`\n- `src/modes/controllers/extension-ui-controller.ts`\n\nFor discovery paths and filesystem loading rules, see `docs/extension-loading.md`.\n\n## What an extension is\n\nAn extension is a TS/JS module exporting a default factory:\n\n```ts\nimport type { ExtensionAPI } from \"@oh-my-pi/pi-coding-agent\";\n\nexport default function myExtension(pi: ExtensionAPI) {\n // register handlers/tools/commands/renderers\n}\n```\n\nExtensions can combine all of the following in one module:\n\n- event handlers (`pi.on(...)`)\n- LLM-callable tools (`pi.registerTool(...)`)\n- slash commands (`pi.registerCommand(...)`)\n- keyboard shortcuts and flags\n- custom message rendering\n- session/message injection APIs (`sendMessage`, `sendUserMessage`, `appendEntry`)\n\n## Runtime model\n\n1. Extensions are imported and their factory functions run.\n2. During that load phase, registration methods are valid; runtime action methods are not yet initialized.\n3. `ExtensionRunner.initialize(...)` wires live actions/contexts for the active mode.\n4. Session/agent/tool lifecycle events are emitted to handlers.\n5. Every tool execution is wrapped with extension interception (`tool_call` / `tool_result`).\n\n```text\nExtension lifecycle (simplified)\n\nload paths\n │\n ▼\nimport module + run factory (registration only)\n │\n ▼\nExtensionRunner.initialize(mode/session/tool registry)\n │\n ├─ emit session/agent events to handlers\n ├─ wrap tool execution (tool_call/tool_result)\n └─ expose runtime actions (sendMessage, setActiveTools, ...)\n```\n\nImportant constraint from `loader.ts`:\n\n- calling action methods like `pi.sendMessage()` during extension load throws `ExtensionRuntimeNotInitializedError`\n- register first; perform runtime behavior from events/commands/tools\n\n## Quick start\n\n```ts\nimport type { ExtensionAPI } from \"@oh-my-pi/pi-coding-agent\";\nimport { Type } from \"@sinclair/typebox\";\n\nexport default function (pi: ExtensionAPI) {\n pi.setLabel(\"Safety + Utilities\");\n\n pi.on(\"session_start\", async (_event, ctx) => {\n ctx.ui.notify(`Extension loaded in ${ctx.cwd}`, \"info\");\n });\n\n pi.on(\"tool_call\", async (event) => {\n if (event.toolName === \"bash\" && event.input.command?.includes(\"rm -rf\")) {\n return { block: true, reason: \"Blocked by extension policy\" };\n }\n });\n\n pi.registerTool({\n name: \"hello_extension\",\n label: \"Hello Extension\",\n description: \"Return a greeting\",\n parameters: Type.Object({ name: Type.String() }),\n async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {\n return {\n content: [{ type: \"text\", text: `Hello, ${params.name}` }],\n details: { greeted: params.name },\n };\n },\n });\n\n pi.registerCommand(\"hello-ext\", {\n description: \"Show queue state\",\n handler: async (_args, ctx) => {\n ctx.ui.notify(`pending=${ctx.hasPendingMessages()}`, \"info\");\n },\n });\n}\n```\n\n## Extension API surfaces\n\n## 1) Registration and actions (`ExtensionAPI`)\n\nCore methods:\n\n- `on(event, handler)`\n- `registerTool`, `registerCommand`, `registerShortcut`, `registerFlag`\n- `registerMessageRenderer`\n- `sendMessage`, `sendUserMessage`, `appendEntry`\n- `getActiveTools`, `getAllTools`, `setActiveTools`\n- `getSessionName`, `setSessionName`\n- `setModel`, `getThinkingLevel`, `setThinkingLevel`\n- `registerProvider`\n- `events` (shared event bus)\n\nIn interactive mode, `input` handlers run before the built-in first-message auto-title check. Extensions that call `await pi.setSessionName(...)` from `input` can set the persisted session name and prevent the default auto-generated title from running for that session.\n\nAlso exposed:\n\n- `pi.logger`\n- `pi.typebox`\n- `pi.pi` (package exports)\n\n### Message delivery semantics\n\n`pi.sendMessage(message, options)` supports:\n\n- `deliverAs: \"steer\"` (default) — interrupts current run\n- `deliverAs: \"followUp\"` — queued to run after current run\n- `deliverAs: \"nextTurn\"` — stored and injected on the next user prompt\n- `triggerTurn: true` — starts a turn when idle (`nextTurn` ignores this)\n\n`pi.sendUserMessage(content, { deliverAs })` always goes through prompt flow; while streaming it queues as steer/follow-up.\n\n## 2) Handler context (`ExtensionContext`)\n\nHandlers and tool `execute` receive `ctx` with:\n\n- `ui`\n- `hasUI`\n- `cwd`\n- `sessionManager` (read-only)\n- `modelRegistry`, `model`\n- `getContextUsage()`\n- `compact(...)`\n- `isIdle()`, `hasPendingMessages()`, `abort()`\n- `shutdown()`\n- `getSystemPrompt()`\n\n## 3) Command context (`ExtensionCommandContext`)\n\nCommand handlers additionally get:\n\n- `waitForIdle()`\n- `newSession(...)`\n- `switchSession(...)`\n- `branch(entryId)`\n- `navigateTree(targetId, { summarize })`\n- `reload()`\n\nUse command context for session-control flows; these methods are intentionally separated from general event handlers.\n\n## Event surface (current names and behavior)\n\nCanonical event unions and payload types are in `types.ts`.\n\n### Session lifecycle\n\n- `session_start`\n- `session_before_switch` / `session_switch`\n- `session_before_branch` / `session_branch`\n- `session_before_compact` / `session.compacting` / `session_compact`\n- `session_before_tree` / `session_tree`\n- `session_shutdown`\n\nCancelable pre-events:\n\n- `session_before_switch` → `{ cancel?: boolean }`\n- `session_before_branch` → `{ cancel?: boolean; skipConversationRestore?: boolean }`\n- `session_before_compact` → `{ cancel?: boolean; compaction?: CompactionResult }`\n- `session_before_tree` → `{ cancel?: boolean; summary?: { summary: string; details?: unknown } }`\n\n### Prompt and turn lifecycle\n\n- `input`\n- `before_agent_start`\n- `context`\n- `agent_start` / `agent_end`\n- `turn_start` / `turn_end`\n- `message_start` / `message_update` / `message_end`\n\n### Tool lifecycle\n\n- `tool_call` (pre-exec, may block)\n- `tool_result` (post-exec, may patch content/details/isError)\n- `tool_execution_start` / `tool_execution_update` / `tool_execution_end` (observability)\n\n`tool_result` is middleware-style: handlers run in extension order and each sees prior modifications.\n\n### Reliability/runtime signals\n\n- `auto_compaction_start` / `auto_compaction_end`\n- `auto_retry_start` / `auto_retry_end`\n- `ttsr_triggered`\n- `todo_reminder`\n\n### User command interception\n\n- `user_bash` (override with `{ result }`)\n- `user_python` (override with `{ result }`)\n\n### `resources_discover`\n\n`resources_discover` exists in extension types and `ExtensionRunner`.\nCurrent runtime note: `ExtensionRunner.emitResourcesDiscover(...)` is implemented, but there are no `AgentSession` callsites invoking it in the current codebase.\n\n## Tool authoring details\n\n`registerTool` uses `ToolDefinition` from `types.ts`.\n\nCurrent `execute` signature:\n\n```ts\nexecute(\n\ttoolCallId,\n\tparams,\n\tsignal,\n\tonUpdate,\n\tctx,\n): Promise<AgentToolResult>\n```\n\nTemplate:\n\n```ts\npi.registerTool({\n name: \"my_tool\",\n label: \"My Tool\",\n description: \"...\",\n parameters: Type.Object({}),\n async execute(_id, _params, signal, onUpdate, ctx) {\n if (signal?.aborted) {\n return { content: [{ type: \"text\", text: \"Cancelled\" }] };\n }\n onUpdate?.({ content: [{ type: \"text\", text: \"Working...\" }] });\n return { content: [{ type: \"text\", text: \"Done\" }], details: {} };\n },\n onSession(event, ctx) {\n // reason: start|switch|branch|tree|shutdown\n },\n renderCall(args, options, theme) {\n // optional TUI render\n },\n renderResult(result, options, theme, args) {\n // optional TUI render\n },\n});\n```\n\n`tool_call`/`tool_result` intercept all tools once the registry is wrapped in `sdk.ts`, including built-ins and extension/custom tools.\n\n## UI integration points\n\n`ctx.ui` implements the `ExtensionUIContext` interface. Support differs by mode.\n\n### Interactive mode (`extension-ui-controller.ts`)\n\nSupported:\n\n- dialogs: `select`, `confirm`, `input`, `editor`\n- notifications/status/editor text/terminal input/custom overlays\n- theme listing/loading by name (`setTheme` supports string names)\n- tools expanded toggle\n\nCurrent no-op methods in this controller:\n\n- `setFooter`\n- `setHeader`\n- `setEditorComponent`\n\nAlso note: `setWidget` currently routes to status-line text via `setHookWidget(...)`.\n\n### RPC mode (`rpc-mode.ts`)\n\n`ctx.ui` is backed by RPC `extension_ui_request` events:\n\n- dialog methods (`select`, `confirm`, `input`, `editor`) round-trip to client responses\n- fire-and-forget methods emit requests (`notify`, `setStatus`, `setWidget` for string arrays, `setTitle`, `setEditorText`)\n\nUnsupported/no-op in RPC implementation:\n\n- `onTerminalInput`\n- `custom`\n- `setFooter`, `setHeader`, `setEditorComponent`\n- `setWorkingMessage`\n- theme switching/loading (`setTheme` returns failure)\n- tool expansion controls are inert\n\n### Print/headless/subagent paths\n\nWhen no UI context is supplied to runner init, `ctx.hasUI` is `false` and methods are no-op/default-returning.\n\n### Background interactive mode\n\nBackground mode installs a non-interactive UI context object. In current implementation, `ctx.hasUI` may still be `true` while interactive dialogs return defaults/no-op behavior.\n\n## Session and state patterns\n\nFor durable extension state:\n\n1. Persist with `pi.appendEntry(customType, data)`.\n2. Rebuild state from `ctx.sessionManager.getBranch()` on `session_start`, `session_branch`, `session_tree`.\n3. Keep tool result `details` structured when state should be visible/reconstructible from tool result history.\n\nExample reconstruction pattern:\n\n```ts\npi.on(\"session_start\", async (_event, ctx) => {\n let latest;\n for (const entry of ctx.sessionManager.getBranch()) {\n if (entry.type === \"custom\" && entry.customType === \"my-state\") {\n latest = entry.data;\n }\n }\n // restore from latest\n});\n```\n\n## Rendering extension points\n\n## Custom message renderer\n\n```ts\npi.registerMessageRenderer(\"my-type\", (message, { expanded }, theme) => {\n // return pi-tui Component\n});\n```\n\nUsed by interactive rendering when custom messages are displayed.\n\n## Tool call/result renderer\n\nProvide `renderCall` / `renderResult` on `registerTool` definitions for custom tool visualization in TUI.\n\n## Constraints and pitfalls\n\n- Runtime actions are unavailable during extension load.\n- `tool_call` errors block execution (fail-closed).\n- Command name conflicts with built-ins are skipped with diagnostics.\n- Reserved shortcuts are ignored (`ctrl+c`, `ctrl+d`, `ctrl+z`, `ctrl+k`, `ctrl+p`, `ctrl+l`, `ctrl+o`, `ctrl+t`, `ctrl+g`, `shift+tab`, `shift+ctrl+p`, `alt+enter`, `escape`, `enter`).\n- Treat `ctx.reload()` as terminal for the current command handler frame.\n\n## Extensions vs hooks vs custom-tools\n\nUse the right surface:\n\n- **Extensions** (`src/extensibility/extensions/*`): unified system (events + tools + commands + renderers + provider registration).\n- **Hooks** (`src/extensibility/hooks/*`): separate legacy event API.\n- **Custom-tools** (`src/extensibility/custom-tools/*`): tool-focused modules; when loaded alongside extensions they are adapted and still pass through extension interception wrappers.\n\nIf you need one package that owns policy, tools, command UX, and rendering together, use extensions.\n",
|
|
14
|
+
"extensions.md": "# Extensions\n\nPrimary guide for authoring runtime extensions in `packages/coding-agent`.\n\nThis document covers the current extension runtime in:\n\n- `src/extensibility/extensions/types.ts`\n- `src/extensibility/extensions/runner.ts`\n- `src/extensibility/extensions/wrapper.ts`\n- `src/extensibility/extensions/index.ts`\n- `src/modes/controllers/extension-ui-controller.ts`\n\nFor discovery paths and filesystem loading rules, see `docs/extension-loading.md`.\n\n## What an extension is\n\nAn extension is a TS/JS module exporting a default factory:\n\n```ts\nimport type { ExtensionAPI } from \"@oh-my-pi/pi-coding-agent\";\n\nexport default function myExtension(pi: ExtensionAPI) {\n // register handlers/tools/commands/renderers\n}\n```\n\nExtensions can combine all of the following in one module:\n\n- event handlers (`pi.on(...)`)\n- LLM-callable tools (`pi.registerTool(...)`)\n- slash commands (`pi.registerCommand(...)`)\n- keyboard shortcuts and flags\n- custom message rendering\n- session/message injection APIs (`sendMessage`, `sendUserMessage`, `appendEntry`)\n\n## Runtime model\n\n1. Extensions are imported and their factory functions run.\n2. During that load phase, registration methods are valid; runtime action methods are not yet initialized.\n3. `ExtensionRunner.initialize(...)` wires live actions/contexts for the active mode.\n4. Session/agent/tool lifecycle events are emitted to handlers.\n5. Every tool execution is wrapped with extension interception (`tool_call` / `tool_result`).\n\n```text\nExtension lifecycle (simplified)\n\nload paths\n │\n ▼\nimport module + run factory (registration only)\n │\n ▼\nExtensionRunner.initialize(mode/session/tool registry)\n │\n ├─ emit session/agent events to handlers\n ├─ wrap tool execution (tool_call/tool_result)\n └─ expose runtime actions (sendMessage, setActiveTools, ...)\n```\n\nImportant constraint from `loader.ts`:\n\n- calling action methods like `pi.sendMessage()` during extension load throws `ExtensionRuntimeNotInitializedError`\n- register first; perform runtime behavior from events/commands/tools\n\n## Quick start\n\n```ts\nimport type { ExtensionAPI } from \"@oh-my-pi/pi-coding-agent\";\n\nexport default function (pi: ExtensionAPI) {\n const { z } = pi.zod;\n\n pi.setLabel(\"Safety + Utilities\");\n\n pi.on(\"session_start\", async (_event, ctx) => {\n ctx.ui.notify(`Extension loaded in ${ctx.cwd}`, \"info\");\n });\n\n pi.on(\"tool_call\", async (event) => {\n if (event.toolName === \"bash\" && event.input.command?.includes(\"rm -rf\")) {\n return { block: true, reason: \"Blocked by extension policy\" };\n }\n });\n\n pi.registerTool({\n name: \"hello_extension\",\n label: \"Hello Extension\",\n description: \"Return a greeting\",\n parameters: z.object({ name: z.string() }),\n async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {\n return {\n content: [{ type: \"text\", text: `Hello, ${params.name}` }],\n details: { greeted: params.name },\n };\n },\n });\n\n pi.registerCommand(\"hello-ext\", {\n description: \"Show queue state\",\n handler: async (_args, ctx) => {\n ctx.ui.notify(`pending=${ctx.hasPendingMessages()}`, \"info\");\n },\n });\n}\n```\n\n## Extension API surfaces\n\n## 1) Registration and actions (`ExtensionAPI`)\n\nCore methods:\n\n- `on(event, handler)`\n- `registerTool`, `registerCommand`, `registerShortcut`, `registerFlag`\n- `registerMessageRenderer`\n- `sendMessage`, `sendUserMessage`, `appendEntry`\n- `getActiveTools`, `getAllTools`, `setActiveTools`\n- `getSessionName`, `setSessionName`\n- `setModel`, `getThinkingLevel`, `setThinkingLevel`\n- `registerProvider`\n- `events` (shared event bus)\n\nIn interactive mode, `input` handlers run before the built-in first-message auto-title check. Extensions that call `await pi.setSessionName(...)` from `input` can set the persisted session name and prevent the default auto-generated title from running for that session.\n\nAlso exposed:\n\n- `pi.logger`\n- `pi.zod` (injected `zod` module — **preferred** for new tool schemas)\n- `pi.typebox` (zod-backed `Type.*` shim — retained for legacy extension compat)\n- `pi.pi` (package exports)\n\n### Message delivery semantics\n\n`pi.sendMessage(message, options)` supports:\n\n- `deliverAs: \"steer\"` (default) — interrupts current run\n- `deliverAs: \"followUp\"` — queued to run after current run\n- `deliverAs: \"nextTurn\"` — stored and injected on the next user prompt\n- `triggerTurn: true` — starts a turn when idle (`nextTurn` ignores this)\n\n`pi.sendUserMessage(content, { deliverAs })` always goes through prompt flow; while streaming it queues as steer/follow-up.\n\n## 2) Handler context (`ExtensionContext`)\n\nHandlers and tool `execute` receive `ctx` with:\n\n- `ui`\n- `hasUI`\n- `cwd`\n- `sessionManager` (read-only)\n- `modelRegistry`, `model`\n- `getContextUsage()`\n- `compact(...)`\n- `isIdle()`, `hasPendingMessages()`, `abort()`\n- `shutdown()`\n- `getSystemPrompt()`\n\n## 3) Command context (`ExtensionCommandContext`)\n\nCommand handlers additionally get:\n\n- `waitForIdle()`\n- `newSession(...)`\n- `switchSession(...)`\n- `branch(entryId)`\n- `navigateTree(targetId, { summarize })`\n- `reload()`\n\nUse command context for session-control flows; these methods are intentionally separated from general event handlers.\n\n## Event surface (current names and behavior)\n\nCanonical event unions and payload types are in `types.ts`.\n\n### Session lifecycle\n\n- `session_start`\n- `session_before_switch` / `session_switch`\n- `session_before_branch` / `session_branch`\n- `session_before_compact` / `session.compacting` / `session_compact`\n- `session_before_tree` / `session_tree`\n- `session_shutdown`\n\nCancelable pre-events:\n\n- `session_before_switch` → `{ cancel?: boolean }`\n- `session_before_branch` → `{ cancel?: boolean; skipConversationRestore?: boolean }`\n- `session_before_compact` → `{ cancel?: boolean; compaction?: CompactionResult }`\n- `session_before_tree` → `{ cancel?: boolean; summary?: { summary: string; details?: unknown } }`\n\n### Prompt and turn lifecycle\n\n- `input`\n- `before_agent_start`\n- `context`\n- `agent_start` / `agent_end`\n- `turn_start` / `turn_end`\n- `message_start` / `message_update` / `message_end`\n\n### Tool lifecycle\n\n- `tool_call` (pre-exec, may block)\n- `tool_result` (post-exec, may patch content/details/isError)\n- `tool_execution_start` / `tool_execution_update` / `tool_execution_end` (observability)\n\n`tool_result` is middleware-style: handlers run in extension order and each sees prior modifications.\n\n### Reliability/runtime signals\n\n- `auto_compaction_start` / `auto_compaction_end`\n- `auto_retry_start` / `auto_retry_end`\n- `ttsr_triggered`\n- `todo_reminder`\n\n### User command interception\n\n- `user_bash` (override with `{ result }`)\n- `user_python` (override with `{ result }`)\n\n### `resources_discover`\n\n`resources_discover` exists in extension types and `ExtensionRunner`.\nCurrent runtime note: `ExtensionRunner.emitResourcesDiscover(...)` is implemented, but there are no `AgentSession` callsites invoking it in the current codebase.\n\n## Tool authoring details\n\n`registerTool` uses `ToolDefinition` from `types.ts`.\n\nCurrent `execute` signature:\n\n```ts\nexecute(\n\ttoolCallId,\n\tparams,\n\tsignal,\n\tonUpdate,\n\tctx,\n): Promise<AgentToolResult>\n```\n\nTemplate:\n\n```ts\nconst { z } = pi.zod;\n\npi.registerTool({\n name: \"my_tool\",\n label: \"My Tool\",\n description: \"...\",\n parameters: z.object({}),\n async execute(_id, _params, signal, onUpdate, ctx) {\n if (signal?.aborted) {\n return { content: [{ type: \"text\", text: \"Cancelled\" }] };\n }\n onUpdate?.({ content: [{ type: \"text\", text: \"Working...\" }] });\n return { content: [{ type: \"text\", text: \"Done\" }], details: {} };\n },\n onSession(event, ctx) {\n // reason: start|switch|branch|tree|shutdown\n },\n renderCall(args, options, theme) {\n // optional TUI render\n },\n renderResult(result, options, theme, args) {\n // optional TUI render\n },\n});\n```\n\n`tool_call`/`tool_result` intercept all tools once the registry is wrapped in `sdk.ts`, including built-ins and extension/custom tools.\n\n## UI integration points\n\n`ctx.ui` implements the `ExtensionUIContext` interface. Support differs by mode.\n\n### Interactive mode (`extension-ui-controller.ts`)\n\nSupported:\n\n- dialogs: `select`, `confirm`, `input`, `editor`\n- notifications/status/editor text/terminal input/custom overlays\n- theme listing/loading by name (`setTheme` supports string names)\n- tools expanded toggle\n\nCurrent no-op methods in this controller:\n\n- `setFooter`\n- `setHeader`\n- `setEditorComponent`\n\nAlso note: `setWidget` currently routes to status-line text via `setHookWidget(...)`.\n\n### RPC mode (`rpc-mode.ts`)\n\n`ctx.ui` is backed by RPC `extension_ui_request` events:\n\n- dialog methods (`select`, `confirm`, `input`, `editor`) round-trip to client responses\n- fire-and-forget methods emit requests (`notify`, `setStatus`, `setWidget` for string arrays, `setTitle`, `setEditorText`)\n\nUnsupported/no-op in RPC implementation:\n\n- `onTerminalInput`\n- `custom`\n- `setFooter`, `setHeader`, `setEditorComponent`\n- `setWorkingMessage`\n- theme switching/loading (`setTheme` returns failure)\n- tool expansion controls are inert\n\n### Print/headless/subagent paths\n\nWhen no UI context is supplied to runner init, `ctx.hasUI` is `false` and methods are no-op/default-returning.\n\n### Background interactive mode\n\nBackground mode installs a non-interactive UI context object. In current implementation, `ctx.hasUI` may still be `true` while interactive dialogs return defaults/no-op behavior.\n\n## Session and state patterns\n\nFor durable extension state:\n\n1. Persist with `pi.appendEntry(customType, data)`.\n2. Rebuild state from `ctx.sessionManager.getBranch()` on `session_start`, `session_branch`, `session_tree`.\n3. Keep tool result `details` structured when state should be visible/reconstructible from tool result history.\n\nExample reconstruction pattern:\n\n```ts\npi.on(\"session_start\", async (_event, ctx) => {\n let latest;\n for (const entry of ctx.sessionManager.getBranch()) {\n if (entry.type === \"custom\" && entry.customType === \"my-state\") {\n latest = entry.data;\n }\n }\n // restore from latest\n});\n```\n\n## Rendering extension points\n\n## Custom message renderer\n\n```ts\npi.registerMessageRenderer(\"my-type\", (message, { expanded }, theme) => {\n // return pi-tui Component\n});\n```\n\nUsed by interactive rendering when custom messages are displayed.\n\n## Tool call/result renderer\n\nProvide `renderCall` / `renderResult` on `registerTool` definitions for custom tool visualization in TUI.\n\n## Constraints and pitfalls\n\n- Runtime actions are unavailable during extension load.\n- `tool_call` errors block execution (fail-closed).\n- Command name conflicts with built-ins are skipped with diagnostics.\n- Reserved shortcuts are ignored (`ctrl+c`, `ctrl+d`, `ctrl+z`, `ctrl+k`, `ctrl+p`, `ctrl+l`, `ctrl+o`, `ctrl+t`, `ctrl+g`, `shift+tab`, `shift+ctrl+p`, `alt+enter`, `escape`, `enter`).\n- Treat `ctx.reload()` as terminal for the current command handler frame.\n\n## Extensions vs hooks vs custom-tools\n\nUse the right surface:\n\n- **Extensions** (`src/extensibility/extensions/*`): unified system (events + tools + commands + renderers + provider registration).\n- **Hooks** (`src/extensibility/hooks/*`): separate legacy event API.\n- **Custom-tools** (`src/extensibility/custom-tools/*`): tool-focused modules; when loaded alongside extensions they are adapted and still pass through extension interception wrappers.\n\nIf you need one package that owns policy, tools, command UX, and rendering together, use extensions.\n",
|
|
15
15
|
"fs-scan-cache-architecture.md": "# Filesystem Scan Cache Architecture Contract\n\nThis document defines the current contract for the shared filesystem scan cache implemented in Rust (`crates/pi-natives/src/fs_cache.rs`) and consumed by native discovery/search APIs exposed to `packages/coding-agent`.\n\n## What this cache is\n\nThe cache stores full directory-scan entry lists (`GlobMatch[]`) keyed by scan scope and traversal policy, then lets higher-level operations (glob filtering, fuzzy scoring, grep file selection) run against those cached entries.\n\nPrimary goals:\n\n- avoid repeated filesystem walks for repeated discovery/search calls\n- keep consistency across `glob`, `fuzzyFind`, and `grep` when they share the same scan policy\n- allow explicit staleness recovery for empty results and explicit invalidation after file mutations\n\n## Ownership and public surface\n\n- Cache implementation and policy: `crates/pi-natives/src/fs_cache.rs`\n- Native consumers:\n - `crates/pi-natives/src/glob.rs`\n - `crates/pi-natives/src/fd.rs` (`fuzzyFind`)\n - `crates/pi-natives/src/grep.rs`\n- JS binding/export:\n - `packages/natives/src/glob/index.ts` (`invalidateFsScanCache`)\n - `packages/natives/src/glob/types.ts`\n - `packages/natives/src/grep/types.ts`\n- Coding-agent mutation invalidation helpers:\n - `packages/coding-agent/src/tools/fs-cache-invalidation.ts`\n\n## Cache key partitioning (hard contract)\n\nEach entry is keyed by:\n\n- canonicalized `root` directory path\n- `include_hidden` boolean\n- `use_gitignore` boolean\n- `skip_node_modules` boolean\n\nImplications:\n\n- Hidden and non-hidden scans do **not** share entries.\n- Gitignore-respecting and ignore-disabled scans do **not** share entries.\n- Scans that prune `node_modules` do **not** share entries with scans that include it.\n- Consumers must pass stable semantics for hidden/gitignore/node_modules behavior; changing any flag creates a different cache partition.\n\n## Scan collection behavior\n\nCache population uses a deterministic walker (`ignore::WalkBuilder`) configured by `include_hidden`, `use_gitignore`, and `skip_node_modules`:\n\n- `follow_links(false)`\n- sorted by file path\n- `.git` is always skipped\n- `node_modules` is pruned at traversal time when `skip_node_modules=true`\n- entry file type + `mtime` are captured via `symlink_metadata`\n\nSearch roots are resolved by `resolve_search_path`:\n\n- relative paths are resolved against current cwd\n- target must be an existing directory\n- root is canonicalized when possible\n\n## Freshness and eviction policy\n\nGlobal policy (environment-overridable):\n\n- `FS_SCAN_CACHE_TTL_MS` (default `1000`)\n- `FS_SCAN_EMPTY_RECHECK_MS` (default `200`)\n- `FS_SCAN_CACHE_MAX_ENTRIES` (default `16`)\n\nBehavior:\n\n- `get_or_scan(...)`\n - if TTL is `0`: bypass cache entirely, always fresh scan (`cache_age_ms = 0`)\n - on cache hit within TTL: return cached entries + non-zero `cache_age_ms`\n - on expired hit: evict key, rescan, store fresh entry\n- max entry enforcement is oldest-first eviction by `created_at`\n\n## Empty-result fast recheck (separate from normal hits)\n\nNormal cache hit:\n\n- a cache hit inside TTL returns cached entries and does nothing else.\n\nEmpty-result fast recheck:\n\n- this is a **caller-side** policy using `ScanResult.cache_age_ms`\n- if filtered/query result is empty and cached scan age is at least `empty_recheck_ms()`, caller performs one `force_rescan(...)` and retries\n- intended to reduce stale-negative results when files were recently added but cache is still within TTL\n\nCurrent consumers:\n\n- `glob`: rechecks when filtered matches are empty and scan age exceeds threshold\n- `fuzzyFind` (`fd.rs`): rechecks only when query is non-empty and scored matches are empty\n- `grep`: rechecks when selected candidate file list is empty\n\n## Consumer defaults and cache usage\n\nCache is opt-in on all exposed APIs (`cache?: boolean`, default `false`).\n\nCurrent defaults in native APIs:\n\n- `glob`: `hidden=false`, `gitignore=true`, `cache=false`, and `node_modules` included only when the pattern mentions `node_modules`\n- `fuzzyFind`: `hidden=false`, `gitignore=true`, `cache=false`, and `node_modules` is skipped\n- `grep`: `hidden=true`, `gitignore=true`, `cache=false`, and `node_modules` included only when the glob mentions `node_modules`\n\nCoding-agent callers today:\n\n- High-volume mention candidate discovery enables cache:\n - `packages/coding-agent/src/utils/file-mentions.ts`\n - profile: `hidden=true`, `gitignore=true`, `includeNodeModules=true`, `cache=true`\n- Tool-level `grep` integration currently disables scan cache (`cache: false`):\n - `packages/coding-agent/src/tools/grep.ts`\n\n## Invalidation contract\n\nNative invalidation entrypoint:\n\n- `invalidateFsScanCache(path?: string)`\n - with `path`: remove cache entries whose root is a prefix of target path\n - without path: clear all scan cache entries\n\nPath handling details:\n\n- relative invalidation paths are resolved against cwd\n- invalidation attempts canonicalization\n- if target does not exist (e.g., delete), fallback canonicalizes parent and reattaches filename when possible\n- this preserves invalidation behavior for create/delete/rename where one side may not exist\n\n## Coding-agent mutation flow responsibilities\n\nCoding-agent code must invalidate after successful filesystem mutations.\n\nCentral helpers:\n\n- `invalidateFsScanAfterWrite(path)`\n- `invalidateFsScanAfterDelete(path)`\n- `invalidateFsScanAfterRename(oldPath, newPath)` (invalidates both sides when paths differ)\n\nCurrent mutation tool callsites:\n\n- `packages/coding-agent/src/tools/write.ts`\n- `packages/coding-agent/src/patch/index.ts` (hashline/patch/replace flows)\n\nRule: if a flow mutates filesystem content or location and bypasses these helpers, cache staleness bugs are expected.\n\n## Adding a new cache consumer safely\n\nWhen introducing cache use in a new scanner/search path:\n\n1. **Use stable scan policy inputs**\n - decide hidden/gitignore/node_modules semantics first\n - pass them consistently to `get_or_scan`/`force_rescan` so cache partitions are intentional\n\n2. **Treat cache data as pre-filtered only by traversal policy**\n - apply tool-specific filtering (glob patterns, type filters, scoring) after retrieval\n - never assume cached entries already reflect your higher-level filters\n\n3. **Implement empty-result fast recheck only for stale-negative risk**\n - use `scan.cache_age_ms >= empty_recheck_ms()`\n - retry once with `force_rescan(..., store=true, ...)`\n - keep this path separate from normal cache-hit logic\n\n4. **Respect no-cache mode explicitly**\n - when caller disables cache, call `force_rescan(..., store=false, ...)`\n - do not populate shared cache in a no-cache request path\n\n5. **Wire mutation invalidation for any new write path**\n - after successful write/edit/delete/rename, call the coding-agent invalidation helper\n - for rename/move, invalidate both old and new paths\n\n6. **Do not add per-call TTL knobs**\n - current contract is global policy only (env-configured), no per-request TTL override\n\n## Known boundaries\n\n- Cache scope is process-local in-memory (`DashMap`), not persisted across process restarts.\n- Cache stores scan entries, not final tool results.\n- `glob`/`fuzzyFind`/`grep` share scan entries only when key dimensions (`root`, `hidden`, `gitignore`, `skip_node_modules`) match.\n- `.git` is always excluded at scan collection time regardless of caller options.\n",
|
|
16
16
|
"gemini-manifest-extensions.md": "# Gemini Manifest Extensions (`gemini-extension.json`)\n\nThis document covers how the coding-agent discovers and parses Gemini-style manifest extensions (`gemini-extension.json`) into the `extensions` capability.\n\nIt does **not** cover TypeScript/JavaScript extension module loading (`extensions/*.ts`, `index.ts`, `package.json omp.extensions`), which is documented in `extension-loading.md`.\n\n## Implementation files\n\n- [`../src/discovery/gemini.ts`](../packages/coding-agent/src/discovery/gemini.ts)\n- [`../src/discovery/builtin.ts`](../packages/coding-agent/src/discovery/builtin.ts)\n- [`../src/discovery/helpers.ts`](../packages/coding-agent/src/discovery/helpers.ts)\n- [`../src/capability/extension.ts`](../packages/coding-agent/src/capability/extension.ts)\n- [`../src/capability/index.ts`](../packages/coding-agent/src/capability/index.ts)\n- [`../src/extensibility/extensions/loader.ts`](../packages/coding-agent/src/extensibility/extensions/loader.ts)\n\n---\n\n## What gets discovered\n\nThe Gemini provider (`id: gemini`, priority `60`) registers an `extensions` loader that scans two fixed roots:\n\n- User: `~/.gemini/extensions`\n- Project: `<cwd>/.gemini/extensions`\n\nPath resolution is direct from `ctx.home` and `ctx.cwd` via `getUserPath()` / `getProjectPath()`.\n\nImportant scope rule: project lookup is **cwd-only**. It does not walk parent directories.\n\n---\n\n## Directory scan rules\n\nFor each root (`~/.gemini/extensions` and `<cwd>/.gemini/extensions`), discovery does:\n\n1. `readDirEntries(root)`\n2. keep only direct child directories (`entry.isDirectory()`)\n3. for each child `<name>`, attempt to read exactly:\n - `<root>/<name>/gemini-extension.json`\n\nThere is no recursive scan beyond one directory level.\n\n### Hidden directories\n\nGemini manifest discovery does **not** filter out dot-prefixed directory names. If a hidden child directory exists and contains `gemini-extension.json`, it is considered.\n\n### Missing/unreadable files\n\nIf `gemini-extension.json` is missing or unreadable, that directory is skipped silently (no warning).\n\n---\n\n## Manifest shape (as implemented)\n\nThe capability type defines this manifest shape:\n\n```ts\ninterface ExtensionManifest {\n name?: string;\n description?: string;\n mcpServers?: Record<string, Omit<MCPServer, \"name\" | \"_source\">>;\n tools?: unknown[];\n context?: unknown;\n}\n```\n\nDiscovery-time behavior is intentionally loose:\n\n- JSON parse success is required.\n- There is no runtime schema validation for field types/content beyond JSON syntax.\n- The parsed object is stored as `manifest` on the capability item.\n\n### Name normalization\n\n`Extension.name` is set to:\n\n1. `manifest.name` if it is not `null`/`undefined`\n2. otherwise the extension directory name\n\nNo string-type enforcement is applied here.\n\n---\n\n## Materialization into capability items\n\nA valid parsed manifest creates one `Extension` capability item:\n\n```ts\n{\n\tname: manifest.name ?? <directory-name>,\n\tpath: <extension-directory>,\n\tmanifest: <parsed-json>,\n\tlevel: \"user\" | \"project\",\n\t_source: {\n\t\tprovider: \"gemini\",\n\t\tproviderName: \"Gemini CLI\" // attached by capability registry\n\t\tpath: <absolute-manifest-path>,\n\t\tlevel: \"user\" | \"project\"\n\t}\n}\n```\n\nNotes:\n\n- `_source.path` is normalized to an absolute path by `createSourceMeta()`.\n- Registry-level capability validation for `extensions` only checks presence of `name` and `path`.\n- Manifest internals (`mcpServers`, `tools`, `context`) are not validated during discovery.\n\n---\n\n## Error handling and warning semantics\n\n### Warned\n\n- Invalid JSON in a manifest file:\n - warning format: `Invalid JSON in <manifestPath>`\n\n### Not warned (silent skip)\n\n- `extensions` directory missing\n- child directory has no `gemini-extension.json`\n- unreadable manifest file\n- manifest JSON is syntactically valid but semantically odd/incomplete\n\nThis means partial validity is accepted: only syntactic JSON failure emits a warning.\n\n---\n\n## Precedence and deduplication with other sources\n\n`extensions` capability is aggregated across providers by the capability registry.\n\nCurrent providers for this capability:\n\n- `native` (`packages/coding-agent/src/discovery/builtin.ts`) priority `100`\n- `gemini` (`packages/coding-agent/src/discovery/gemini.ts`) priority `60`\n\nDedup key is `ext.name` (`extensionCapability.key = ext => ext.name`).\n\n### Cross-provider precedence\n\nHigher-priority provider wins on duplicate extension names.\n\n- If `native` and `gemini` both emit extension name `foo`, the native item is kept.\n- Lower-priority duplicate is retained only in `result.all` with `_shadowed = true`.\n\n### Intra-provider order effects\n\nBecause dedup is “first seen wins”, provider-local item order matters.\n\n- Gemini loader appends **user first**, then **project**.\n- Therefore, duplicate names between `~/.gemini/extensions` and `<cwd>/.gemini/extensions` keep the user entry and shadow the project entry.\n\nBy contrast, native provider builds config dir order differently (`project` then `user` in `getConfigDirs()`), so native intra-provider shadowing is the opposite direction.\n\n---\n\n## User vs project behavior summary\n\nFor Gemini manifests specifically:\n\n- Both user and project roots are scanned every load.\n- Project root is fixed to `<cwd>/.gemini/extensions` (no ancestor walk).\n- Duplicate names inside Gemini source resolve to user-first.\n- Duplicate names against higher-priority providers (notably native) lose by priority.\n\n---\n\n## Boundary: discovery metadata vs runtime extension loading\n\n`gemini-extension.json` discovery currently feeds capability metadata (`Extension` items). It does **not** directly load runnable TS/JS extension modules.\n\nRuntime module loading (`discoverAndLoadExtensions()` / `loadExtensions()`) uses `extension-modules` and explicit paths, and currently filters auto-discovered modules to provider `native` only.\n\nPractical implication:\n\n- Gemini manifest extensions are discoverable as capability records.\n- They are not, by themselves, executed as runtime extension modules by the extension loader pipeline.\n\nThis boundary is intentional in current implementation and explains why manifest discovery and executable module loading can diverge.\n",
|
|
17
|
-
"handoff-generation-pipeline.md": "# `/handoff` generation pipeline\n\nThis document describes how the coding-agent implements `/handoff` today: trigger path, generation prompt, completion capture, session switch, and context reinjection.\n\n## Scope\n\nCovers:\n\n- Interactive `/handoff` command dispatch\n- `AgentSession.handoff()` lifecycle and state transitions\n- How handoff output is captured from assistant output\n- How old/new sessions persist handoff data differently\n- UI behavior for success, cancel, and failure\n\nDoes not cover:\n\n- Generic tree navigation/branch internals\n- Non-handoff session commands (`/new`, `/fork`, `/resume`)\n\n## Implementation files\n\n- [`../src/modes/controllers/input-controller.ts`](../packages/coding-agent/src/modes/controllers/input-controller.ts)\n- [`../src/modes/controllers/command-controller.ts`](../packages/coding-agent/src/modes/controllers/command-controller.ts)\n- [`../src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts)\n- [`../src/session/session-manager.ts`](../packages/coding-agent/src/session/session-manager.ts)\n- [`../src/extensibility/slash-commands.ts`](../packages/coding-agent/src/extensibility/slash-commands.ts)\n\n## Trigger path\n\n1. `/handoff` is declared in builtin slash command metadata (`slash-commands.ts`) with optional inline hint: `[focus instructions]`.\n2. In interactive input handling (`InputController`), submit text matching `/handoff` or `/handoff ...` is intercepted before normal prompt submission.\n3. The editor is cleared and `handleHandoffCommand(customInstructions?)` is called.\n4. `CommandController.handleHandoffCommand` performs a preflight guard using current entries:\n - Counts `type === \"message\"` entries.\n - If `< 2`, it warns: `Nothing to hand off (no messages yet)` and returns.\n\nThe same minimum-content guard exists again inside `AgentSession.handoff()` and throws if violated. This duplicates safety at both UI and session layers.\n\n## End-to-end lifecycle\n\n### 1) Start handoff generation\n\n`AgentSession.handoff(customInstructions?)`:\n\n- Reads current branch entries (`sessionManager.getBranch()`)\n- Validates minimum message count (`>= 2`)\n- Creates `#handoffAbortController`\n- Renders the fixed prompt template `prompts/system/handoff-document.md` with optional `additionalFocus`\n- Appends `Additional focus: ...` if custom instructions are provided\n\nPrompt is sent as an agent-authored developer message via:\n\n```ts\nawait this.#promptAgentWithIdleRetry([\n {\n role: \"developer\",\n content: [{ type: \"text\", text: handoffPrompt }],\n attribution: \"agent\",\n timestamp: Date.now(),\n },\n]);\n```\n\nBecause handoff bypasses `prompt(...)`, normal slash/prompt-template expansion is not applied to this internal instruction payload.\n\n### 2) Capture completion\n\nBefore sending prompt, `handoff()` subscribes to session events and waits for `agent_end`.\n\nOn `agent_end`, it extracts handoff text from agent state by scanning backward for the most recent `assistant` message, then concatenating all `content` blocks where `type === \"text\"` with `\\n`.\n\nImportant extraction assumptions:\n\n- Only text blocks are used; non-text content is ignored.\n- It assumes the latest assistant message corresponds to handoff generation.\n- It does not parse markdown sections or validate format compliance.\n- If assistant output has no text blocks, handoff is treated as missing.\n\n### 3) Cancellation checks\n\nCancellation throws `Error(\"Handoff cancelled\")`; a completed generation with no extracted text returns `undefined`.\n\n- no captured handoff text → returns `undefined`\n- aborted handoff signal → throws `Error(\"Handoff cancelled\")`\n\nIt always clears `#handoffAbortController` in `finally`.\n\n### 4) New session creation\n\nIf text was captured and not aborted:\n\n1. Flush current session writer (`sessionManager.flush()`)\n2. Cancel async jobs (`#asyncJobManager?.cancelAll()`)\n3. Start a brand-new session (`sessionManager.newSession()`)\n4. Reset in-memory agent state (`agent.reset()`)\n5. Rebind `agent.sessionId` to new session id\n6. Clear queued context arrays (`#steeringMessages`, `#followUpMessages`, `#pendingNextTurnMessages`) and any scheduled hidden next-turn generation\n7. Reset todo reminder counter\n `newSession()` creates a fresh header and empty entry list (leaf reset to `null`). In the handoff path, no `parentSession` is passed.\n\n### 5) Handoff-context injection\n\nThe generated handoff document is wrapped and appended to the new session as a `custom_message` entry:\n\n```text\n<handoff-context>\n...handoff text...\n</handoff-context>\n\nThe above is a handoff document from a previous session. Use this context to continue the work seamlessly.\n```\n\nInsertion call:\n\n```ts\nthis.sessionManager.appendCustomMessageEntry(\"handoff\", handoffContent, true);\n```\n\nSemantics:\n\n- `customType`: `\"handoff\"`\n- `display`: `true` (visible in TUI rebuild)\n- Entry type: `custom_message` (participates in LLM context)\n\n### 6) Rebuild active agent context\n\nAfter injection:\n\n1. `buildDisplaySessionContext()` resolves message list for current leaf\n2. `agent.replaceMessages(sessionContext.messages)` makes the injected handoff message active context\n3. Todo phases are synchronized from the new branch\n4. Method returns `{ document: handoffText, savedPath? }`\n\nAt this point, the active LLM context in the new session contains the injected handoff message, not the old transcript.\n\n## Persistence model: old session vs new session\n\n### Old session\n\nDuring generation, normal message persistence remains active. The assistant handoff response is persisted as a regular `message` entry on `message_end`.\n\nResult: the original session contains the visible generated handoff as part of historical transcript.\n\n### New session\n\nAfter session reset, handoff is persisted as `custom_message` with `customType: \"handoff\"`.\n\n`buildSessionContext()` converts this entry into a runtime custom/user-context message via `createCustomMessage(...)`, so it is included in future prompts from the new session.\n\n## Controller/UI behavior\n\n`CommandController.handleHandoffCommand` behavior:\n\n- Calls `await session.handoff(customInstructions)`\n- If result is `undefined`: `showError(\"Handoff cancelled\")`\n- On success:\n - `rebuildChatFromMessages()` (loads new session context, including injected handoff)\n - invalidates status line and editor top border\n - reloads todos\n - appends success chat line: `New session started with handoff context`\n- On exception:\n - if message is `\"Handoff cancelled\"` or error name is `AbortError`: `showError(\"Handoff cancelled\")`\n - otherwise: `showError(\"Handoff failed: <message>\")`\n- Requests render at end\n\n## Cancellation semantics (current behavior)\n\n### Session-level cancellation primitive\n\n`AgentSession` exposes:\n\n- `abortHandoff()` → aborts `#handoffAbortController`\n- `isGeneratingHandoff` → true while controller exists\n\nWhen this abort path is used, the handoff completion waiter resolves as cancelled, `agent.abort()` is called, and `handoff()` throws `Error(\"Handoff cancelled\")`; command controller maps it to cancellation UI.\n\n### Interactive `/handoff` path\n\nThe editor does not install a dedicated Escape handler that calls `abortHandoff()` for `/handoff`, but `InputController` treats `session.isGeneratingHandoff` as a busy state.\n\nPractical impact:\n\n- There is session-level cancellation support, but no handoff-specific keybinding hook in the `/handoff` command path.\n- User interruption may still occur through broader agent abort paths, but that is not the same explicit cancellation channel used by `abortHandoff()`.\n\n## Aborted vs failed handoff\n\nCurrent UI classification:\n\n- **Aborted/cancelled**\n - `abortHandoff()` path triggers `\"Handoff cancelled\"`, or\n - thrown `AbortError`\n - UI shows `Handoff cancelled`\n\n- **Failed**\n - any other thrown error from `handoff()` / prompt pipeline (model/API validation errors, runtime exceptions, etc.)\n - UI shows `Handoff failed: ...`\n\nAdditional nuance: if generation completes but no text is extracted, `handoff()` returns `undefined` and controller currently reports **cancelled**, not **failed**.\n\n## Short-session and minimum-content guardrails\n\nTwo guards prevent low-signal handoffs:\n\n- UI layer (`handleHandoffCommand`): warns and returns early for `< 2` message entries\n- Session layer (`handoff()`): throws the same condition as an error\n\nThis avoids creating a new session with empty/near-empty handoff context.\n\n## State transition summary\n\nHigh-level state flow:\n\n1. Interactive slash command intercepted\n2. Preflight message-count guard\n3. `#handoffAbortController` created (`isGeneratingHandoff = true`)\n4. Internal developer handoff prompt submitted (visible in chat as normal assistant generation)\n5. On `agent_end`, last assistant text extracted\n6. If missing text → return `undefined`; if aborted → cancellation error path\n7. If present:\n - flush old session\n - cancel async jobs\n - create new empty session\n - reset runtime queues/counters\n - append `custom_message(handoff)`\n - optionally save an auto-triggered handoff document under the session artifacts directory when `compaction.handoffSaveToDisk` is enabled\n8. Controller rebuilds chat UI and announces success\n9. `#handoffAbortController` cleared (`isGeneratingHandoff = false`)\n\n## Known assumptions and limitations\n\n- Handoff extraction is heuristic: \"last assistant text blocks\"; no structural validation.\n- No hard check that generated markdown follows requested section format.\n- Missing extracted text is reported as cancellation in controller UX.\n- `/handoff` interactive flow currently lacks a dedicated Escape→`abortHandoff()` binding, though the session reports handoff as a busy state.\n- New session lineage metadata (`parentSession`) is not set by this path.\n- Auto-triggered handoffs can write a timestamped `handoff-*.md` artifact when `compaction.handoffSaveToDisk` is enabled; write failure is logged and does not fail the handoff.\n",
|
|
17
|
+
"handoff-generation-pipeline.md": "# `/handoff` generation pipeline\n\nThis document describes how the coding-agent implements `/handoff`: trigger path, oneshot generation, session switch, context reinjection, persistence, and UI behavior.\n\n## Scope\n\nCovers:\n\n- Interactive `/handoff` command dispatch\n- `AgentSession.handoff()` lifecycle and state transitions\n- `generateHandoff(...)` request shape\n- How old/new sessions persist handoff data differently\n- UI behavior for success, cancel, and failure\n\nDoes not cover:\n\n- Generic tree navigation/branch internals\n- Non-handoff session commands (`/new`, `/fork`, `/resume`)\n\n## Implementation files\n\n- [`../src/modes/controllers/input-controller.ts`](../packages/coding-agent/src/modes/controllers/input-controller.ts)\n- [`../src/modes/controllers/command-controller.ts`](../packages/coding-agent/src/modes/controllers/command-controller.ts)\n- [`../src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts)\n- [`packages/agent/src/compaction/compaction.ts`](../packages/agent/src/compaction/compaction.ts)\n- [`../src/session/session-manager.ts`](../packages/coding-agent/src/session/session-manager.ts)\n- [`../src/extensibility/slash-commands.ts`](../packages/coding-agent/src/extensibility/slash-commands.ts)\n\n## Trigger path\n\n1. `/handoff` is declared in builtin slash command metadata (`slash-commands.ts`) with optional inline hint: `[focus instructions]`.\n2. In interactive input handling (`InputController`), submit text matching `/handoff` or `/handoff ...` is intercepted before normal prompt submission.\n3. The editor is cleared and `handleHandoffCommand(customInstructions?)` is called.\n4. `CommandController.handleHandoffCommand` performs a preflight guard using current entries:\n - Counts `type === \"message\"` entries.\n - If `< 2`, it warns: `Nothing to hand off (no messages yet)` and returns.\n\nThe same minimum-content guard exists again inside `AgentSession.handoff()` and throws if violated. This duplicates safety at both UI and session layers.\n\n## End-to-end lifecycle\n\n### 1) Start handoff generation\n\n`AgentSession.handoff(customInstructions?)`:\n\n- Reads current branch entries (`sessionManager.getBranch()`).\n- Validates minimum message count (`>= 2`).\n- Creates `#handoffAbortController` and links any caller-provided abort signal to it.\n- Resolves the current model API key through `ModelRegistry`.\n- Calls `generateHandoff(...)` with:\n - live agent messages (`agent.state.messages`),\n - the current model and API key,\n - the base system prompt (`#baseSystemPrompt`),\n - the live tool array (`agent.state.tools`),\n - optional focus instructions,\n - coding-agent message conversion (`convertToLlm`),\n - provider metadata and `initiatorOverride: \"agent\"`.\n\n`generateHandoff(...)` lives in `packages/agent/src/compaction/compaction.ts` next to summarization. It renders `packages/agent/src/compaction/prompts/handoff-document.md` via `renderHandoffPrompt(...)` with optional `additionalFocus`.\n\n### 2) Generate and capture output\n\n`generateHandoff(...)` converts the existing `AgentMessage[]` history to real LLM `Message[]` history, then appends one trailing agent-attributed `user` message containing the rendered handoff prompt.\n\nThe request uses `completeSimple(...)` directly:\n\n```ts\nawait completeSimple(\n model,\n {\n systemPrompt,\n messages: requestMessages,\n tools,\n },\n {\n apiKey,\n signal,\n reasoning: Effort.High,\n toolChoice: \"none\",\n initiatorOverride,\n metadata,\n },\n);\n```\n\nImportant generation properties:\n\n- The request preserves the live provider cache prefix by reusing the same system prompt, tool definitions, and real message history shape as the active agent.\n- The handoff instruction is a trailing `user` message, not a developer message, so the cached prefix remains aligned with the prior turn.\n- `toolChoice: \"none\"` prevents intentional tool dispatch.\n- The returned assistant content is filtered to text blocks and joined with `\\n`; stray tool-call blocks are ignored if a provider does not honor `toolChoice: \"none\"`.\n- `stopReason === \"error\"` throws a generation error.\n\nNo agent-loop events are used for capture. The handoff path no longer waits for `agent_end` and no longer scans the latest assistant message.\n\n### 3) Cancellation checks\n\nCancellation throws `Error(\"Handoff cancelled\")`; a completed generation with no text returns `undefined`.\n\n- caller signal aborts `#handoffAbortController`\n- `completeSimple(...)` receives the abort signal\n- aborted handoff signal or provider `AbortError` is normalized to `Error(\"Handoff cancelled\")`\n- empty generated text returns `undefined`\n\n`AgentSession.handoff()` always clears `#handoffAbortController` in `finally`.\n\n### 4) New session creation\n\nIf text was generated and not aborted:\n\n1. Flush current session writer (`sessionManager.flush()`).\n2. Cancel session-owned async jobs.\n3. Start a brand-new session with `parentSession` pointing at the previous session file when one exists.\n4. Reset in-memory agent state (`agent.reset()`).\n5. Rebind `agent.sessionId` to the new session id.\n6. Rekey/reset hindsight state for the new session.\n7. Clear queued context arrays (`#steeringMessages`, `#followUpMessages`, `#pendingNextTurnMessages`) and any scheduled hidden next-turn generation.\n8. Reset todo reminder counter.\n\n### 5) Handoff-context injection\n\nThe generated handoff document is wrapped by coding-agent session glue and appended to the new session as a `custom_message` entry:\n\n```text\n<handoff-context>\n...handoff text...\n</handoff-context>\n\nThe above is a handoff document from a previous session. Use this context to continue the work seamlessly.\n```\n\nInsertion call:\n\n```ts\nthis.sessionManager.appendCustomMessageEntry(\"handoff\", handoffContent, true, undefined, \"agent\");\n```\n\nSemantics:\n\n- `customType`: `\"handoff\"`\n- `display`: `true` (visible in TUI rebuild)\n- attribution: `\"agent\"`\n- Entry type: `custom_message` (participates in LLM context)\n\n### 6) Rebuild active agent context\n\nAfter injection:\n\n1. `buildDisplaySessionContext()` resolves message list for current leaf.\n2. `agent.replaceMessages(sessionContext.messages)` makes the injected handoff message active context.\n3. Todo phases are synchronized from the new branch.\n4. Method returns `{ document: handoffText, savedPath? }`.\n\nAt this point, the active LLM context in the new session contains the injected handoff message, not the old transcript.\n\n## Persistence model: old session vs new session\n\n### Old session\n\nHandoff generation is a oneshot request, not a visible agent turn. The generated handoff text is not appended to the old session as an assistant message.\n\nResult: the original session keeps its prior transcript unchanged except for data already persisted before handoff began.\n\n### New session\n\nAfter session reset, handoff is persisted as `custom_message` with `customType: \"handoff\"`.\n\n`buildSessionContext()` converts this entry into a runtime custom/user-context message via `createCustomMessage(...)`, so it is included in future prompts from the new session.\n\nAuto-triggered handoffs can additionally write a timestamped `handoff-*.md` artifact under the session artifacts directory when `compaction.handoffSaveToDisk` is enabled. Manual `/handoff` does not write that artifact.\n\n## Controller/UI behavior\n\n`CommandController.handleHandoffCommand` behavior:\n\n- Shows a status loader: `Generating handoff… (esc to cancel)`.\n- Calls `await session.handoff(customInstructions)`.\n- If result is `undefined`: `showError(\"Handoff cancelled\")`.\n- On success:\n - `rebuildChatFromMessages()` (loads new session context, including injected handoff)\n - invalidates status line and editor top border\n - reloads todos\n - appends success chat line: `New session started with handoff context`\n- On exception:\n - if message is `\"Handoff cancelled\"` or error name is `AbortError`: `showError(\"Handoff cancelled\")`\n - otherwise: `showError(\"Handoff failed: <message>\")`\n- Stops the loader, restores the previous Escape handler, and requests render at end.\n\nManual `/handoff` no longer streams the generated document into chat. A cancellable loader remains visible while the oneshot request runs, and the chat is rebuilt after generation completes.\n\n## Cancellation semantics\n\n### Session-level cancellation primitive\n\n`AgentSession` exposes:\n\n- `abortHandoff()` → aborts `#handoffAbortController`\n- `isGeneratingHandoff` → true while controller exists\n\nWhen this abort path is used, the abort signal is passed to `completeSimple(...)`; `handoff()` normalizes the cancellation to `Error(\"Handoff cancelled\")`, and command controller maps it to cancellation UI.\n\n### Interactive `/handoff` path\n\nThe command controller installs a temporary Escape handler for `/handoff` while the loader is visible. Pressing Escape calls `session.abortHandoff()`, which aborts the `completeSimple(...)` request through `#handoffAbortController`.\n\n## Aborted vs failed handoff\n\nCurrent UI classification:\n\n- **Aborted/cancelled**\n - `abortHandoff()` path triggers `\"Handoff cancelled\"`, or\n - thrown `AbortError`\n - UI shows `Handoff cancelled`\n- **Failed**\n - any other thrown error from `handoff()` / `generateHandoff()` / provider request path\n - UI shows `Handoff failed: ...`\n\nAdditional nuance: if generation completes but no text is returned, `handoff()` returns `undefined` and controller currently reports **cancelled**, not **failed**.\n\n## Short-session and minimum-content guardrails\n\nTwo guards prevent low-signal handoffs:\n\n- UI layer (`handleHandoffCommand`): warns and returns early for `< 2` message entries\n- Session layer (`handoff()`): throws the same condition as an error\n\nThis avoids creating a new session with empty/near-empty handoff context.\n\n## State transition summary\n\nHigh-level state flow:\n\n1. Interactive slash command intercepted.\n2. Preflight message-count guard.\n3. `#handoffAbortController` created (`isGeneratingHandoff = true`).\n4. `generateHandoff(...)` issues one `completeSimple(...)` request with live system prompt, tools, message history, and trailing handoff prompt.\n5. Assistant response text blocks are joined; tool-call blocks are discarded.\n6. If missing text → return `undefined`; if aborted → cancellation error path.\n7. If present:\n - flush old session\n - cancel async jobs\n - create new empty session with previous session as parent\n - reset runtime queues/counters\n - append `custom_message(handoff)`\n - optionally save an auto-triggered handoff document under the session artifacts directory when `compaction.handoffSaveToDisk` is enabled\n8. Controller rebuilds chat UI and announces success.\n9. `#handoffAbortController` cleared (`isGeneratingHandoff = false`).\n\n## Known assumptions and limitations\n\n- No structural validation checks that generated markdown follows the requested section format.\n- Missing generated text is reported as cancellation in controller UX.\n- Manual handoff has no streaming visibility; a cancellable loader is shown until the UI updates after generation completes.\n- Auto-triggered handoffs can write a timestamped `handoff-*.md` artifact when `compaction.handoffSaveToDisk` is enabled; write failure is logged and does not fail the handoff.\n",
|
|
18
18
|
"hooks.md": "# Hooks\n\nThis document describes the **current hook subsystem code** in `src/extensibility/hooks/*`.\n\n## Current status in runtime\n\nThe hook package (`src/extensibility/hooks/`) is still exported and usable as an API surface, but the default CLI runtime now initializes the **extension runner** path. In current startup flow:\n\n- `--hook` is treated as an alias for `--extension` (CLI paths are merged into `additionalExtensionPaths`)\n- tools are wrapped by `ExtensionToolWrapper`, not `HookToolWrapper`\n- context transforms and lifecycle emissions go through `ExtensionRunner`\n\nSo this file documents the hook subsystem implementation itself (types/loader/runner/wrapper), including legacy behavior and constraints.\n\n## Key files\n\n- `src/extensibility/hooks/types.ts` — hook context, event types, and result contracts\n- `src/extensibility/hooks/loader.ts` — module loading and hook discovery bridge\n- `src/extensibility/hooks/runner.ts` — event dispatch, command lookup, error signaling\n- `src/extensibility/hooks/tool-wrapper.ts` — pre/post tool interception wrapper\n- `src/extensibility/hooks/index.ts` — exports/re-exports\n\n## What a hook module is\n\nA hook module must default-export a factory:\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\nexport default function hook(pi: HookAPI): void {\n pi.on(\"tool_call\", async (event, ctx) => {\n if (\n event.toolName === \"bash\" &&\n String(event.input.command ?? \"\").includes(\"rm -rf\")\n ) {\n return { block: true, reason: \"blocked by policy\" };\n }\n });\n}\n```\n\nThe factory can:\n\n- register event handlers with `pi.on(...)`\n- send persistent custom messages with `pi.sendMessage(...)`\n- persist non-LLM state with `pi.appendEntry(...)`\n- register slash commands via `pi.registerCommand(...)`\n- register custom message renderers via `pi.registerMessageRenderer(...)`\n- run shell commands via `pi.exec(...)`\n\n## Discovery and loading\n\n`discoverAndLoadHooks(configuredPaths, cwd)` does:\n\n1. Load discovered hooks from capability registry (`loadCapability(\"hooks\")`)\n2. Append explicitly configured paths (deduped by absolute path)\n3. Call `loadHooks(allPaths, cwd)`\n\n`loadHooks` then imports each path and expects a `default` function.\n\n### Path resolution\n\n`loader.ts` resolves hook paths as:\n\n- absolute path: used as-is\n- `~` path: expanded\n- relative path: resolved against `cwd`\n\n### Important legacy mismatch\n\nDiscovery providers for `hookCapability` still model pre/post shell-style hook files (for example `.claude/hooks/pre/*`, `.omp/.../hooks/pre/*`).\n\nThe hook loader here uses dynamic module import and requires a default JS/TS hook factory. If a discovered hook path is not importable as a module, load fails and is reported in `LoadHooksResult.errors`.\n\n## Event surfaces\n\nHook events are strongly typed in `types.ts`.\n\n### Session events\n\n- `session_start`\n- `session_before_switch` → can return `{ cancel?: boolean }`\n- `session_switch`\n- `session_before_branch` → can return `{ cancel?: boolean; skipConversationRestore?: boolean }`\n- `session_branch`\n- `session_before_compact` → can return `{ cancel?: boolean; compaction?: CompactionResult }`\n- `session.compacting` → can return `{ context?: string[]; prompt?: string; preserveData?: Record<string, unknown> }`\n- `session_compact`\n- `session_before_tree` → can return `{ cancel?: boolean; summary?: { summary: string; details?: unknown } }`\n- `session_tree`\n- `session_shutdown`\n\n### Agent/context events\n\n- `context` → can return `{ messages?: Message[] }`\n- `before_agent_start` → can return `{ message?: { customType; content; display; details } }`\n- `agent_start`\n- `agent_end`\n- `turn_start`\n- `turn_end`\n- `auto_compaction_start`\n- `auto_compaction_end`\n- `auto_retry_start`\n- `auto_retry_end`\n- `ttsr_triggered`\n- `todo_reminder`\n\n### Tool events (pre/post model)\n\n- `tool_call` (pre-execution) → can return `{ block?: boolean; reason?: string }`\n- `tool_result` (post-execution) → can return `{ content?; details?; isError? }`\n\nThis is the hook subsystem’s core pre/post interception model.\n\n```text\nHook tool interception flow\n\ntool_call handlers\n │\n ├─ any { block: true }? ── yes ──> throw (tool blocked)\n │\n └─ no\n │\n ▼\n execute underlying tool\n │\n ├─ success ──> tool_result handlers can override { content, details }\n │\n └─ error ──> emit tool_result(isError=true) then rethrow original error\n```\n\n## Execution model and mutation semantics\n\n### 1) Pre-execution: `tool_call`\n\n`HookToolWrapper.execute()` emits `tool_call` before tool execution.\n\n- if any handler returns `{ block: true }`, execution stops\n- if handler throws, wrapper fails closed and blocks execution\n- returned `reason` becomes the thrown error text\n\n### 2) Tool execution\n\nUnderlying tool executes normally if not blocked.\n\n### 3) Post-execution: `tool_result`\n\nAfter success, wrapper emits `tool_result` with:\n\n- `toolName`, `toolCallId`, `input`\n- `content`\n- `details`\n- `isError: false`\n\nIf handler returns overrides:\n\n- `content` can replace result content\n- `details` can replace result details\n\nOn tool failure, wrapper emits `tool_result` with `isError: true` and error text content, then rethrows original error.\n\n### What hooks can mutate\n\n- LLM context for a single call via `context` (`messages` replacement chain)\n- tool output content/details on successful tool calls (`tool_result` path)\n- pre-agent injected message via `before_agent_start`\n- cancellation/custom compaction/tree behavior via `session_before_*` and `session.compacting`\n\n### What hooks cannot mutate in this implementation\n\n- raw tool input parameters in-place (only block/allow on `tool_call`)\n- execution continuation after thrown tool errors (error path rethrows)\n- final success/error status in wrapper behavior (returned `isError` is typed but not applied by `HookToolWrapper`)\n\n## Ordering and conflict behavior\n\n### Discovery-level ordering\n\nCapability providers are priority-sorted (higher first). Dedupe is by capability key, first wins.\n\nFor `hooks`, capability key is `${type}:${tool}:${name}`. Shadowed duplicates from lower-priority providers are marked and excluded from effective discovered list.\n\n### Load order\n\n`discoverAndLoadHooks` builds a flat `allPaths` list, deduped by resolved absolute path, then `loadHooks` iterates in that order.\nFile order within each discovered directory depends on `readdir` output; the hook loader does not perform an additional sort.\n\n### Runtime handler order\n\nInside `HookRunner`, order is deterministic by registration sequence:\n\n1. hooks array order\n2. handler registration order per hook/event\n\nConflict behavior by event type:\n\n- `tool_call`: last returned result wins unless a handler blocks; first block short-circuits\n- `tool_result`: last returned override wins (no short-circuit)\n- `context`: chained; each handler receives prior handler’s message output\n- `before_agent_start`: first returned message is kept; later messages ignored\n- `session_before_*`: latest returned result is tracked; `cancel: true` short-circuits immediately\n- `session.compacting`: latest returned result wins\n\nCommand/renderer conflicts:\n\n- `getCommand(name)` returns first match across hooks (first loaded wins)\n- `getMessageRenderer(customType)` returns first match\n- `getRegisteredCommands()` returns all commands (no dedupe)\n\n## UI interactions (`HookContext.ui`)\n\n`HookUIContext` includes:\n\n- `select`, `confirm`, `input`, `editor`\n- `notify`\n- `setStatus`\n- `custom`\n- `setEditorText`, `getEditorText`\n- `theme` getter\n\n`ctx.hasUI` indicates whether interactive UI is available.\n\nWhen running with no UI, the default no-op context behavior is:\n\n- `select/input/editor` return `undefined`\n- `confirm` returns `false`\n- `notify`, `setStatus`, `setEditorText` are no-ops\n- `getEditorText` returns `\"\"`\n\n### Status line behavior\n\nHook status text set via `ctx.ui.setStatus(key, text)` is:\n\n- stored per key\n- sorted by key name\n- sanitized (`\\r`, `\\n`, `\\t` → spaces; repeated spaces collapsed)\n- joined and width-truncated for display\n\n## Error propagation and fallback\n\n### Load-time\n\n- invalid module or missing default export → captured in `LoadHooksResult.errors`\n- loading continues for other hooks\n\n### Event-time\n\n`HookRunner.emit(...)` catches handler errors for most events and emits `HookError` to listeners (`hookPath`, `event`, `error`), then continues.\n\n`emitToolCall(...)` is stricter: handler errors are not swallowed there; they propagate to caller. In `HookToolWrapper`, this blocks the tool call (fail-safe).\n\n## Realistic API examples\n\n### Block unsafe bash commands\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\nexport default function (pi: HookAPI): void {\n pi.on(\"tool_call\", async (event, ctx) => {\n if (event.toolName !== \"bash\") return;\n const cmd = String(event.input.command ?? \"\");\n if (!cmd.includes(\"rm -rf\")) return;\n\n if (!ctx.hasUI) return { block: true, reason: \"rm -rf blocked (no UI)\" };\n const ok = await ctx.ui.confirm(\"Dangerous command\", `Allow: ${cmd}`);\n if (!ok) return { block: true, reason: \"user denied command\" };\n });\n}\n```\n\n### Redact tool output on post-execution\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\nexport default function (pi: HookAPI): void {\n pi.on(\"tool_result\", async (event) => {\n if (event.toolName !== \"read\" || event.isError) return;\n\n const redacted = event.content.map((chunk) => {\n if (chunk.type !== \"text\") return chunk;\n return {\n ...chunk,\n text: chunk.text.replaceAll(/API_KEY=\\S+/g, \"API_KEY=[REDACTED]\"),\n };\n });\n\n return { content: redacted };\n });\n}\n```\n\n### Modify model context per LLM call\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\nexport default function (pi: HookAPI): void {\n pi.on(\"context\", async (event) => {\n const filtered = event.messages.filter(\n (msg) => !(msg.role === \"custom\" && msg.customType === \"debug-only\"),\n );\n return { messages: filtered };\n });\n}\n```\n\n### Register slash command with command-safe context methods\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\nexport default function (pi: HookAPI): void {\n pi.registerCommand(\"handoff\", {\n description: \"Create a new session with setup message\",\n handler: async (_args, ctx) => {\n await ctx.waitForIdle();\n await ctx.newSession({\n parentSession: ctx.sessionManager.getSessionFile(),\n setup: async (sm) => {\n sm.appendMessage({\n role: \"user\",\n content: [\n { type: \"text\", text: \"Continue from prior session summary.\" },\n ],\n timestamp: Date.now(),\n });\n },\n });\n },\n });\n}\n```\n\n## Export surface\n\n`src/extensibility/hooks/index.ts` and the package subpath `@oh-my-pi/pi-coding-agent/extensibility/hooks` export:\n\n- loading APIs (`discoverAndLoadHooks`, `loadHooks`)\n- runner and wrapper (`HookRunner`, `HookToolWrapper`)\n- all hook types\n- `execCommand` re-export\n\nThe package root (`@oh-my-pi/pi-coding-agent`) does not re-export `HookAPI`; import legacy hook types from the hooks subpath.\n",
|
|
19
19
|
"marketplace.md": "# Marketplace plugin system\n\nThe marketplace system lets you discover, install, and manage plugins from Git-hosted catalogs. It is compatible with the Claude Code plugin registry format.\n\n## Quick start\n\n```\n/marketplace add anthropics/claude-plugins-official\n/marketplace install wordpress.com@claude-plugins-official\n```\n\nOr just type `/marketplace` with no arguments to open the interactive plugin browser.\n\n## Concepts\n\nA **marketplace** is a Git repository (or local directory) containing a catalog file at `.claude-plugin/marketplace.json`. The catalog lists available plugins with their sources, descriptions, and metadata.\n\nA **plugin** is a directory containing skills, commands, hooks, MCP servers, or LSP servers. Plugins are identified by `name@marketplace` (e.g. `code-review@claude-plugins-official`).\n\n**Scopes**: plugins can be installed at two scopes:\n\n- **user** (default) -- available in all projects, stored in `~/.omp/plugins/installed_plugins.json`\n- **project** -- available only in the current project, stored in `.omp/plugins/installed_plugins.json`\n\nProject-scoped installs shadow user-scoped installs of the same plugin.\n\n## Commands\n\n### Interactive mode\n\n| Command | Effect |\n| -------------- | ----------------------------------------- |\n| `/marketplace` | Open interactive plugin browser (install) |\n\n### Marketplace management\n\n| Command | Effect |\n| ---------------------------- | -------------------------------------------- |\n| `/marketplace add <source>` | Add a marketplace source |\n| `/marketplace remove <name>` | Remove a marketplace |\n| `/marketplace update [name]` | Re-fetch catalog(s); omit name to update all |\n| `/marketplace list` | List configured marketplaces |\n\n### Plugin operations\n\n| Command | Effect |\n| ------------------------------------------------------------------------- | ---------------------------------- |\n| `/marketplace discover [marketplace]` | Browse available plugins |\n| `/marketplace install [--force] [--scope user\\|project] name@marketplace` | Install a plugin |\n| `/marketplace uninstall [--scope user\\|project] name@marketplace` | Uninstall a plugin |\n| `/marketplace installed` | List installed marketplace plugins |\n| `/marketplace upgrade [--scope user\\|project] [name@marketplace]` | Upgrade one or all plugins |\n\n### CLI equivalents\n\nThe same operations are available from the command line:\n\n```\nomp plugin marketplace add <source>\nomp plugin marketplace remove <name>\nomp plugin marketplace update [name]\nomp plugin marketplace list\nomp plugin discover [marketplace]\nomp plugin install [--force] [--scope user|project] name@marketplace\nomp plugin uninstall [--scope user|project] name@marketplace\nomp plugin upgrade [--scope user|project] [name@marketplace]\n```\n\n## Marketplace sources\n\nWhen you run `/marketplace add <source>`, the system classifies the source:\n\n| Source format | Type | Example |\n| ------------------------------- | ------------------ | -------------------------------------- |\n| `owner/repo` | GitHub shorthand | `anthropics/claude-plugins-official` |\n| `https://...*.json` | Direct catalog URL | `https://example.com/marketplace.json` |\n| `https://...*.git` or `git@...` | Git repository | `https://github.com/org/repo.git` |\n| `./path` or `~/path` or `/path` | Local directory | `./my-marketplace` |\n\nThe system clones the repository (or reads the local directory), locates `.claude-plugin/marketplace.json`, validates it, and caches the catalog locally.\n\n## Catalog format (marketplace.json)\n\nA marketplace catalog lives at `.claude-plugin/marketplace.json` in the repository root:\n\n```json\n{\n \"$schema\": \"https://anthropic.com/claude-code/marketplace.schema.json\",\n \"name\": \"my-marketplace\",\n \"owner\": {\n \"name\": \"Your Name\",\n \"email\": \"you@example.com\"\n },\n \"description\": \"A collection of plugins\",\n \"plugins\": [\n {\n \"name\": \"my-plugin\",\n \"description\": \"What this plugin does\",\n \"source\": \"./plugins/my-plugin\",\n \"category\": \"development\",\n \"homepage\": \"https://github.com/you/my-plugin\"\n }\n ]\n}\n```\n\n### Required fields\n\n| Field | Description |\n| ------------ | ---------------------------------------------------------------------------------------------------------------- |\n| `name` | Marketplace name. Lowercase alphanumeric, hyphens, and dots. Must start and end with alphanumeric. Max 64 chars. |\n| `owner.name` | Marketplace owner name |\n| `plugins` | Array of plugin entries |\n\n### Plugin entry fields\n\n| Field | Required | Description |\n| ------------- | -------- | ---------------------------------------------------------------- |\n| `name` | yes | Plugin name (same rules as marketplace name) |\n| `source` | yes | Where to find the plugin (see below) |\n| `description` | no | Short description |\n| `version` | no | Version string |\n| `author` | no | `{ name, email? }` |\n| `homepage` | no | URL |\n| `category` | no | Category string (e.g. `development`, `productivity`, `security`) |\n| `tags` | no | Array of string tags |\n| `strict` | no | Boolean |\n| `commands` | no | Slash commands provided |\n| `agents` | no | Agents provided |\n| `hooks` | no | Hook definitions |\n| `mcpServers` | no | MCP server definitions |\n| `lspServers` | no | LSP server definitions |\n\n### Plugin source formats\n\nThe `source` field supports several formats:\n\n**Relative path** (within the marketplace repo):\n\n```json\n\"source\": \"./plugins/my-plugin\"\n```\n\n**Git repository URL**:\n\n```json\n\"source\": {\n \"source\": \"url\",\n \"url\": \"https://github.com/org/repo.git\",\n \"sha\": \"abc123...\"\n}\n```\n\n**GitHub shorthand**:\n\n```json\n\"source\": {\n \"source\": \"github\",\n \"repo\": \"org/repo\",\n \"ref\": \"main\",\n \"sha\": \"abc123...\"\n}\n```\n\n**Git subdirectory** (monorepo):\n\n```json\n\"source\": {\n \"source\": \"git-subdir\",\n \"url\": \"https://github.com/org/monorepo.git\",\n \"path\": \"plugins/my-plugin\",\n \"ref\": \"main\",\n \"sha\": \"abc123...\"\n}\n```\n\n**npm package**:\n\n```json\n\"source\": {\n \"source\": \"npm\",\n \"package\": \"@scope/my-plugin\",\n \"version\": \"1.0.0\"\n}\n```\n\n## On-disk layout\n\n```\n~/.omp/\n marketplaces.json # Registry of added marketplaces\n plugins/\n installed_plugins.json # User-scoped installed plugins\n cache/\n marketplaces/ # Cached marketplace catalogs\n plugins/ # Cached plugin directories\n\n<project>/.omp/\n plugins/\n installed_plugins.json # Project-scoped installed plugins\n```\n\n## Naming rules\n\nMarketplace and plugin names must:\n\n- Start and end with a lowercase letter or digit\n- Contain only lowercase letters, digits, hyphens, and dots\n- Be at most 64 characters\n\nPlugin IDs (`name@marketplace`) must be at most 128 characters total.\n\nValid examples: `my-plugin`, `code-review`, `wordpress.com`, `ai-firstify`\nInvalid examples: `-bad`, `bad-`, `.bad`, `Bad`, `under_score`\n",
|
|
20
20
|
"mcp-config.md": "# MCP configuration in OMP\n\nThis guide explains how to add, edit, and validate MCP servers for the OMP coding agent.\n\nSource of truth in code:\n\n- Runtime config types: `packages/coding-agent/src/mcp/types.ts`\n- Config writer: `packages/coding-agent/src/mcp/config-writer.ts`\n- Loader + validation: `packages/coding-agent/src/mcp/config.ts`\n- Standalone `mcp.json` discovery: `packages/coding-agent/src/discovery/mcp-json.ts`\n- Schema: `packages/coding-agent/src/config/mcp-schema.json`\n\n## Preferred config locations\n\nOMP can discover MCP servers from multiple tools (`.claude/`, `.cursor/`, `.vscode/`, `opencode.json`, and more), but for OMP-native configuration you should usually use one of these files:\n\n- Project: `.omp/mcp.json`\n- User: `~/.omp/agent/mcp.json`\n\nOMP also accepts fallback standalone files in the project root:\n\n- `mcp.json`\n- `.mcp.json`\n\nUse `.omp/mcp.json` or `~/.omp/agent/mcp.json` when you want OMP to own the configuration. Use root `mcp.json` / `.mcp.json` only when you want a portable fallback file that other MCP clients may also read.\n\n## Add a schema reference\n\nAdd this line at the top of the file for editor autocomplete and validation:\n\n```json\n{\n \"$schema\": \"https://raw.githubusercontent.com/can1357/oh-my-pi/main/packages/coding-agent/src/config/mcp-schema.json\",\n \"mcpServers\": {}\n}\n```\n\nOMP now writes this automatically when `/mcp add`, `/mcp enable`, `/mcp disable`, `/mcp reauth`, or other config-writing flows create or update an OMP-managed MCP file.\n\n## File shape\n\nOMP supports this top-level structure:\n\n```json\n{\n \"$schema\": \"https://raw.githubusercontent.com/can1357/oh-my-pi/main/packages/coding-agent/src/config/mcp-schema.json\",\n \"mcpServers\": {\n \"server-name\": {\n \"type\": \"stdio\",\n \"command\": \"npx\",\n \"args\": [\"-y\", \"some-mcp-server\"]\n }\n },\n \"disabledServers\": [\"server-name\"]\n}\n```\n\nTop-level keys:\n\n- `$schema` — optional JSON Schema URL for tooling\n- `mcpServers` — map of server name to server config\n- `disabledServers` — user-level denylist used to turn off discovered servers by name; runtime loading reads this list from `~/.omp/agent/mcp.json`\n\nServer names must match `^[a-zA-Z0-9_.-]{1,100}$`.\n\n## Supported server fields\n\nShared fields for every transport:\n\n- `enabled?: boolean` — skip this server when `false`\n- `timeout?: number` — connection timeout in milliseconds\n- `auth?: { ... }` — auth metadata used by OMP for OAuth/API-key flows\n- `oauth?: { ... }` — explicit OAuth client settings used during auth/reauth\n\n### `stdio` transport\n\n`stdio` is the default when `type` is omitted.\n\nRequired:\n\n- `command: string`\n\nOptional:\n\n- `type?: \"stdio\"`\n- `args?: string[]`\n- `env?: Record<string, string>`\n- `cwd?: string`\n\nExample:\n\n```json\n{\n \"$schema\": \"https://raw.githubusercontent.com/can1357/oh-my-pi/main/packages/coding-agent/src/config/mcp-schema.json\",\n \"mcpServers\": {\n \"filesystem\": {\n \"command\": \"npx\",\n \"args\": [\n \"-y\",\n \"@modelcontextprotocol/server-filesystem\",\n \"/Users/alice/projects\",\n \"/Users/alice/Documents\"\n ]\n }\n }\n}\n```\n\nThis follows the official Filesystem MCP server package (`@modelcontextprotocol/server-filesystem`).\n\n### `http` transport\n\nRequired:\n\n- `type: \"http\"`\n- `url: string`\n\nOptional:\n\n- `headers?: Record<string, string>`\n\nExample:\n\n```json\n{\n \"$schema\": \"https://raw.githubusercontent.com/can1357/oh-my-pi/main/packages/coding-agent/src/config/mcp-schema.json\",\n \"mcpServers\": {\n \"github\": {\n \"type\": \"http\",\n \"url\": \"https://api.githubcopilot.com/mcp/\"\n }\n }\n}\n```\n\nThis matches GitHub's hosted GitHub MCP server endpoint.\n\n### `sse` transport\n\nRequired:\n\n- `type: \"sse\"`\n- `url: string`\n\nOptional:\n\n- `headers?: Record<string, string>`\n\nExample:\n\n```json\n{\n \"$schema\": \"https://raw.githubusercontent.com/can1357/oh-my-pi/main/packages/coding-agent/src/config/mcp-schema.json\",\n \"mcpServers\": {\n \"legacy-remote\": {\n \"type\": \"sse\",\n \"url\": \"https://example.com/mcp/sse\"\n }\n }\n}\n```\n\n`sse` is still supported for compatibility, but the MCP spec now prefers Streamable HTTP (`type: \"http\"`) for new servers.\n\n## Auth fields\n\nOMP understands two auth-related objects.\n\n### `auth`\n\n```json\n{\n \"type\": \"oauth\" | \"apikey\",\n \"credentialId\": \"optional-stored-credential-id\",\n \"tokenUrl\": \"optional-token-endpoint\",\n \"clientId\": \"optional-client-id\",\n \"clientSecret\": \"optional-client-secret\"\n}\n```\n\nUse this when OMP should remember how to rehydrate credentials for a server.\n\n### `oauth`\n\n```json\n{\n \"clientId\": \"...\",\n \"clientSecret\": \"...\",\n \"redirectUri\": \"...\",\n \"callbackPort\": 3334,\n \"callbackPath\": \"/oauth/callback\"\n}\n```\n\nUse this when the MCP server requires explicit OAuth client settings.\n\nSlack is the clearest current example. Slack's MCP server is hosted at `https://mcp.slack.com/mcp`, uses Streamable HTTP, and requires confidential OAuth with your Slack app's client credentials.\n\nExample:\n\n```json\n{\n \"$schema\": \"https://raw.githubusercontent.com/can1357/oh-my-pi/main/packages/coding-agent/src/config/mcp-schema.json\",\n \"mcpServers\": {\n \"slack\": {\n \"type\": \"http\",\n \"url\": \"https://mcp.slack.com/mcp\",\n \"oauth\": {\n \"clientId\": \"YOUR_SLACK_CLIENT_ID\",\n \"clientSecret\": \"YOUR_SLACK_CLIENT_SECRET\"\n },\n \"auth\": {\n \"type\": \"oauth\",\n \"tokenUrl\": \"https://slack.com/api/oauth.v2.user.access\",\n \"clientId\": \"YOUR_SLACK_CLIENT_ID\",\n \"clientSecret\": \"YOUR_SLACK_CLIENT_SECRET\"\n }\n }\n }\n}\n```\n\nRelevant Slack endpoints from Slack's docs:\n\n- MCP endpoint: `https://mcp.slack.com/mcp`\n- Authorization endpoint: `https://slack.com/oauth/v2_user/authorize`\n- Token endpoint: `https://slack.com/api/oauth.v2.user.access`\n\n## Common copy-paste examples\n\n### Filesystem server via stdio\n\n```json\n{\n \"$schema\": \"https://raw.githubusercontent.com/can1357/oh-my-pi/main/packages/coding-agent/src/config/mcp-schema.json\",\n \"mcpServers\": {\n \"filesystem\": {\n \"command\": \"npx\",\n \"args\": [\n \"-y\",\n \"@modelcontextprotocol/server-filesystem\",\n \"/absolute/path/one\",\n \"/absolute/path/two\"\n ]\n }\n }\n}\n```\n\n### GitHub hosted server via HTTP\n\n```json\n{\n \"$schema\": \"https://raw.githubusercontent.com/can1357/oh-my-pi/main/packages/coding-agent/src/config/mcp-schema.json\",\n \"mcpServers\": {\n \"github\": {\n \"type\": \"http\",\n \"url\": \"https://api.githubcopilot.com/mcp/\"\n }\n }\n}\n```\n\n### GitHub local server via Docker\n\n```json\n{\n \"$schema\": \"https://raw.githubusercontent.com/can1357/oh-my-pi/main/packages/coding-agent/src/config/mcp-schema.json\",\n \"mcpServers\": {\n \"github\": {\n \"command\": \"docker\",\n \"args\": [\n \"run\",\n \"-i\",\n \"--rm\",\n \"-e\",\n \"GITHUB_PERSONAL_ACCESS_TOKEN\",\n \"ghcr.io/github/github-mcp-server\"\n ],\n \"env\": {\n \"GITHUB_PERSONAL_ACCESS_TOKEN\": \"GITHUB_PERSONAL_ACCESS_TOKEN\"\n }\n }\n }\n}\n```\n\nThis matches GitHub's official local Docker image `ghcr.io/github/github-mcp-server`.\n\n### Slack hosted server via OAuth\n\n```json\n{\n \"$schema\": \"https://raw.githubusercontent.com/can1357/oh-my-pi/main/packages/coding-agent/src/config/mcp-schema.json\",\n \"mcpServers\": {\n \"slack\": {\n \"type\": \"http\",\n \"url\": \"https://mcp.slack.com/mcp\",\n \"oauth\": {\n \"clientId\": \"YOUR_SLACK_CLIENT_ID\",\n \"clientSecret\": \"YOUR_SLACK_CLIENT_SECRET\"\n },\n \"auth\": {\n \"type\": \"oauth\",\n \"tokenUrl\": \"https://slack.com/api/oauth.v2.user.access\",\n \"clientId\": \"YOUR_SLACK_CLIENT_ID\",\n \"clientSecret\": \"YOUR_SLACK_CLIENT_SECRET\"\n }\n }\n }\n}\n```\n\n## Secrets and variable resolution\n\nThis is the part that usually trips people up.\n\n### In `.omp/mcp.json` and `~/.omp/agent/mcp.json`\n\nBefore OMP launches a stdio server or makes an HTTP/SSE request, it resolves stdio `env` values and HTTP/SSE `headers` values like this:\n\n1. If a value starts with `!`, OMP runs the rest as a shell command with a 10s timeout and uses trimmed stdout.\n2. If the command fails, times out, or prints only whitespace, that `env`/`headers` entry is omitted.\n3. Otherwise OMP checks whether the value names an environment variable.\n4. If that environment variable is set to a non-empty value, OMP uses the environment value; otherwise it uses the string literally.\n\nExamples:\n\n```json\n{\n \"env\": {\n \"GITHUB_PERSONAL_ACCESS_TOKEN\": \"GITHUB_PERSONAL_ACCESS_TOKEN\"\n },\n \"headers\": {\n \"X-MCP-Insiders\": \"true\"\n }\n}\n```\n\nThat means this is valid and convenient for local secrets:\n\n- `\"GITHUB_PERSONAL_ACCESS_TOKEN\": \"GITHUB_PERSONAL_ACCESS_TOKEN\"` → copy from the current shell environment\n- `\"Authorization\": \"Bearer hardcoded-token\"` → use the literal value\n- `\"Authorization\": \"!printf 'Bearer %s' \\\"$GITHUB_TOKEN\\\"\"` → build the header from a command\n\n### In root `mcp.json` and `.mcp.json`\n\nThe standalone fallback loader also expands `${VAR}` and `${VAR:-default}` inside strings during discovery for `command`, `args`, `env`, `cwd`, `url`, `headers`, `auth`, and `oauth`.\n\nExample:\n\n```json\n{\n \"mcpServers\": {\n \"github\": {\n \"type\": \"http\",\n \"url\": \"https://api.githubcopilot.com/mcp/\",\n \"headers\": {\n \"Authorization\": \"Bearer ${GITHUB_TOKEN}\"\n }\n }\n }\n}\n```\n\nIf you want the least surprising OMP behavior, prefer `.omp/mcp.json` or `~/.omp/agent/mcp.json` and use explicit env/header values.\n\n## `disabledServers`\n\n`disabledServers` is read from the user config file (`~/.omp/agent/mcp.json`) when a server is discovered from any source and you want OMP to ignore it without editing that other tool's config.\n\nExample:\n\n```json\n{\n \"$schema\": \"https://raw.githubusercontent.com/can1357/oh-my-pi/main/packages/coding-agent/src/config/mcp-schema.json\",\n \"disabledServers\": [\"github\", \"slack\"]\n}\n```\n\n## `/mcp add` vs editing JSON directly\n\nUse `/mcp add` when you want guided setup.\n\nUse direct JSON editing when:\n\n- you need a transport or auth option the wizard does not prompt for yet\n- you want to paste a server definition from another MCP client\n- you want schema-backed validation in your editor\n\nAfter editing, use:\n\n- `/mcp reload` to rediscover and reconnect servers in the current session\n- `/mcp list` to see which config file a server came from\n- `/mcp test <name>` to test a single server\n- `/mcp reconnect <name>` to reconnect one server without rediscovering all configs\n- `/mcp resources`, `/mcp prompts`, and `/mcp notifications` to inspect non-tool MCP capabilities\n\n## Validation rules OMP enforces\n\nFrom `validateServerConfig()` in `packages/coding-agent/src/mcp/config.ts`:\n\n- `stdio` requires `command`\n- `http` and `sse` require `url`\n- a server cannot set both `command` and `url`\n- unknown `type` values are rejected\n\nPractical implications:\n\n- Omitting `type` means `stdio`\n- If you paste a remote server config and forget `\"type\": \"http\"`, OMP will treat it as `stdio` and complain that `command` is missing\n- `sse` remains valid for compatibility, but new hosted servers should usually be configured as `http`\n\n## Discovery and precedence\n\nOMP does not merge duplicate server definitions across files. Discovery providers are prioritized, and the higher-priority definition wins. Separately, `disabledServers` from `~/.omp/agent/mcp.json` can suppress a discovered server by name.\n\nIn practice:\n\n- prefer `.omp/mcp.json` or `~/.omp/agent/mcp.json` when you want an OMP-specific override\n- keep server names unique across tools when possible\n- use `disabledServers` in the user config when a third-party config keeps reintroducing a server you do not want\n\n## Troubleshooting\n\n### `Server \"name\": stdio server requires \"command\" field`\n\nYou probably omitted `type: \"http\"` on a remote server.\n\n### `Server \"name\": both \"command\" and \"url\" are set`\n\nPick one transport. OMP treats `command` as stdio and `url` as http/sse.\n\n### `/mcp add` worked but the server still does not connect\n\nThe JSON is valid, but the server may still be unreachable. Use `/mcp test <name>` and check whether:\n\n- the binary or Docker image exists\n- required environment variables are set\n- the remote URL is reachable\n- the OAuth or API token is valid\n\n### The server exists in another tool's config but not in OMP\n\nRun `/mcp list`. OMP discovers many third-party MCP files, but project-level loading can also be disabled via the `mcp.enableProjectConfig` setting, and a user-level `disabledServers` entry can suppress a server by name.\n\n## References\n\n- MCP transport spec: https://modelcontextprotocol.io/specification/2025-03-26/basic/transports\n- Filesystem server package: https://www.npmjs.com/package/@modelcontextprotocol/server-filesystem\n- GitHub MCP server: https://github.com/github/github-mcp-server\n- Slack MCP server docs: https://docs.slack.dev/ai/slack-mcp-server/\n",
|
|
@@ -37,10 +37,10 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
|
|
|
37
37
|
"porting-from-pi-mono.md": "# Porting From pi-mono: A Practical Merge Guide\n\nThis guide is a repeatable checklist for porting changes from pi-mono into this repo.\nUse it for any merge: single file, feature branch, or full release sync.\n\n## Last Sync Point (historical upstream marker)\n\n**Commit:** `b21b42d032919de2f2e6920a76fa9a37c3920c0a`\n**Date:** 2026-03-22\n\nUpdate this section after each sync; do not reuse the previous range. This commit is an upstream pi-mono marker and may not exist in this repo's local object database.\n\nWhen starting a new sync, generate patches from this commit forward in a pi-mono checkout or remote that contains the commit:\n\n```bash\ngit format-patch b21b42d032919de2f2e6920a76fa9a37c3920c0a..HEAD --stdout > changes.patch\n```\n\n## 0) Define the scope\n\n- Identify the upstream reference (commit, tag, or PR).\n- List the packages or folders you plan to touch.\n- Decide which features are in-scope and which are intentionally skipped.\n\n## 1) Bring code over safely\n\n- Prefer a clean, focused diff rather than a wholesale copy.\n- Avoid copying built artifacts or generated files.\n- If upstream added new files, add them explicitly and review contents.\n\n## 2) Match import extension conventions\n\nMost runtime TypeScript sources omit `.js` in internal imports, but several current entrypoints and tool modules keep `.js` for ESM/runtime compatibility. Follow the surrounding file and package export style; do not blanket-strip or blanket-add extensions.\n\n- In `packages/coding-agent` runtime sources, prefer extensionless internal imports when the surrounding module does, but preserve existing `.js` imports in files that already require them.\n- In `packages/tui/test` and `packages/natives/bench`, keep `.js` where surrounding files already use it.\n- Keep real file extensions when required by tooling or import assertions (e.g., `.json`, `.css`, `.md` text embeds).\n- Example: `import { x } from \"./foo.js\";` → `import { x } from \"./foo\";` only when that package/file convention is extensionless.\n\n## 3) Replace import scopes\n\nUpstream uses different package scopes. Replace them consistently.\n\n- Replace old scopes with the local scope used here.\n- Examples (adjust to match the actual packages you are porting):\n - `@mariozechner/pi-coding-agent` → `@oh-my-pi/pi-coding-agent`\n - `@mariozechner/pi-agent-core` → `@oh-my-pi/pi-agent-core`\n - `@mariozechner/pi-tui` → `@oh-my-pi/pi-tui`\n - `@mariozechner/pi-ai` → `@oh-my-pi/pi-ai`\n\n## 4) Use Bun APIs where they improve on Node\n\nWe run on Bun, but the current source intentionally mixes Bun APIs with small Node standard-library APIs. Replace Node APIs only when Bun provides a clearer, safer, or simpler implementation; do not mechanically rewrite every Node import.\n\n**Prefer replacing when porting new code:**\n\n- Process spawning: prefer Bun Shell `$` for simple commands; use `Bun.spawn`/`Bun.spawnSync` for streaming or process control. Keep existing `child_process` only where its exact semantics are needed.\n- HTTP clients: `node-fetch`, `axios` → native `fetch`\n- SQLite: `better-sqlite3` → `bun:sqlite`\n- Env loading: `dotenv` → Bun loads `.env` automatically\n- Runtime text/assets: prefer Bun imports such as `with { type: \"text\" }` or `Bun.file()` over copy steps or bundled fallback file reads.\n\n**DO NOT replace (these work fine in Bun):**\n\n- `os.homedir()` — do NOT replace with `Bun.env.HOME` or literal `\"~\"`\n- `os.tmpdir()` — do NOT replace with `Bun.env.TMPDIR || \"/tmp\"` or hardcoded paths\n- `fs.mkdtempSync()` — do NOT replace with manual path construction\n- `path.join()`, `path.resolve()`, etc. — these are fine\n\n**Import style:** Use the `node:` prefix for Node standard-library imports. Namespace imports are common, but named imports are acceptable where the surrounding code already uses them.\n\n**Additional Bun conventions:**\n\n- Prefer Bun Shell `$` for short, non-streaming commands; use `Bun.spawn` only when you need streaming I/O or process control.\n- Use `Bun.file()`/`Bun.write()` for simple files and `node:fs/promises` for directory-oriented operations. Existing synchronous `node:fs` calls are acceptable when the calling flow is intentionally synchronous.\n- Avoid `Bun.file().exists()` checks; use `isEnoent` handling in try/catch.\n- Prefer `Bun.sleep(ms)` over `setTimeout` wrappers.\n\n**Wrong:**\n\n```typescript\n// BROKEN: env vars may be undefined, \"~\" is not expanded\nconst home = Bun.env.HOME || \"~\";\nconst tmp = Bun.env.TMPDIR || \"/tmp\";\n```\n\n**Correct:**\n\n```typescript\nimport * as os from \"node:os\";\nimport * as fs from \"node:fs\";\nimport * as path from \"node:path\";\n\nconst configDir = path.join(os.homedir(), \".config\", \"myapp\");\nconst tempDir = fs.mkdtempSync(path.join(os.tmpdir(), \"myapp-\"));\n```\n\n## 5) Prefer Bun embeds (no copying)\n\nDo not add new runtime asset copy steps. Keep assets in repo and prefer Bun embeds/imports; preserve existing explicit generation workflows such as `packages/coding-agent/src/export/html/template.generated.ts`.\n\n- If upstream copies assets into a dist folder, replace with Bun-friendly embeds.\n- Prompts are static `.md` files; use Bun text imports (`with { type: \"text\" }`) and Handlebars instead of inline prompt strings.\n- Use `import.meta.dir` + `Bun.file` to load adjacent non-text resources.\n- Keep assets in-repo and let the bundler include them.\n- Eliminate copy scripts unless the user explicitly requests them or the package already has an intentional generation step.\n- If upstream reads a bundled fallback file at runtime, replace filesystem reads with a Bun text embed import unless the current package already uses a generated asset pipeline.\n - Example (Codex instructions fallback):\n - `const FALLBACK_PROMPT_PATH = join(import.meta.dir, \"codex-instructions.md\");` -> removed\n - `import FALLBACK_INSTRUCTIONS from \"./codex-instructions.md\" with { type: \"text\" };`\n - Use `return FALLBACK_INSTRUCTIONS;` instead of `readFileSync(FALLBACK_PROMPT_PATH, \"utf8\")`\n\n## 6) Port `package.json` carefully\n\nTreat `package.json` as a contract. Merge intentionally.\n\n- Keep existing `name`, `version`, `type`, `exports`, and `bin` unless the port requires changes.\n- Replace npm/node scripts with Bun equivalents (e.g., `bun check`, `bun test`).\n- Ensure dependencies use the correct scope.\n- Do not downgrade dependencies to fix type errors; upgrade instead.\n- Validate workspace package links and `peerDependencies`.\n\n## 7) Align code style and tooling\n\n- Keep existing formatting conventions.\n- Do not introduce `any` unless required.\n- Avoid dynamic imports unless they are required for optional dependencies, startup cost, or runtime-only modules; prefer top-level imports otherwise.\n- Never build prompts in code; prompts are static `.md` files rendered with Handlebars.\n- In `packages/coding-agent`, use `logger` from `@oh-my-pi/pi-utils` for internal/runtime logging; CLI command files may use `console.*` for intentional user-facing output.\n- Use `Promise.withResolvers()` instead of `new Promise((resolve, reject) => ...)`.\n- Prefer ES `#` private fields for new encapsulated state. Constructor parameter properties already exist in current code and are acceptable; do not churn unrelated access modifiers while porting.\n- Prefer existing helpers and utilities over new ad-hoc code.\n Preserve Bun-first infrastructure changes already made in this repo:\n - Runtime is Bun (no Node entry points for the main CLI).\n - Package manager is Bun (no npm lockfiles).\n - Heavy Node APIs should not be introduced casually; current source still uses selected Node APIs (`node:crypto`, `node:readline`, synchronous `node:fs`, and `child_process`) where they fit provider, CLI, or process-control semantics.\n - Lightweight Node APIs (`os.homedir`, `os.tmpdir`, `fs.mkdtempSync`, `path.*`) are kept.\n - CLI shebangs use `bun` (not `node`, not `tsx`).\n - TypeScript packages generally use source files directly; `@oh-my-pi/pi-natives` exports generated native bindings from `packages/natives/native`.\n - CI workflows run Bun for install/check/test.\n\n## 8) Remove old compatibility layers\n\nUnless requested, remove upstream compatibility shims.\n\n- Delete old APIs that were replaced.\n- Update all call sites to the new API directly.\n- Do not keep `*_v2` or parallel versions.\n\n## 9) Update docs and references\n\n- Replace pi-mono repo links where appropriate.\n- Update examples to use Bun and correct package scopes.\n- Ensure README instructions still match the current repo behavior.\n\n## 10) Validate the port\n\nRun the standard checks after changes:\n\n- `bun check`\n\nIf the repo already has failing checks unrelated to your changes, call that out.\nTests use Bun's runner (not Vitest), but only run `bun test` when explicitly requested.\n\n## 11) Protect improved features (regression trap list)\n\nIf you already improved behavior locally, treat those as **non‑negotiable**. Before porting, write down\nthe improvements and add explicit checks so they don’t get lost in the merge.\n\n- **Freeze the expected behavior**: add a short “before/after” note for each improvement (inputs, outputs,\n defaults, edge cases). This prevents silent rollback.\n- **Map old → new APIs**: if upstream renamed concepts (hooks → extensions, custom tools → tools, etc.),\n ensure every old entry point still wires through. One missed flag or export equals lost functionality.\n- **Verify exports**: check `package.json` `exports`, public types, and barrel files. Upstream ports often\n forget to re-export local additions.\n- **Cover non‑happy paths**: if you fixed error handling, timeouts, or fallback logic, add a test or at\n least a manual checklist that exercises those paths.\n- **Check defaults and config merge order**: improvements often live in defaults. Confirm new defaults\n didn’t revert (e.g., new config precedence, disabled features, tool lists).\n- **Audit env/shell behavior**: if you fixed execution or sandboxing, verify the new path still uses your\n sanitized env and does not reintroduce alias/function overrides.\n- **Re-run targeted samples**: keep a minimal set of \"known good\" examples and run them after the port\n (CLI flags, extension registration, tool execution).\n\n## 12) Detect and handle reworked code\n\nBefore porting a file, check if upstream significantly refactored it:\n\n```bash\n# Compare the file you're about to port against what you have locally\ngit diff HEAD upstream/main -- path/to/file.ts\n```\n\nIf the diff shows the file was **reworked** (not just patched):\n\n- New abstractions, renamed concepts, merged modules, changed data flow\n\nThen you must **read the new implementation thoroughly** before porting. Blind merging of reworked code loses functionality because:\n\nNote: interactive mode was recently split into controllers/utils/types. When backporting related changes, port updates into the individual files we created and ensure `interactive-mode.ts` wiring stays in sync.\n\n1. **Defaults change silently** - A new variable `defaultFoo = [a, b]` may replace an old `getAllFoo()` that returned `[a, b, c, d, e]`.\n\n2. **API options get dropped** - When systems merge (e.g., `hooks` + `customTools` → `extensions`), old options may not wire through to the new implementation.\n\n3. **Code paths go stale** - A renamed concept (e.g., `hookMessage` → `custom`) needs updates in every switch statement, type guard, and handler—not just the definition.\n\n4. **Context/capabilities shrink** - Old APIs may have exposed `{ logger, typebox, pi }` that new APIs forgot to include.\n\n### Semantic porting process\n\nWhen upstream reworked a module:\n\n1. **Read the old implementation** - Understand what it did, what options it accepted, what it exposed.\n\n2. **Read the new implementation** - Understand the new abstractions and how they map to old behavior.\n\n3. **Verify feature parity** - For each capability in the old code, confirm the new code preserves it or explicitly removes it.\n\n4. **Grep for stragglers** - Search for old names/concepts that may have been missed in switch statements, handlers, UI components.\n\n5. **Test the boundaries** - CLI flags, SDK options, event handlers, default values—these are where regressions hide.\n\n### Quick checks\n\n```bash\n# Find all uses of an old concept that may need updating\nrg \"oldConceptName\" --type ts\n\n# Compare default values between versions\ngit show upstream/main:path/to/file.ts | rg \"default|DEFAULT\"\n\n# Check if all enum/union values have handlers\nrg \"case \\\"\" path/to/file.ts\n```\n\n## 13) Quick audit checklist\n\nUse this as a final pass before you finish:\n\n- [ ] Import extensions follow the local package convention (no blanket `.js` stripping)\n- [ ] No newly introduced Node-only APIs unless they match an existing justified pattern\n- [ ] All package scopes updated\n- [ ] `package.json` scripts use Bun\n- [ ] Prompts are `.md` text imports (no inline prompt strings)\n- [ ] No internal/runtime `console.*` in coding-agent; CLI user-facing output is intentional\n- [ ] Assets load via Bun embed/import patterns, or through an existing intentional generation pipeline\n- [ ] Tests or checks run (or explicitly noted as blocked)\n- [ ] No functionality regressions (see sections 11-12)\n\n## 14) Commit message format\n\nWhen committing a backport, follow the repo format `<type>(scope): <past-tense description>` and keep the commit\nrange in the title.\n\n```\nfix(coding-agent): backported pi-mono changes (<from>..<to>)\n\npackages/<package>:\n- <type>: <description>\n- <type>: <description> (#<issue> by @<contributor>)\n\npackages/<other-package>:\n- <type>: <description>\n```\n\n**Example:**\n\n```\nfix(coding-agent): backported pi-mono changes (9f3eef65f..52532c7c0)\n\npackages/ai:\n- fix: handle \"sensitive\" stop reason from Anthropic API\n- fix: normalize tool call IDs with special characters for Responses API\n- fix: add overflow detection for Bedrock, MiniMax, Kimi providers\n- fix: 429 status is rate limiting, not context overflow\n\npackages/tui:\n- fix: refactored autocomplete state tracking\n- fix: file autocomplete should not trigger on empty text\n- fix: configurable autocomplete max visible items\n- fix: improved table column width calculation with word-aware wrapping\n\npackages/coding-agent:\n- fix: preserve external config.yml edits on save (#1046 by @nicobailonMD)\n- fix: resolve macOS NFD and curly quote variants in file paths\n```\n\n**Rules:**\n\n- Group changes by package\n- Use conventional commit types (`fix`, `feat`, `refactor`, `perf`, `docs`)\n- Include upstream issue/PR numbers and contributor attribution for external contributions\n- The commit range in the title helps track sync points\n\n## 15) Intentional Divergences\n\nOur fork has architectural decisions that differ from upstream. **Do not port these upstream patterns:**\n\n### UI Architecture\n\n| Upstream | Our Fork | Reason |\n| ------------------------------------------- | --------------------------------------------------------- | --------------------------------------------------------------------- |\n| `FooterDataProvider` class | `StatusLineComponent` | Simpler, integrated status line |\n| `ctx.ui.setHeader()` / `ctx.ui.setFooter()` | No-op stubs in current extension contexts | Not currently wired to replace the TUI status/header UI |\n| `ctx.ui.setEditorComponent()` | No-op stubs in current extension contexts | Custom editor replacement is not currently wired |\n| `InteractiveModeOptions` options object | Positional constructor args (options type still exported) | Keep constructor signature; update the type when upstream adds fields |\n\n### Component Naming\n\n| Upstream | Our Fork |\n| ---------------------------- | ----------------------- |\n| `extension-input.ts` | `hook-input.ts` |\n| `extension-selector.ts` | `hook-selector.ts` |\n| `ExtensionInputComponent` | `HookInputComponent` |\n| `ExtensionSelectorComponent` | `HookSelectorComponent` |\n\n### API Naming\n\n| Upstream | Our Fork | Notes |\n| ---------------------------------------- | ---------------------------------------- | ----------------------------------------- |\n| `sessionManager.appendSessionInfo(name)` | `sessionManager.setSessionName(name)` | We use `sessionName` throughout |\n| `sessionManager.getSessionName()` | `sessionManager.getSessionName()` | Same (we unified to match upstream's RPC) |\n| `agent.sessionName` / `setSessionName()` | `agent.sessionName` / `setSessionName()` | Same |\n\n### File Consolidation\n\n| Upstream | Our Fork | Reason |\n| -------------------------------------------------- | --------------------------------------------------------- | --------------------------------------------- |\n| `clipboard.ts` + `clipboard-image.ts` (tool files) | `src/utils/clipboard.ts` backed by `@oh-my-pi/pi-natives` | Native implementation with a small TS wrapper |\n\n### Test Framework\n\n| Upstream | Our Fork |\n| ------------------------- | ----------------------------- |\n| `vitest` with `vi.mock()` | `bun:test` with `vi` from bun |\n| `node:test` assertions | `expect()` matchers |\n\n### Tool Architecture\n\n| Upstream | Our Fork | Notes |\n| ----------------------------------- | ------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------- |\n| `createTool(cwd: string, options?)` | `createTools(session: ToolSession)` via `BUILTIN_TOOLS` registry | Tool factories accept `ToolSession` and can return `null` |\n| Per-tool `*Operations` interfaces | Only current per-tool override interfaces remain (for example `FindOperations`) | Used for SSH/remote overrides where present |\n| Node.js `fs/promises` everywhere | Bun file APIs for simple file writes/reads, `node:fs/promises` for dirs, selected sync `node:fs` where needed | Prefer Bun APIs when they simplify |\n\n### Auth Storage\n\n| Upstream | Our Fork | Notes |\n| ------------------------------- | ------------------------------------------- | -------------------------------------------- |\n| `proper-lockfile` + `auth.json` | `agent.db` (bun:sqlite) | Credentials stored exclusively in `agent.db` |\n| Single credential per provider | Multi-credential with round-robin selection | Session affinity and backoff logic preserved |\n\n### Extensions\n\n| Upstream | Our Fork |\n| ----------------------------- | ------------------------------------------------- |\n| `jiti` for TypeScript loading | Native Bun `import()` |\n| `pkg.pi` manifest field | `pkg.omp` preferred; fallback to `pkg.pi` remains |\n\n### Skip These Upstream Features\n\nWhen porting, **skip** these files/features entirely:\n\n- `footer-data-provider.ts` — we use StatusLineComponent\n- `clipboard-image.ts` — image clipboard support is exposed through `src/utils/clipboard.ts` backed by `@oh-my-pi/pi-natives`\n- GitHub workflow files — we have our own CI\n- `models.generated.ts` — auto-generated, regenerate locally (as models.json instead)\n\n### Features We Added (Preserve These)\n\nThese exist in our fork but not upstream. **Never overwrite:**\n\n- `StatusLineComponent` in interactive mode\n- Multi-credential auth with session affinity\n- Capability-based discovery system (`defineCapability`, `registerProvider`, `loadCapability`, `skillCapability`, etc.)\n- MCP/Exa/SSH integrations\n- LSP writethrough for format-on-save\n- Bash interception (`checkBashInterception`)\n- Fuzzy path suggestions in read tool\n",
|
|
38
38
|
"porting-to-natives.md": "# Porting to pi-natives (N-API) — Field Notes\n\nThis is a practical guide for moving hot paths into `crates/pi-natives` and wiring them through the generated native package entrypoint. It exists to avoid the same failures happening twice.\n\n## When to port\n\nPort when any of these are true:\n\n- The hot path runs in render loops, tight UI updates, or large batches.\n- JS allocations dominate (string churn, regex backtracking, large arrays).\n- You already have a JS baseline and can benchmark both versions side by side.\n- The work is CPU-bound or blocking I/O that can run on the libuv thread pool.\n- The work is async I/O that can run on Tokio's runtime (for example shell execution).\n\nAvoid ports that depend on JS-only state or dynamic imports. N-API exports should be data-in/data-out. Long-running work should go through `task::blocking` (CPU-bound/blocking I/O) or `task::future` (async I/O) with cancellation where the caller needs `timeoutMs` or `AbortSignal`.\n\n## Current package shape\n\n`@oh-my-pi/pi-natives` no longer has a `packages/natives/src/<module>` TypeScript wrapper layer. The package root points at generated native artifacts:\n\n- runtime entry: `packages/natives/native/index.js`\n- types entry: `packages/natives/native/index.d.ts`\n- loader helpers: `packages/natives/native/loader-state.js`\n- embedded manifest: `packages/natives/native/embedded-addon.js`\n\nConsumers import directly from `@oh-my-pi/pi-natives`. The generated declarations are produced during `bun --cwd=packages/natives run build`.\n\n## Anatomy of a native export\n\n**Rust side:**\n\n- Implementation lives in `crates/pi-natives/src/<module>.rs`.\n- If you add a new module, register it in `crates/pi-natives/src/lib.rs`.\n- Export with `#[napi]`; snake_case exports are converted to camelCase automatically. Use explicit JS names only for true aliases/non-default names. Use `#[napi(object)]` for object-shaped structs.\n- For CPU-bound or blocking work, use `task::blocking(tag, cancel_token, work)`.\n- For async work that needs Tokio, use `task::future(env, tag, work)`.\n- Pass a `CancelToken` when the API exposes `timeoutMs` or `AbortSignal`, and call `heartbeat()` inside long loops.\n\n**Package/build side:**\n\n- `packages/natives/scripts/build-native.ts` runs napi-rs, installs the `.node` artifact, copies generated `index.js`/`index.d.ts`, and appends enum runtime exports.\n- `packages/natives/native/index.js` is the loader that chooses a candidate `.node` file and returns the loaded addon.\n- `packages/natives/package.json` exposes only the package root (`@oh-my-pi/pi-natives`).\n\n**Consumer side:**\n\n- Update direct imports/callsites in `packages/coding-agent` or `packages/tui` when the new export replaces a JS implementation.\n- Keep higher-level policy in consumers unless it belongs in the native primitive itself.\n\n## Porting checklist\n\n1. **Add the Rust implementation**\n\n- Put the core logic in a plain Rust function.\n- If it is a new module, add it to `crates/pi-natives/src/lib.rs`.\n- Expose it with `#[napi]` so the default snake_case -> camelCase mapping stays consistent.\n- Keep signatures owned and simple: `String`, `Vec<String>`, `Uint8Array`, `Either<JsString, Uint8Array>`, or `#[napi(object)]` structs.\n- For CPU-bound or blocking work, use `task::blocking`; for async work, use `task::future`.\n- If exposing cancellation, include `timeout_ms: Option<u32>` and `signal: Option<Unknown<'env>>` in options, create `CancelToken::new(...)`, and heartbeat in long loops.\n\n2. **Build generated bindings**\n\n- Run `bun --cwd=packages/natives run build`.\n- Confirm the generated `packages/natives/native/index.d.ts` includes the new export with the intended JS name/signature.\n- Confirm `packages/natives/native/index.js` still has generated enum exports appended when enum changes are involved.\n\n3. **Update consumers**\n\n- Import the new export directly from `@oh-my-pi/pi-natives`.\n- Replace only callsites where the native implementation is faster/equivalent and preserves behavior.\n- Remove obsolete JS implementation code in the same change when the native path becomes canonical.\n\n4. **Add benchmarks**\n\n- Put benchmarks next to the owning package (`packages/tui/bench`, `packages/natives/bench`, or `packages/coding-agent/bench`).\n- Include a JS baseline and native version in the same run.\n- Use `Bun.nanoseconds()` and a fixed iteration count.\n- Keep benchmark inputs realistic for the hot path.\n\n5. **Run focused verification**\n\n- Build the native package.\n- Run the benchmark.\n- Run the narrow tests or scenario covering the changed export/callsites.\n\n## Pain points and how to avoid them\n\n### 1) Stale platform/variant artifacts\n\nThe loader probes platform-tagged artifacts in deterministic order. For x64, selected variant candidates are tried before the unsuffixed default fallback:\n\n- `modern`: `pi_natives.<tag>-modern.node`, then `...-baseline.node`, then `pi_natives.<tag>.node`.\n- `baseline`: `pi_natives.<tag>-baseline.node`, then `pi_natives.<tag>.node`.\n\nNon-x64 uses `pi_natives.<tag>.node`.\n\nCompiled binaries also probe `<getNativesDir()>/<version>/...` and a legacy user-data directory before package/executable locations. If any earlier candidate is stale, a new export may appear missing.\n\n**Fix:** remove stale candidate/cache files and rebuild.\n\n```bash\nrm packages/natives/native/pi_natives.<platform>-<arch>.node\nrm packages/natives/native/pi_natives.<platform>-<arch>-modern.node\nrm packages/natives/native/pi_natives.<platform>-<arch>-baseline.node\nbun --cwd=packages/natives run build\n```\n\nFor compiled binaries, delete the versioned addon cache shown in the loader error (normally under `~/.omp/natives/<version>` unless `$XDG_DATA_HOME/omp` is used).\n\n### 2) Generated types do not match loaded binary\n\nThis can happen when `native/index.d.ts` was regenerated but the `.node` file being loaded is stale or from a different platform/variant.\n\nVerify the loaded export set from the actual candidate path:\n\n```bash\nbun -e 'const tag = `${process.platform}-${process.arch}`; const mod = require(`./packages/natives/native/pi_natives.${tag}.node`); console.log(Object.keys(mod).sort())'\n```\n\nFix the build/candidate mismatch. Do not paper over it with optional consumer checks if the export is required.\n\n### 3) Rust signature mismatch\n\nKeep N-API signatures simple and owned. Avoid borrowed references like `&str` in public exports. If you need structured data, use `#[napi(object)]` structs. If you need callbacks, use napi-rs `ThreadsafeFunction` and keep callback error/value behavior explicit.\n\n### 4) Enum runtime exports\n\nnapi-rs declarations alone are not enough for JS callers that use enum objects at runtime. `scripts/gen-enums.ts` appends enum objects to `native/index.js`. If you add or change a native enum, verify both `native/index.d.ts` and the generated enum export block in `native/index.js`.\n\n### 5) Benchmarking mistakes\n\n- Do not compare different inputs or allocations.\n- Keep JS and native using identical input arrays.\n- Run both in the same benchmark file to avoid skew.\n- Include enough iterations to smooth startup noise, but keep inputs realistic.\n\n## Benchmark template\n\n```ts\nconst ITERATIONS = 2000;\n\nfunction bench(name: string, fn: () => void): number {\n const start = Bun.nanoseconds();\n for (let i = 0; i < ITERATIONS; i++) fn();\n const elapsed = (Bun.nanoseconds() - start) / 1e6;\n console.log(\n `${name}: ${elapsed.toFixed(2)}ms total (${(elapsed / ITERATIONS).toFixed(6)}ms/op)`,\n );\n return elapsed;\n}\n\nbench(\"feature/js\", () => {\n jsImpl(sample);\n});\n\nbench(\"feature/native\", () => {\n nativeImpl(sample);\n});\n```\n\n## Verification checklist\n\n- Generated `native/index.d.ts` includes the new export and intended TS signature.\n- The loaded `.node` file's `Object.keys(require(candidate))` includes the new export.\n- Runtime enum objects are present when the change adds/changes enums.\n- Bench numbers are recorded in the PR/notes.\n- Call sites are updated only if native is faster/equal and behavior-compatible.\n- Obsolete JS code is removed when the native implementation becomes canonical.\n\n## Rule of thumb\n\n- If native is slower, do not switch callsites. Keep or remove the export based on whether it has a near-term owner.\n- If native is faster and behavior-compatible, switch callsites and keep a benchmark to catch regressions.\n",
|
|
39
39
|
"provider-streaming-internals.md": "# Provider streaming internals\n\nThis document explains how token/tool streaming is normalized in `@oh-my-pi/pi-ai`, then propagated through `@oh-my-pi/pi-agent-core` and `coding-agent` session events.\n\n## End-to-end flow\n\n1. `streamSimple()` (`packages/ai/src/stream.ts`) maps generic options and dispatches to a provider stream function.\n2. Provider stream functions translate provider-native stream events into the unified `AssistantMessageEvent` sequence. Current built-ins include Anthropic, OpenAI Responses/Completions/Codex/Azure Responses, Google Gemini/Gemini CLI/Vertex, Bedrock Converse, Ollama, Cursor, plus GitLab Duo/Kimi wrappers and extension-registered custom APIs.\n3. Each provider pushes events into `AssistantMessageEventStream` (`packages/ai/src/utils/event-stream.ts`), which throttles delta events and exposes:\n - async iteration for incremental updates\n - `result()` for final `AssistantMessage`\n4. `agentLoop` (`packages/agent/src/agent-loop.ts`) consumes those events, mutates in-flight assistant state, and emits `message_update` events carrying the raw `assistantMessageEvent`.\n5. `AgentSession` (`packages/coding-agent/src/session/agent-session.ts`) subscribes to agent events, persists messages, drives extension hooks, and applies session behaviors (retry, compaction, TTSR, streaming-edit abort checks).\n\n## Unified stream contract in `@oh-my-pi/pi-ai`\n\nAll providers emit the same shape (`AssistantMessageEvent` in `packages/ai/src/types.ts`):\n\n- `start`\n- content block lifecycle triplets:\n - text: `text_start` → `text_delta`\\* → `text_end`\n - thinking: `thinking_start` → `thinking_delta`\\* → `thinking_end`\n - tool call: `toolcall_start` → `toolcall_delta`\\* → `toolcall_end`\n- terminal event:\n - `done` with `reason: \"stop\" | \"length\" | \"toolUse\"`\n - or `error` with `reason: \"aborted\" | \"error\"`\n\n`AssistantMessageEventStream` guarantees:\n\n- final result is resolved by terminal event (`done` or `error`)\n- deltas are batched/throttled (~50ms)\n- buffered deltas are flushed before non-delta events and before completion\n\n## Delta throttling and harmonization behavior\n\n`AssistantMessageEventStream` treats `text_delta`, `thinking_delta`, and `toolcall_delta` as mergeable events:\n\n- buffered deltas are merged only when **type + contentIndex** match\n- merge keeps the latest `partial` snapshot\n- non-delta events force immediate flush\n\nThis smooths high-frequency provider streams for TUI/event consumers, but is not provider backpressure: providers still produce at full speed, while the local stream buffers.\n\n## Provider normalization details\n\n## Anthropic (`anthropic-messages`)\n\nSource: `packages/ai/src/providers/anthropic.ts`\n\nNormalization points:\n\n- `message_start` initializes usage (input/output/cache tokens)\n- `content_block_start` maps to text/thinking/toolcall starts\n- `content_block_delta` maps:\n - `text_delta` → `text_delta`\n - `thinking_delta` → `thinking_delta`\n - `input_json_delta` → `toolcall_delta`\n - `signature_delta` updates `thinkingSignature` only (no event)\n- `content_block_stop` emits corresponding `*_end`\n- `message_delta.stop_reason` maps via `mapStopReason()`\n\nTool-call argument streaming:\n\n- each tool block carries internal `partialJson`\n- every JSON delta appends to `partialJson`\n- `arguments` are reparsed on each delta via `parseStreamingJson()`\n- `toolcall_end` reparses once more, then strips `partialJson`\n\n## OpenAI Responses family (`openai-responses`, `openai-codex-responses`, `azure-openai-responses`)\n\nSources: `packages/ai/src/providers/openai-responses.ts`, `openai-codex-responses.ts`, and `azure-openai-responses.ts`\n\nNormalization points:\n\n- `response.output_item.added` starts reasoning/text/function-call blocks\n- reasoning summary events (`response.reasoning_summary_text.delta`) become `thinking_delta`\n- output/refusal deltas become `text_delta`\n- `response.function_call_arguments.delta` becomes `toolcall_delta`\n- `response.output_item.done` emits `thinking_end` / `text_end` / `toolcall_end`\n- `response.completed` maps status to stop reason and usage\n\nTool-call argument streaming:\n\n- same `partialJson` accumulation pattern as Anthropic\n- providers that send only `response.function_call_arguments.done` still populate final args\n- tool call IDs are normalized as `\"<call_id>|<item_id>\"`\n\n## Google Generative AI (`google-generative-ai`)\n\nSource: `packages/ai/src/providers/google.ts`\n\nNormalization points:\n\n- iterates `candidate.content.parts`\n- text parts are split into thinking vs text by `isThinkingPart(part)`\n- block transitions close previous block before starting a new one\n- `part.functionCall` is treated as a complete tool call (start/delta/end emitted immediately)\n- finish reason mapped by `mapStopReason()` from `google-shared.ts`\n\nTool-call argument streaming:\n\n- function call args arrive as structured object, not incremental JSON text\n- implementation emits one synthetic `toolcall_delta` containing `JSON.stringify(arguments)`\n- no partial JSON parser needed for Google in this path\n\n## Partial tool-call JSON accumulation and recovery\n\nShared behavior for Anthropic/OpenAI Responses uses `parseStreamingJson()` (`packages/ai/src/utils/json-parse.ts`):\n\n1. try `JSON.parse`\n2. fallback to `partial-json` parser for incomplete fragments\n3. if both fail, return `{}`\n\nImplications:\n\n- malformed or truncated argument deltas do not crash stream processing immediately\n- in-progress `arguments` may temporarily be `{}`\n- later valid deltas can recover structured arguments because parsing is retried on every append\n- final `toolcall_end` performs one more parse attempt before emission\n\n## Stop reasons vs transport/runtime errors\n\nProvider stop reasons are mapped to normalized `stopReason`:\n\n- Anthropic: `end_turn`→`stop`, `max_tokens`→`length`, `tool_use`→`toolUse`, safety/refusal cases→`error`\n- OpenAI Responses: `completed`→`stop`, `incomplete`→`length`, `failed/cancelled`→`error`\n- Google: `STOP`→`stop`, `MAX_TOKENS`→`length`, safety/prohibited/malformed-function-call classes→`error`\n\nError semantics are split in two stages:\n\n1. **Model completion semantics** (provider reported finish reason/status)\n2. **Transport/runtime failure** (network/client/parser/abort exceptions)\n\nIf provider stream throws or signals failure, each provider wrapper catches and emits terminal `error` event with:\n\n- `stopReason = \"aborted\"` when abort signal is set\n- otherwise `stopReason = \"error\"`\n- `errorMessage = formatErrorMessageWithRetryAfter(error)`\n\n## Malformed chunk / SSE parse failure behavior\n\nFor these provider paths, chunk/SSE framing is handled by vendor SDK streams (Anthropic SDK, OpenAI SDK, Google SDK). This code does not implement a custom SSE decoder here.\n\nObserved behavior in current implementation:\n\n- malformed chunk/SSE parsing at SDK level surfaces as an exception or stream `error` event\n- provider wrapper converts that into unified terminal `error` event\n- no provider-specific resume/retry inside the stream function itself\n- higher-level retries are handled in `AgentSession` auto-retry logic (message-level retry, not stream-chunk replay)\n\n## Cancellation boundaries\n\nCancellation is layered:\n\n- AI provider request: `options.signal` is passed into provider client stream call.\n- Provider wrapper: after stream loop, aborted signal forces error path (`\"Request was aborted\"`).\n- Agent loop: checks `signal.aborted` before handling each provider event and can synthesize an aborted assistant message from the latest partial.\n- Session/agent controls: `AgentSession.abort()` -> `agent.abort()` -> shared abort controller cancellation.\n\nTool execution cancellation is separate from model stream cancellation:\n\n- tool runners use `AbortSignal.any([agentSignal, steeringAbortSignal])`\n- steering interrupts can abort remaining tool execution while preserving already-produced tool results\n\n## Backpressure boundaries\n\nThere is no hard backpressure mechanism between provider SDK stream and downstream consumers:\n\n- `EventStream` uses in-memory queues with no max size\n- throttling reduces UI update rate but does not slow provider intake\n- if consumers lag significantly, queued events can grow until completion\n\nCurrent design favors responsiveness and simple ordering over bounded-buffer flow control.\n\n## How stream events surface as agent/session events\n\n`agentLoop.streamAssistantResponse()` bridges `AssistantMessageEvent` to `AgentEvent`:\n\n- on `start`: pushes placeholder assistant message and emits `message_start`\n- on block events (`text_*`, `thinking_*`, `toolcall_*`): updates last assistant message, emits `message_update` with raw `assistantMessageEvent`\n- on terminal (`done`/`error`): resolves final message from `response.result()`, emits `message_end`\n\n`AgentSession` then consumes those events for session-level behaviors:\n\n- TTSR watches `message_update.assistantMessageEvent` for `text_delta`, `thinking_delta`, and `toolcall_delta`\n- streaming edit guard inspects `toolcall_delta`/`toolcall_end` on `edit` calls and can abort early\n- persistence writes finalized messages at `message_end`\n- auto-retry examines assistant `stopReason === \"error\"` plus `errorMessage` heuristics\n\n## Unified vs provider-specific responsibilities\n\nUnified (common contract):\n\n- event shape (`AssistantMessageEvent`)\n- final result extraction (`done`/`error`)\n- delta throttling + merge rules\n- agent/session event propagation model\n\nProvider-specific (not fully abstracted):\n\n- upstream event taxonomies and mapping logic\n- stop-reason translation tables\n- tool-call ID conventions\n- reasoning/thinking block semantics and signatures\n- usage token semantics and availability timing\n- message conversion constraints per API\n\n## Implementation files\n\n- [`../../ai/src/stream.ts`](../packages/ai/src/stream.ts) — provider dispatch, option mapping, API key/session plumbing, custom API dispatch, and provider-specific credential handling.\n- [`../../ai/src/utils/event-stream.ts`](../packages/ai/src/utils/event-stream.ts) — generic stream queue + assistant delta throttling.\n- [`../../ai/src/utils/json-parse.ts`](../packages/ai/src/utils/json-parse.ts) — partial JSON parsing for streamed tool arguments.\n- [`../../ai/src/providers/anthropic.ts`](../packages/ai/src/providers/anthropic.ts) — Anthropic event translation and tool JSON delta accumulation.\n- [`../../ai/src/providers/openai-responses.ts`](../packages/ai/src/providers/openai-responses.ts), [`openai-codex-responses.ts`](../packages/ai/src/providers/openai-codex-responses.ts), [`azure-openai-responses.ts`](../packages/ai/src/providers/azure-openai-responses.ts) — Responses-family event translation and status mapping.\n- [`../../ai/src/providers/google.ts`](../packages/ai/src/providers/google.ts), [`google-gemini-cli.ts`](../packages/ai/src/providers/google-gemini-cli.ts), [`google-vertex.ts`](../packages/ai/src/providers/google-vertex.ts) — Gemini stream chunk-to-block translation variants.\n- [`../../ai/src/providers/google-shared.ts`](../packages/ai/src/providers/google-shared.ts) — Gemini finish-reason mapping and shared conversion rules.\n- [`../../ai/src/providers/amazon-bedrock.ts`](../packages/ai/src/providers/amazon-bedrock.ts), [`openai-completions.ts`](../packages/ai/src/providers/openai-completions.ts), [`ollama.ts`](../packages/ai/src/providers/ollama.ts), [`cursor.ts`](../packages/ai/src/providers/cursor.ts) — additional built-in stream adapters using the same event contract.\n- [`../../agent/src/agent-loop.ts`](../packages/agent/src/agent-loop.ts) — provider stream consumption and `message_update` bridging.\n- [`../src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts) — session-level handling of streaming updates, abort, retry, and persistence.\n",
|
|
40
|
-
"python-repl.md": "# Eval Tool Python Backend\n\nThis document describes the Python execution stack in `packages/coding-agent`.\nIt covers tool behavior, runner lifecycle, environment handling, execution semantics, output rendering, supported magics, and operational failure modes.\n\n## Scope and Key Files\n\n- Tool surface: `src/tools/eval.ts`\n- Session/per-call kernel orchestration: `src/eval/py/executor.ts`\n- Subprocess kernel client: `src/eval/py/kernel.ts`\n- Python wrapper / NDJSON server: `src/eval/py/runner.py`\n- Prelude helpers loaded into every kernel: `src/eval/py/prelude.py`\n- MIME bundle renderer (text + structured outputs): `src/eval/py/display.ts`\n- Interactive-mode renderer for user-triggered Python runs: `src/modes/components/eval-execution.ts`\n- Runtime/env filtering and Python resolution: `src/eval/py/runtime.ts`\n\n## What eval's Python backend is\n\nThe `eval` tool executes one or more Python cells inside a long-lived `python3` subprocess that speaks NDJSON over stdin/stdout. No Jupyter, no kernel gateway, no extra pip dependencies — a vanilla Python 3.8+ interpreter is enough. Rich `display()` output (PIL, pandas, plotly, matplotlib figures) keeps working because the wrapper reimplements the MIME-bundle dispatch that IPython previously provided.\n\nTool params:\n\n```ts\n{\n cells: Array<{ code: string; title?: string }>;\n timeout?: number; // seconds, clamped to 1..600, default 30\n reset?: boolean; // reset selected runtime before the first cell only\n}\n```\n\nThe tool is `concurrency = \"exclusive\"` for a session, so calls do not overlap.\n\n## Kernel lifecycle\n\nEach kernel is a single Python subprocess: `python -u <runner.py>`. The runner is bundled with the host binary (Bun text import), written to `~/.omp/python-env`-adjacent tmp cache once per script-hash, and reused by every subsequent spawn.\n\nKernel startup sequence:\n\n1. Availability check (`checkPythonKernelAvailability`) — verifies that a Python interpreter resolves and runs.\n2. Spawn `python -u runner.py` with filtered env and `cwd`.\n3. Send an init request that runs `os.chdir(cwd)`, injects env entries, and adds `cwd` to `sys.path`.\n4. Execute `PYTHON_PRELUDE` (idempotent — only initializes once per process).\n\nKernel shutdown:\n\n- Send `{\"type\": \"exit\"}` over stdin.\n- Wait for process exit with `SHUTDOWN_GRACE_MS` budget.\n- Escalate to `SIGTERM` and finally `SIGKILL` if the process does not exit in time.\n\n## Wire protocol (NDJSON, host ↔ runner)\n\nOne JSON object per line, UTF-8, `\\n` terminated.\n\nHost → runner:\n\n```jsonc\n{\"id\": \"<reqId>\", \"code\": \"<source>\", \"silent\": false, \"storeHistory\": true}\n{\"type\": \"exit\"}\n```\n\nRunner → host:\n\n```jsonc\n{\"type\": \"started\", \"id\": \"<reqId>\"}\n{\"type\": \"stdout\", \"id\": \"<reqId>\", \"data\": \"...\"}\n{\"type\": \"stderr\", \"id\": \"<reqId>\", \"data\": \"...\"}\n{\"type\": \"display\", \"id\": \"<reqId>\", \"bundle\": {<mime>: <value>}}\n{\"type\": \"result\", \"id\": \"<reqId>\", \"bundle\": {<mime>: <value>}}\n{\"type\": \"error\", \"id\": \"<reqId>\", \"ename\": \"...\", \"evalue\": \"...\", \"traceback\": [\"...\"]}\n{\"type\": \"done\", \"id\": \"<reqId>\", \"status\": \"ok\"|\"error\", \"executionCount\": N, \"cancelled\": false}\n```\n\nStatus events the prelude emits (e.g. `_emit_status(\"find\", count=…)`) ship inside display bundles under `application/x-omp-status` so the existing TUI status renderer keeps working.\n\n## Magics\n\nThe runner's source transformer rewrites IPython-style magics to plain Python calls before parsing. Supported set:\n\n| Magic | Effect |\n| --- | --- |\n| `%pip <args>` | `python -m pip <args>` with live streaming output. Newly installed packages are evicted from `sys.modules` so the next `import` picks up the fresh install. |\n| `%cd <path>` | `os.chdir(path)` (with `~` expansion); emits status event. |\n| `%pwd` | Returns `os.getcwd()`. |\n| `%ls [path]` | Returns `sorted(os.listdir(path))`. |\n| `%env [KEY[=VAL]]` | List, read, or set env vars (matches prelude `env()` semantics). |\n| `%set_env KEY VALUE` | Set `os.environ[KEY]`. |\n| `%time <expr>` / `%timeit <expr>` | Time the expression; emits status event with elapsed ms. |\n| `%who` / `%whos` | List user-namespace names. |\n| `%reset` | Clear user globals and re-inject prelude. |\n| `%load <path>` | Read a file into a fresh cell and execute. |\n| `%run <path>` | `runpy.run_path` and merge globals back. |\n| `%%bash` / `%%sh` | Run the cell body via `bash`/`sh`. |\n| `%%capture [name]` | Run body with stdout/stderr captured into `name`. |\n| `%%timeit` | Time the cell body. |\n| `%%writefile <path>` | Write body to file. |\n| `!cmd` / `var = !cmd` | Run command via subprocess shell; returns an SList-style result with `.n` / `.s` helpers. |\n| `var = %name args` | Assignment forms work for line magics and `!cmd`. |\n\nUnknown magic names raise `NameError: UsageError: ...` inside the cell.\n\n## Session persistence semantics\n\n`python.kernelMode` controls retained kernel reuse:\n\n- `session` (default)\n - Reuses kernel sessions keyed by session file plus cwd when a session file exists; otherwise by cwd.\n - Execution is serialized per session via a queue.\n - Idle sessions are evicted after 5 minutes.\n - At most 4 sessions; oldest is evicted on overflow.\n - Heartbeat checks detect dead kernels.\n - Auto-restart allowed once; repeated crash ⇒ hard failure.\n- `per-call`\n - Spawns a fresh subprocess for each request.\n - Shuts the subprocess down after the request.\n - No cross-call state persistence.\n\n### Multi-cell behavior in a single tool call\n\nCells run sequentially in the same kernel instance for that tool call.\n\nIf an intermediate cell fails:\n\n- Earlier cell state remains in memory.\n- Tool returns a targeted error indicating which cell failed.\n- Later cells are not executed.\n\n`reset=true` only applies to the first cell execution in that call.\n\n## Environment filtering and runtime resolution\n\nEnvironment is filtered before launching the runner:\n\n- Allowlist includes core vars like `PATH`, `HOME`, locale vars, `VIRTUAL_ENV`, `PYTHONPATH`, etc.\n- Allow-prefixes: `LC_`, `XDG_`, `PI_`\n- Denylist strips common API keys (OpenAI/Anthropic/Gemini/etc.)\n\nRuntime selection order:\n\n1. Active/located venv (`VIRTUAL_ENV`, then `<cwd>/.venv`, `<cwd>/venv`)\n2. Managed venv at `~/.omp/python-env`\n3. `python` or `python3` on PATH\n\nWhen a venv is selected, its bin/Scripts path is prepended to `PATH`.\n\nThe runner additionally receives `PYTHONUNBUFFERED=1` and `PYTHONIOENCODING=utf-8` so streamed output reaches the host promptly.\n\n## Tool availability and mode selection\n\n`eval.py` / `eval.js` (both default `true`) plus optional `PI_PY` override controls eval backend exposure:\n\n- Python backend only (`eval.py=true`, `eval.js=false`)\n- JavaScript backend only (`eval.py=false`, `eval.js=true`)\n- both backends\n\n`PI_PY` accepted values:\n\n- `0` / `bash` → JavaScript backend only\n- `1` / `py` → Python backend only\n- `mix` / `both` → both backends\n\nIf Python preflight fails and `eval.js` is enabled, `eval` remains available and dispatches to JavaScript unless `language: \"python\"` is explicitly requested.\n\n## Execution flow and cancellation/timeout\n\n### Tool-level timeout\n\n`eval` timeout is in seconds, default 30, clamped to `1..600`. The tool combines caller abort signal and timeout signal with `AbortSignal.any(...)`.\n\n### Kernel execution cancellation\n\nOn abort/timeout:\n\n- The host sends `kill(\"SIGINT\")` to the runner subprocess.\n- The runner's exec-time signal handler raises `KeyboardInterrupt` inside the user code.\n- Result includes `cancelled=true`; timeout path annotates output as `Command timed out after <n> seconds`.\n- Between requests the runner installs `SIG_IGN` for SIGINT so a stray cancel does not tear down the kernel.\n\nIf a second cancel is required (runner stuck in C code), the host escalates to `SIGTERM` and the session restarts on the next call.\n\n### stdin behavior\n\nInteractive stdin is not supported. The runner does not forward `input()` prompts; user code that calls `input()` blocks until cancellation.\n\n## Output capture and rendering\n\n### Captured output classes\n\nFrom runner frames:\n\n- `stdout` / `stderr` → plain text chunks\n- `display` / `result` → rich display handling (MIME bundle)\n- `error` → traceback text\n- `application/x-omp-status` MIME inside `display` → structured status events\n\nDisplay MIME precedence:\n\n1. `text/markdown`\n2. `text/plain`\n3. `text/html` (converted to basic markdown)\n\nAdditionally captured as structured outputs:\n\n- `application/json` → JSON tree data\n- `image/png` / `image/jpeg` → image payloads\n- `application/x-omp-status` → status events\n\n### Matplotlib\n\nThe runner sets `MPLBACKEND=Agg` as an environ default so figures render off-screen. After every cell, `pyplot.get_fignums()` is iterated; each figure is saved to PNG, emitted as an `image/png` display, and closed.\n\n### Storage and truncation\n\nOutput is streamed through `OutputSink` and may be persisted to artifact storage. Tool results can include truncation metadata and `artifact://<id>` for full output recovery.\n\n### Renderer behavior\n\n- Tool renderer (`eval.ts`):\n - shows code-cell blocks with per-cell status\n - collapsed preview defaults to 10 lines\n - supports expanded mode for full output and richer status detail\n- Interactive renderer (`eval-execution.ts`):\n - used for user-triggered Python execution in TUI\n - collapsed preview defaults to 20 lines\n - clamps very long individual lines to 4000 chars for display safety\n - shows cancellation/error/truncation notices\n\n## Operational troubleshooting\n\n- **Python backend not available** — Check `eval.py`, `PI_PY`, and that `python`/`python3` is on PATH. If preflight fails and `eval.js` is enabled, omit `language` or pass `language: \"js\"` to use JavaScript.\n- **No Python on PATH** — Install a system Python 3.8+ or place a venv at `~/.omp/python-env`. `omp setup python --check` reports the resolved interpreter.\n- **Execution hangs then times out** — Increase tool `timeout` (max 600s) if workload is legitimate. For stuck native code, cancellation triggers `SIGINT` first then escalates; the session restarts on the next request.\n- **stdin/input prompts in Python code** — `input()` is not supported; pass data programmatically.\n- **Working directory errors** — Tool validates `cwd` exists and is a directory before execution.\n\n## Relevant environment variables\n\n- `PI_PY` — tool exposure override\n- `PI_PYTHON_SKIP_CHECK=1` — bypass Python preflight/warm checks\n- `PI_PYTHON_INTEGRATION=1` — enable gated integration tests that spawn a real Python\n- `PI_PYTHON_IPC_TRACE=1` — log NDJSON frames exchanged with the runner subprocess\n- `PI_DEBUG_STARTUP=1` — emit startup-stage debug markers\n",
|
|
40
|
+
"python-repl.md": "# Eval Tool Python Backend\n\nThis document describes the Python execution stack in `packages/coding-agent`.\nIt covers tool behavior, runner lifecycle, environment handling, execution semantics, output rendering, supported magics, and operational failure modes.\n\n## Scope and Key Files\n\n- Tool surface: `src/tools/eval.ts`\n- Session/per-call kernel orchestration: `src/eval/py/executor.ts`\n- Subprocess kernel client: `src/eval/py/kernel.ts`\n- Python wrapper / NDJSON server: `src/eval/py/runner.py`\n- Prelude helpers loaded into every kernel: `src/eval/py/prelude.py`\n- MIME bundle renderer (text + structured outputs): `src/eval/py/display.ts`\n- Interactive-mode renderer for user-triggered Python runs: `src/modes/components/eval-execution.ts`\n- Runtime/env filtering and Python resolution: `src/eval/py/runtime.ts`\n\n## What eval's Python backend is\n\nThe `eval` tool executes one or more Python cells inside a long-lived `python3` subprocess that speaks NDJSON over stdin/stdout. No Jupyter, no kernel gateway, no extra pip dependencies — a vanilla Python 3.8+ interpreter is enough. Rich `display()` output (PIL, pandas, plotly, matplotlib figures) keeps working because the wrapper reimplements the MIME-bundle dispatch that IPython previously provided.\n\nTool params:\n\n```ts\n{\n cells: Array<{ code: string; title?: string }>;\n timeout?: number; // seconds, clamped to 1..600, default 30\n reset?: boolean; // reset selected runtime before the first cell only\n}\n```\n\nThe tool is `concurrency = \"exclusive\"` for a session, so calls do not overlap.\n\n## Kernel lifecycle\n\nEach kernel is a single Python subprocess: `python -u <runner.py>`. The runner is bundled with the host binary (Bun text import), written to `~/.omp/python-env`-adjacent tmp cache once per script-hash, and reused by every subsequent spawn.\n\nKernel startup sequence:\n\n1. Availability check (`checkPythonKernelAvailability`) — verifies that a Python interpreter resolves and runs.\n2. Spawn `python -u runner.py` with filtered env and `cwd`.\n3. Send an init request that runs `os.chdir(cwd)`, injects env entries, and adds `cwd` to `sys.path`.\n4. Execute `PYTHON_PRELUDE` (idempotent — only initializes once per process).\n\nKernel shutdown:\n\n- Send `{\"type\": \"exit\"}` over stdin.\n- Wait for process exit with `SHUTDOWN_GRACE_MS` budget.\n- Escalate to `SIGTERM` and finally `SIGKILL` if the process does not exit in time.\n\n## Wire protocol (NDJSON, host ↔ runner)\n\nOne JSON object per line, UTF-8, `\\n` terminated.\n\nHost → runner:\n\n```jsonc\n{\"id\": \"<reqId>\", \"code\": \"<source>\", \"silent\": false, \"storeHistory\": true}\n{\"type\": \"exit\"}\n```\n\nRunner → host:\n\n```jsonc\n{\"type\": \"started\", \"id\": \"<reqId>\"}\n{\"type\": \"stdout\", \"id\": \"<reqId>\", \"data\": \"...\"}\n{\"type\": \"stderr\", \"id\": \"<reqId>\", \"data\": \"...\"}\n{\"type\": \"display\", \"id\": \"<reqId>\", \"bundle\": {<mime>: <value>}}\n{\"type\": \"result\", \"id\": \"<reqId>\", \"bundle\": {<mime>: <value>}}\n{\"type\": \"error\", \"id\": \"<reqId>\", \"ename\": \"...\", \"evalue\": \"...\", \"traceback\": [\"...\"]}\n{\"type\": \"done\", \"id\": \"<reqId>\", \"status\": \"ok\"|\"error\", \"executionCount\": N, \"cancelled\": false}\n```\n\nStatus events the prelude emits (e.g. `_emit_status(\"find\", count=…)`) ship inside display bundles under `application/x-omp-status` so the existing TUI status renderer keeps working.\n\n## Magics\n\nThe runner's source transformer rewrites IPython-style magics to plain Python calls before parsing. Supported set:\n\n| Magic | Effect |\n| --- | --- |\n| `%pip <args>` | `python -m pip <args>` with live streaming output. Newly installed packages are evicted from `sys.modules` so the next `import` picks up the fresh install. |\n| `%cd <path>` | `os.chdir(path)` (with `~` expansion); emits status event. |\n| `%pwd` | Returns `os.getcwd()`. |\n| `%ls [path]` | Returns `sorted(os.listdir(path))`. |\n| `%env [KEY[=VAL]]` | List, read, or set env vars (matches prelude `env()` semantics). |\n| `%set_env KEY VALUE` | Set `os.environ[KEY]`. |\n| `%time <expr>` / `%timeit <expr>` | Time the expression; emits status event with elapsed ms. |\n| `%who` / `%whos` | List user-namespace names. |\n| `%reset` | Clear user globals and re-inject prelude. |\n| `%load <path>` | Read a file into a fresh cell and execute. |\n| `%run <path>` | `runpy.run_path` and merge globals back. |\n| `%%bash` / `%%sh` | Run the cell body via `bash`/`sh`. |\n| `%%capture [name]` | Run body with stdout/stderr captured into `name`. |\n| `%%timeit` | Time the cell body. |\n| `%%writefile <path>` | Write body to file. |\n| `!cmd` / `var = !cmd` | Run command via subprocess shell; returns an SList-style result with `.n` / `.s` helpers. |\n| `var = %name args` | Assignment forms work for line magics and `!cmd`. |\n\nUnknown magic names raise `NameError: UsageError: ...` inside the cell.\n\n## Session persistence semantics\n\n`python.kernelMode` controls retained kernel reuse:\n\n- `session` (default)\n - Reuses kernel sessions keyed by session file plus cwd when a session file exists; otherwise by cwd.\n - Execution is serialized per session via a queue.\n - Idle sessions are evicted after 5 minutes.\n - At most 4 sessions; oldest is evicted on overflow.\n - Heartbeat checks detect dead kernels.\n - Auto-restart allowed once; repeated crash ⇒ hard failure.\n- `per-call`\n - Spawns a fresh subprocess for each request.\n - Shuts the subprocess down after the request.\n - No cross-call state persistence.\n\n### Multi-cell behavior in a single tool call\n\nCells run sequentially in the same kernel instance for that tool call.\n\nIf an intermediate cell fails:\n\n- Earlier cell state remains in memory.\n- Tool returns a targeted error indicating which cell failed.\n- Later cells are not executed.\n\n`reset=true` only applies to the first cell execution in that call.\n\n## Environment filtering and runtime resolution\n\nEnvironment is filtered before launching the runner:\n\n- Allowlist includes core vars like `PATH`, `HOME`, locale vars, `VIRTUAL_ENV`, `PYTHONPATH`, etc.\n- Allow-prefixes: `LC_`, `XDG_`, `PI_`\n- Denylist strips common API keys (OpenAI/Anthropic/Gemini/etc.)\n\nRuntime selection order:\n\n1. Active/located venv (`VIRTUAL_ENV`, then `<cwd>/.venv`, `<cwd>/venv`)\n2. Managed venv at `~/.omp/python-env`\n3. `python` or `python3` on PATH\n\nWhen a venv is selected, its bin/Scripts path is prepended to `PATH`.\n\nThe runner additionally receives `PYTHONUNBUFFERED=1` and `PYTHONIOENCODING=utf-8` so streamed output reaches the host promptly.\n\n## Tool availability and mode selection\n\n`eval.py` / `eval.js` (both default `true`) plus optional `PI_PY` override controls eval backend exposure:\n\n- Python backend only (`eval.py=true`, `eval.js=false`)\n- JavaScript backend only (`eval.py=false`, `eval.js=true`)\n- both backends\n\n`PI_PY` accepted values:\n\n- `0` / `bash` → JavaScript backend only\n- `1` / `py` → Python backend only\n- `mix` / `both` → both backends\n\nIf Python preflight fails and `eval.js` is enabled, `eval` remains available and dispatches to JavaScript unless `language: \"python\"` is explicitly requested.\n\n## Execution flow and cancellation/timeout\n\n### Tool-level timeout\n\n`eval` timeout is in seconds, default 30, clamped to `1..600`. The tool combines caller abort signal and timeout signal with `AbortSignal.any(...)`.\n\n### Kernel execution cancellation\n\nOn abort/timeout:\n\n- The host sends `kill(\"SIGINT\")` to the runner subprocess.\n- The runner's exec-time signal handler raises `KeyboardInterrupt` inside the user code.\n- Result includes `cancelled=true`; timeout path annotates output as `Command timed out after <n> seconds`.\n- Between requests the runner installs `SIG_IGN` for SIGINT so a stray cancel does not tear down the kernel.\n\nIf a second cancel is required (runner stuck in C code), the host escalates to `SIGTERM` and the session restarts on the next call.\n\n### stdin behavior\n\nInteractive stdin is not supported. The runner does not forward `input()` prompts; user code that calls `input()` blocks until cancellation.\n\n## Output capture and rendering\n\n### Captured output classes\n\nFrom runner frames:\n\n- `stdout` / `stderr` → plain text chunks\n- `display` / `result` → rich display handling (MIME bundle)\n- `error` → traceback text\n- `application/x-omp-status` MIME inside `display` → structured status events\n\nDisplay MIME precedence:\n\n1. `text/markdown`\n2. `text/plain`\n3. `text/html` (converted to basic markdown)\n\nAdditionally captured as structured outputs:\n\n- `application/json` → JSON tree data\n- `image/png` / `image/jpeg` → image payloads\n- `application/x-omp-status` → status events\n\n### Matplotlib\n\nThe runner sets `MPLBACKEND=Agg` as an environ default so figures render off-screen. After every cell, `pyplot.get_fignums()` is iterated; each figure is saved to PNG, emitted as an `image/png` display, and closed.\n\n### Storage and truncation\n\nOutput is streamed through `OutputSink` and may be persisted to artifact storage. Tool results can include truncation metadata and `artifact://<id>` for full output recovery.\n\n### Renderer behavior\n\n- Tool renderer (`eval.ts`):\n - shows code-cell blocks with per-cell status\n - collapsed preview defaults to 10 lines\n - supports expanded mode for full output and richer status detail\n- Interactive renderer (`eval-execution.ts`):\n - used for user-triggered Python execution in TUI\n - collapsed preview defaults to 20 lines\n - clamps very long individual lines to 4000 chars for display safety\n - shows cancellation/error/truncation notices\n\n## Operational troubleshooting\n\n- **Python backend not available** — Check `eval.py`, `PI_PY`, and that `python`/`python3` is on PATH. If preflight fails and `eval.js` is enabled, omit `language` or pass `language: \"js\"` to use JavaScript.\n- **No Python on PATH** — Install a system Python 3.8+ or place a venv at `~/.omp/python-env`. `omp setup python --check` reports the resolved interpreter.\n- **Execution hangs then times out** — Increase tool `timeout` (max 600s) if workload is legitimate. For stuck native code, cancellation triggers `SIGINT` first then escalates; the session restarts on the next request.\n- **stdin/input prompts in Python code** — `input()` is not supported; pass data programmatically.\n- **Working directory errors** — Tool validates `cwd` exists and is a directory before execution.\n\n## Relevant environment variables\n\n- `PI_PY` — tool exposure override\n- `PI_PYTHON_SKIP_CHECK=1` — bypass Python preflight/warm checks\n- `PI_PYTHON_INTEGRATION=1` — enable gated integration tests that spawn a real Python\n- `PI_PYTHON_IPC_TRACE=1` — log NDJSON frames exchanged with the runner subprocess\n",
|
|
41
41
|
"render-mermaid.md": "# RenderMermaid\n\n`RenderMermaid` is an optional built-in tool that renders Mermaid source to terminal-friendly text.\n\n## Enable it\n\nDisabled by default. Turn it on in `/settings` under **Tools → Render Mermaid**, or in `~/.omp/agent/config.yml`:\n\n```yaml\nrenderMermaid:\n enabled: true\n```\n\n## What it does\n\n- Tool name: `render_mermaid`\n- Input: Mermaid source in the required `mermaid` field\n- Output: rendered ASCII/Unicode text, not SVG or PNG\n- Storage: when artifact storage is available, the full render is also saved as an `artifact://...`\n\nThere are no model-specific or environment-variable prerequisites. Once enabled, any model that can call built-in tools can use it.\n\n## Parameters\n\n```json\n{\n \"mermaid\": \"graph TD\\n A[Start] --> B[Stop]\",\n \"config\": {\n \"useAscii\": false,\n \"paddingX\": 2,\n \"paddingY\": 2,\n \"boxBorderPadding\": 0\n }\n}\n```\n\nAvailable `config` fields:\n\n- `useAscii` — `true` for plain ASCII, `false` for Unicode box-drawing characters (default and usually more readable)\n- `paddingX` — horizontal spacing between nodes\n- `paddingY` — vertical spacing between nodes\n- `boxBorderPadding` — inner padding inside node boxes\n\n## Current limitations\n\n`RenderMermaid` uses the `beautiful-mermaid` ASCII renderer. It works best for flowcharts and small diagrams.\n\nComplex sequence diagrams, especially with `alt` / `else` blocks, can become very wide in a terminal. That is current renderer behavior, not a provider or model configuration problem.\n\nIf a sequence diagram is hard to read:\n\n1. Keep Unicode output (`useAscii: false`)\n2. Reduce spacing with a tighter config such as `paddingX: 2`, `paddingY: 2`, `boxBorderPadding: 0`\n3. Prefer smaller sub-diagrams over one large sequence diagram\n4. Open the saved artifact if the inline preview is truncated in the TUI\n\n## Example\n\nInput:\n\n```mermaid\ngraph TD\n A[Start] --> B{Decision}\n B -->|Yes| C[Action]\n B -->|No| D[End]\n```\n\nTypical result:\n\n```text\n┌─────┐\n│Start│\n└─────┘\n │\n ▼\n┌────────┐\n│Decision│\n└────────┘\n```\n",
|
|
42
|
-
"resolve-tool-runtime.md": "# Resolve tool runtime internals\n\nThis document explains how preview/apply workflows are modeled in coding-agent and how built-in or custom tools can participate via the tool-choice queue and `pushPendingAction`.\n\n## Scope and key files\n\n- [`src/tools/resolve.ts`](../packages/coding-agent/src/tools/resolve.ts)\n- [`src/tools/ast-edit.ts`](../packages/coding-agent/src/tools/ast-edit.ts)\n- [`src/extensibility/custom-tools/types.ts`](../packages/coding-agent/src/extensibility/custom-tools/types.ts)\n- [`src/extensibility/custom-tools/loader.ts`](../packages/coding-agent/src/extensibility/custom-tools/loader.ts)\n- [`src/sdk.ts`](../packages/coding-agent/src/sdk.ts)\n\n## What `resolve` does\n\n`resolve` is a hidden tool that finalizes a pending preview action.\n\n- `action: \"apply\"` executes the queued action's `apply(reason)` callback and returns that result with resolve metadata.\n- `action: \"discard\"` invokes `reject(reason)` if provided; otherwise returns `Discarded: <label>. Reason: <reason>`.\n\nIf no pending action exists, `resolve` fails with:\n\n- `No pending action to resolve. Nothing to apply or discard.`\n\n## Pending actions use the tool-choice queue\n\nPreview producers call `queueResolveHandler(...)`, which pushes a one-shot forced `resolve` directive onto the session tool-choice queue and adds a `resolve-reminder` steering message.\n\nRuntime behavior:\n\n- the queued handler owns the pending `apply`/`reject` callbacks,\n- `resolve` looks up the current queue invoker with `session.peekQueueInvoker()`,\n- if the model rejects the forced tool choice, the queue directive is requeued,\n- `resolve` does not maintain a separate pending-action stack.\n\nMultiple pending previews therefore follow the active tool-choice queue ordering, not an independent pending-action store.\n\n## Built-in producer example (`ast_edit`)\n\n`ast_edit` previews structural replacements first. When the preview has replacements and is not applied yet, it queues a resolve handler that contains:\n\n- label (human-readable summary)\n- `sourceToolName` (`ast_edit`)\n- `apply(reason: string)` callback that reruns AST edit with `dryRun: false`\n\n`resolve(action=\"apply\", reason=\"...\")` passes `reason` into this callback.\n\n## Custom tools: `pushPendingAction`\n\nCustom tools can register resolve-compatible pending actions through `CustomToolAPI.pushPendingAction(...)`. The custom tool loader forwards these actions to `queueResolveHandler(...)` when that hook is available.\n\n`CustomToolPendingAction`:\n\n- `label: string` (required)\n- `apply(reason: string): Promise<AgentToolResult<unknown>>` (required) — invoked on apply; `reason` is the string passed to `resolve`\n- `reject?(reason: string): Promise<AgentToolResult<unknown> | undefined>` (optional) — invoked on discard; return value replaces the default \"Discarded\" message if provided\n- `details?: unknown` exists on the public custom-tool type but is not currently forwarded by the loader into resolve metadata\n- `sourceToolName?: string` (optional, defaults to `\"custom_tool\"`)\n\n### Minimal usage example\n\n```ts\nimport type { CustomToolFactory } from \"@oh-my-pi/pi-coding-agent\";\n\nconst factory: CustomToolFactory = (pi) => ({\n name: \"batch_rename_preview\",\n label: \"Batch Rename Preview\",\n description: \"Previews renames and defers commit to resolve\",\n parameters: pi.
|
|
43
|
-
"rpc.md": "# RPC Protocol Reference\n\nRPC mode runs the coding agent as a newline-delimited JSON protocol over stdio.\n\n- **stdin**: commands (`RpcCommand`), extension UI responses, and host-tool updates/results\n- **stdout**: a ready frame, command responses (`RpcResponse`), session/agent events, extension UI requests, host-tool requests/cancellations\n\nPrimary implementation:\n\n- `src/modes/rpc/rpc-mode.ts`\n- `src/modes/rpc/rpc-types.ts`\n- `src/session/agent-session.ts`\n- `packages/agent/src/agent.ts`\n- `packages/agent/src/agent-loop.ts`\n\n## Startup\n\n```bash\nomp --mode rpc [regular CLI options]\n```\n\nBehavior notes:\n\n- `@file` CLI arguments are rejected in RPC mode.\n- RPC mode disables automatic session title generation by default to avoid an extra model call.\n- RPC mode resets workflow-altering `todo.*`, `task.*`, `async.*`, and `bash.autoBackground.*` settings to their built-in defaults instead of inheriting user overrides.\n- The process reads stdin as JSONL (`readJsonl(Bun.stdin.stream())`).\n- At startup it writes `{ \"type\": \"ready\" }` before processing commands.\n- When stdin closes, pending host-tool calls are rejected and the process exits with code `0`.\n- Responses/events are written as one JSON object per line.\n\n## Transport and Framing\n\nEach frame is a single JSON object followed by `\\n`.\n\nThere is no envelope beyond the object shape itself.\n\n### Outbound frame categories (stdout)\n\n1. Ready frame (`{ type: \"ready\" }`)\n2. `RpcResponse` (`{ type: \"response\", ... }`)\n3. `AgentSessionEvent` objects (`agent_start`, `message_update`, etc.)\n4. `RpcExtensionUIRequest` (`{ type: \"extension_ui_request\", ... }`)\n5. Host tool requests/cancellations (`host_tool_call`, `host_tool_cancel`)\n6. Extension errors (`{ type: \"extension_error\", extensionPath, event, error }`)\n\n### Inbound frame categories (stdin)\n\n1. `RpcCommand`\n2. `RpcExtensionUIResponse` (`{ type: \"extension_ui_response\", ... }`)\n3. Host tool updates/results (`host_tool_update`, `host_tool_result`)\n\n## Request/Response Correlation\n\nAll commands accept optional `id?: string`.\n\n- If provided, normal command responses echo the same `id`.\n- `RpcClient` relies on this for pending-request resolution.\n\nImportant edge behavior from runtime:\n\n- Unknown command responses are emitted with `id: undefined` (even if the request had an `id`).\n- Parse/handler exceptions in the input loop emit `command: \"parse\"` with `id: undefined`.\n- `prompt` and `abort_and_prompt` return immediate success, then may emit a later error response with the **same** id if async prompt scheduling fails.\n\n## Command Schema (canonical)\n\n`RpcCommand` is defined in `src/modes/rpc/rpc-types.ts`:\n\n### Prompting\n\n- `{ id?, type: \"prompt\", message: string, images?: ImageContent[], streamingBehavior?: \"steer\" | \"followUp\" }`\n- `{ id?, type: \"steer\", message: string, images?: ImageContent[] }`\n- `{ id?, type: \"follow_up\", message: string, images?: ImageContent[] }`\n- `{ id?, type: \"abort\" }`\n- `{ id?, type: \"abort_and_prompt\", message: string, images?: ImageContent[] }`\n- `{ id?, type: \"new_session\", parentSession?: string }`\n\n### State\n\n- `{ id?, type: \"get_state\" }`\n- `{ id?, type: \"set_todos\", phases: TodoPhase[] }`\n- `{ id?, type: \"set_host_tools\", tools: RpcHostToolDefinition[] }`\n\n### Model\n\n- `{ id?, type: \"set_model\", provider: string, modelId: string }`\n- `{ id?, type: \"cycle_model\" }`\n- `{ id?, type: \"get_available_models\" }`\n\n### Thinking\n\n- `{ id?, type: \"set_thinking_level\", level: ThinkingLevel }`\n- `{ id?, type: \"cycle_thinking_level\" }`\n\n### Queue modes\n\n- `{ id?, type: \"set_steering_mode\", mode: \"all\" | \"one-at-a-time\" }`\n- `{ id?, type: \"set_follow_up_mode\", mode: \"all\" | \"one-at-a-time\" }`\n- `{ id?, type: \"set_interrupt_mode\", mode: \"immediate\" | \"wait\" }`\n\n### Compaction\n\n- `{ id?, type: \"compact\", customInstructions?: string }`\n- `{ id?, type: \"set_auto_compaction\", enabled: boolean }`\n\n### Retry\n\n- `{ id?, type: \"set_auto_retry\", enabled: boolean }`\n- `{ id?, type: \"abort_retry\" }`\n\n### Bash\n\n- `{ id?, type: \"bash\", command: string }`\n- `{ id?, type: \"abort_bash\" }`\n\n### Session\n\n- `{ id?, type: \"get_session_stats\" }`\n- `{ id?, type: \"export_html\", outputPath?: string }`\n- `{ id?, type: \"switch_session\", sessionPath: string }`\n- `{ id?, type: \"branch\", entryId: string }`\n- `{ id?, type: \"get_branch_messages\" }`\n- `{ id?, type: \"get_last_assistant_text\" }`\n- `{ id?, type: \"set_session_name\", name: string }`\n\n### Messages\n\n- `{ id?, type: \"get_messages\" }`\n\n## Response Schema\n\nAll command results use `RpcResponse`:\n\n- Success: `{ id?, type: \"response\", command: <command>, success: true, data?: ... }`\n- Failure: `{ id?, type: \"response\", command: string, success: false, error: string }`\n\nData payloads are command-specific and defined in `rpc-types.ts`.\n\n### `get_state` payload\n\n```json\n{\n \"model\": { \"provider\": \"...\", \"id\": \"...\" },\n \"thinkingLevel\": \"off|minimal|low|medium|high|xhigh\",\n \"isStreaming\": false,\n \"isCompacting\": false,\n \"steeringMode\": \"all|one-at-a-time\",\n \"followUpMode\": \"all|one-at-a-time\",\n \"interruptMode\": \"immediate|wait\",\n \"sessionFile\": \"...\",\n \"sessionId\": \"...\",\n \"sessionName\": \"...\",\n \"autoCompactionEnabled\": true,\n \"messageCount\": 0,\n \"queuedMessageCount\": 0,\n \"todoPhases\": [\n {\n \"id\": \"phase-1\",\n \"name\": \"Todos\",\n \"tasks\": [\n {\n \"id\": \"task-1\",\n \"content\": \"Map the tool surface\",\n \"status\": \"in_progress\"\n }\n ]\n }\n ],\n \"systemPrompt\": \"...\",\n \"dumpTools\": [\n {\n \"name\": \"read\",\n \"description\": \"Read files and URLs\",\n \"parameters\": {}\n }\n ]\n}\n```\n\n### `set_todos` payload\n\nReplaces the in-memory todo state for the current session and returns the normalized phase list:\n\n```json\n{\n \"id\": \"req_2\",\n \"type\": \"set_todos\",\n \"phases\": [\n {\n \"id\": \"phase-1\",\n \"name\": \"Evaluation\",\n \"tasks\": [\n {\n \"id\": \"task-1\",\n \"content\": \"Map the read tool surface\",\n \"status\": \"in_progress\"\n },\n {\n \"id\": \"task-2\",\n \"content\": \"Exercise edit operations\",\n \"status\": \"pending\"\n }\n ]\n }\n ]\n}\n```\n\nThis is useful for hosts that want to pre-seed a plan before the first prompt.\n\n### `set_host_tools` payload\n\nReplaces the current set of host-owned tools that the RPC server may call back\ninto over stdio:\n\n```json\n{\n \"id\": \"req_3\",\n \"type\": \"set_host_tools\",\n \"tools\": [\n {\n \"name\": \"echo_host\",\n \"label\": \"Echo Host\",\n \"description\": \"Echo a value from the embedding host\",\n \"parameters\": {\n \"type\": \"object\",\n \"properties\": {\n \"message\": { \"type\": \"string\" }\n },\n \"required\": [\"message\"],\n \"additionalProperties\": false\n }\n }\n ]\n}\n```\n\nThe response payload is:\n\n```json\n{\n \"toolNames\": [\"echo_host\"]\n}\n```\n\nThese tools are added to the active session tool registry before the next model\ncall. Re-sending `set_host_tools` replaces the previous host-owned set.\n\n## Event Stream Schema\n\nRPC mode forwards `AgentSessionEvent` objects from `AgentSession.subscribe(...)`.\n\nCommon event types:\n\n- `agent_start`, `agent_end`\n- `turn_start`, `turn_end`\n- `message_start`, `message_update`, `message_end`\n- `tool_execution_start`, `tool_execution_update`, `tool_execution_end`\n- `auto_compaction_start`, `auto_compaction_end`\n- `auto_retry_start`, `auto_retry_end`\n- `ttsr_triggered`\n- `todo_reminder`\n- `todo_auto_clear`\n\nExtension runner errors are emitted separately as:\n\n```json\n{\n \"type\": \"extension_error\",\n \"extensionPath\": \"...\",\n \"event\": \"...\",\n \"error\": \"...\"\n}\n```\n\n`message_update` includes streaming deltas in `assistantMessageEvent` (text/thinking/toolcall deltas).\n\n## Prompt/Queue Concurrency and Ordering\n\nThis is the most important operational behavior.\n\n### Immediate ack vs completion\n\n`prompt` and `abort_and_prompt` are **acknowledged immediately**:\n\n```json\n{ \"id\": \"req_1\", \"type\": \"response\", \"command\": \"prompt\", \"success\": true }\n```\n\nThat means:\n\n- command acceptance != run completion\n- final completion is observed via `agent_end`\n\n### While streaming\n\n`AgentSession.prompt()` requires `streamingBehavior` during active streaming:\n\n- `\"steer\"` => queued steering message (interrupt path)\n- `\"followUp\"` => queued follow-up message (post-turn path)\n\nIf omitted during streaming, prompt fails.\n\n### Queue defaults\n\nFrom `packages/agent/src/agent.ts` defaults:\n\n- `steeringMode`: `\"one-at-a-time\"`\n- `followUpMode`: `\"one-at-a-time\"`\n- `interruptMode`: `\"immediate\"`\n\n### Mode semantics\n\n- `set_steering_mode` / `set_follow_up_mode`\n - `\"one-at-a-time\"`: dequeue one queued message per turn\n - `\"all\"`: dequeue entire queue at once\n- `set_interrupt_mode`\n - `\"immediate\"`: tool execution checks steering between tool calls; pending steering can abort remaining tool calls in the turn\n - `\"wait\"`: defer steering until turn completion\n\n## Extension UI Sub-Protocol\n\nExtensions in RPC mode use request/response UI frames.\n\n### Outbound request\n\n`RpcExtensionUIRequest` (`type: \"extension_ui_request\"`) methods:\n\n- `select`, `confirm`, `input`, `editor`, `cancel`\n- `notify`, `setStatus`, `setWidget`, `setTitle`, `set_editor_text`\n\nRuntime note:\n\n- Automatic session title generation is disabled in RPC mode, and `setTitle` UI\n requests are also suppressed by default because most hosts do not have a\n meaningful terminal-title surface. Set `PI_RPC_EMIT_TITLE=1` to opt back in to\n the UI event only.\n\nExample:\n\n```json\n{\n \"type\": \"extension_ui_request\",\n \"id\": \"123\",\n \"method\": \"confirm\",\n \"title\": \"Confirm\",\n \"message\": \"Continue?\",\n \"timeout\": 30000\n}\n```\n\n### Inbound response\n\n`RpcExtensionUIResponse` (`type: \"extension_ui_response\"`):\n\n- `{ type: \"extension_ui_response\", id: string, value: string }`\n- `{ type: \"extension_ui_response\", id: string, confirmed: boolean }`\n- `{ type: \"extension_ui_response\", id: string, cancelled: true, timedOut?: boolean }`\n\nIf a dialog has a timeout, RPC mode resolves to a default value when timeout/abort fires.\n\n## Host Tool Sub-Protocol\n\nRPC hosts can expose custom tools to the agent by sending `set_host_tools`, then\nserving execution requests over the same transport.\n\n### Outbound request\n\nWhen the agent wants the host to execute one of those tools, RPC mode emits:\n\n```json\n{\n \"type\": \"host_tool_call\",\n \"id\": \"host_1\",\n \"toolCallId\": \"toolu_123\",\n \"toolName\": \"echo_host\",\n \"arguments\": { \"message\": \"hello\" }\n}\n```\n\nIf the tool execution is later aborted, RPC mode emits:\n\n```json\n{\n \"type\": \"host_tool_cancel\",\n \"id\": \"host_cancel_1\",\n \"targetId\": \"host_1\"\n}\n```\n\n### Inbound updates and completion\n\nHosts can optionally stream progress:\n\n```json\n{\n \"type\": \"host_tool_update\",\n \"id\": \"host_1\",\n \"partialResult\": {\n \"content\": [{ \"type\": \"text\", \"text\": \"working\" }]\n }\n}\n```\n\nCompletion uses:\n\n```json\n{\n \"type\": \"host_tool_result\",\n \"id\": \"host_1\",\n \"result\": {\n \"content\": [{ \"type\": \"text\", \"text\": \"done\" }]\n }\n}\n```\n\nSet top-level `isError: true` on `host_tool_result` to reject the pending host tool call and surface the returned text content as a tool error.\n\n## Error Model and Recoverability\n\n### Command-level failures\n\nFailures are `success: false` with string `error`.\n\n```json\n{\n \"id\": \"req_2\",\n \"type\": \"response\",\n \"command\": \"set_model\",\n \"success\": false,\n \"error\": \"Model not found: provider/model\"\n}\n```\n\n### Recoverability expectations\n\n- Most command failures are recoverable; process remains alive.\n- Malformed JSONL / parse-loop exceptions emit a `parse` error response and continue reading subsequent lines.\n- Empty `set_session_name` is rejected (`Session name cannot be empty`).\n- Extension UI responses with unknown `id` are ignored.\n- Process termination conditions are stdin close or explicit extension-triggered shutdown after the current command.\n\n## Compact Command Flows\n\n### 1) Prompt and stream\n\nstdin:\n\n```json\n{ \"id\": \"req_1\", \"type\": \"prompt\", \"message\": \"Summarize this repo\" }\n```\n\nstdout sequence (typical):\n\n```json\n{ \"id\": \"req_1\", \"type\": \"response\", \"command\": \"prompt\", \"success\": true }\n{ \"type\": \"agent_start\" }\n{ \"type\": \"message_update\", \"assistantMessageEvent\": { \"type\": \"text_delta\", \"delta\": \"...\" }, \"message\": { \"role\": \"assistant\", \"content\": [] } }\n{ \"type\": \"agent_end\", \"messages\": [] }\n```\n\n### 2) Prompt during streaming with explicit queue policy\n\nstdin:\n\n```json\n{\n \"id\": \"req_2\",\n \"type\": \"prompt\",\n \"message\": \"Also include risks\",\n \"streamingBehavior\": \"followUp\"\n}\n```\n\n### 3) Inspect and tune queue behavior\n\nstdin:\n\n```json\n{ \"id\": \"q1\", \"type\": \"get_state\" }\n{ \"id\": \"q2\", \"type\": \"set_steering_mode\", \"mode\": \"all\" }\n{ \"id\": \"q3\", \"type\": \"set_interrupt_mode\", \"mode\": \"wait\" }\n```\n\n### 4) Extension UI round trip\n\nstdout:\n\n```json\n{\n \"type\": \"extension_ui_request\",\n \"id\": \"ui_7\",\n \"method\": \"input\",\n \"title\": \"Branch name\",\n \"placeholder\": \"feature/...\"\n}\n```\n\nstdin:\n\n```json\n{ \"type\": \"extension_ui_response\", \"id\": \"ui_7\", \"value\": \"feature/rpc-host\" }\n```\n\n## Notes on `RpcClient` helper\n\n`src/modes/rpc/rpc-client.ts` is a convenience wrapper, not the protocol definition.\n\nCurrent helper characteristics:\n\n- Spawns `bun <cliPath> --mode rpc`\n- Correlates responses by generated `req_<n>` ids\n- Dispatches only recognized `AgentEvent` types to listeners\n- Supports host-owned custom tools via `setCustomTools()` and automatic handling of `host_tool_call` / `host_tool_cancel`\n- Does **not** expose helper methods for every protocol command (for example, `set_interrupt_mode` and `set_session_name` are in protocol types but not wrapped as dedicated methods)\n\nUse raw protocol frames if you need complete surface coverage.\n",
|
|
42
|
+
"resolve-tool-runtime.md": "# Resolve tool runtime internals\n\nThis document explains how preview/apply workflows are modeled in coding-agent and how built-in or custom tools can participate via the tool-choice queue and `pushPendingAction`.\n\n## Scope and key files\n\n- [`src/tools/resolve.ts`](../packages/coding-agent/src/tools/resolve.ts)\n- [`src/tools/ast-edit.ts`](../packages/coding-agent/src/tools/ast-edit.ts)\n- [`src/extensibility/custom-tools/types.ts`](../packages/coding-agent/src/extensibility/custom-tools/types.ts)\n- [`src/extensibility/custom-tools/loader.ts`](../packages/coding-agent/src/extensibility/custom-tools/loader.ts)\n- [`src/sdk.ts`](../packages/coding-agent/src/sdk.ts)\n\n## What `resolve` does\n\n`resolve` is a hidden tool that finalizes a pending preview action.\n\n- `action: \"apply\"` executes the queued action's `apply(reason)` callback and returns that result with resolve metadata.\n- `action: \"discard\"` invokes `reject(reason)` if provided; otherwise returns `Discarded: <label>. Reason: <reason>`.\n\nIf no pending action exists, `resolve` fails with:\n\n- `No pending action to resolve. Nothing to apply or discard.`\n\n## Pending actions use the tool-choice queue\n\nPreview producers call `queueResolveHandler(...)`, which pushes a one-shot forced `resolve` directive onto the session tool-choice queue and adds a `resolve-reminder` steering message.\n\nRuntime behavior:\n\n- the queued handler owns the pending `apply`/`reject` callbacks,\n- `resolve` looks up the current queue invoker with `session.peekQueueInvoker()`,\n- if the model rejects the forced tool choice, the queue directive is requeued,\n- `resolve` does not maintain a separate pending-action stack.\n\nMultiple pending previews therefore follow the active tool-choice queue ordering, not an independent pending-action store.\n\n## Built-in producer example (`ast_edit`)\n\n`ast_edit` previews structural replacements first. When the preview has replacements and is not applied yet, it queues a resolve handler that contains:\n\n- label (human-readable summary)\n- `sourceToolName` (`ast_edit`)\n- `apply(reason: string)` callback that reruns AST edit with `dryRun: false`\n\n`resolve(action=\"apply\", reason=\"...\")` passes `reason` into this callback.\n\n## Custom tools: `pushPendingAction`\n\nCustom tools can register resolve-compatible pending actions through `CustomToolAPI.pushPendingAction(...)`. The custom tool loader forwards these actions to `queueResolveHandler(...)` when that hook is available.\n\n`CustomToolPendingAction`:\n\n- `label: string` (required)\n- `apply(reason: string): Promise<AgentToolResult<unknown>>` (required) — invoked on apply; `reason` is the string passed to `resolve`\n- `reject?(reason: string): Promise<AgentToolResult<unknown> | undefined>` (optional) — invoked on discard; return value replaces the default \"Discarded\" message if provided\n- `details?: unknown` exists on the public custom-tool type but is not currently forwarded by the loader into resolve metadata\n- `sourceToolName?: string` (optional, defaults to `\"custom_tool\"`)\n\n### Minimal usage example\n\n```ts\nimport type { CustomToolFactory } from \"@oh-my-pi/pi-coding-agent\";\n\nconst factory: CustomToolFactory = (pi) => ({\n name: \"batch_rename_preview\",\n label: \"Batch Rename Preview\",\n description: \"Previews renames and defers commit to resolve\",\n parameters: pi.zod.object({\n files: pi.zod.array(pi.zod.string()),\n }),\n\n async execute(_toolCallId, params) {\n const previewSummary = `Prepared rename plan for ${params.files.length} files`;\n\n pi.pushPendingAction({\n label: `Batch rename: ${params.files.length} files`,\n sourceToolName: \"batch_rename_preview\",\n apply: async (reason) => {\n // apply writes here\n return {\n content: [\n { type: \"text\", text: `Applied batch rename. Reason: ${reason}` },\n ],\n };\n },\n reject: async (reason) => {\n // optional: cleanup or notify on discard\n return {\n content: [\n { type: \"text\", text: `Discarded batch rename. Reason: ${reason}` },\n ],\n };\n },\n });\n\n return {\n content: [\n {\n type: \"text\",\n text: `${previewSummary}. Call resolve to apply or discard.`,\n },\n ],\n };\n },\n});\n\nexport default factory;\n```\n\n## Runtime availability and failures\n\n`pushPendingAction` is wired by the custom tool loader through the active session's resolve queue hook.\n\nIf the runtime did not provide the resolve queue hook, `pushPendingAction` throws:\n\n- `Pending action store unavailable for custom tools in this runtime.`\n\n## Tool-choice behavior\n\nWhen `queueResolveHandler(...)` registers a preview, the agent runtime forces a one-shot `resolve` tool choice so pending previews are explicitly finalized before normal tool flow continues.\n\n## Developer guidance\n\n- Use pending actions only for destructive or high-impact operations that should support explicit apply/discard.\n- Keep `label` concise and specific; it is shown in resolve renderer output.\n- Ensure `apply(reason)` is deterministic and idempotent enough for one-shot execution; `reason` is informational and should not change behavior.\n- Implement `reject(reason)` when the discard needs cleanup (temp state, locks, notifications); omit it for stateless previews where the default message suffices.\n- If your tool can stage multiple previews, remember they are mediated by the tool-choice queue rather than a separate pending-action stack.\n",
|
|
43
|
+
"rpc.md": "# RPC Protocol Reference\n\nRPC mode runs the coding agent as a newline-delimited JSON protocol over stdio.\n\n- **stdin**: commands (`RpcCommand`), extension UI responses, and host-tool updates/results\n- **stdout**: a ready frame, command responses (`RpcResponse`), session/agent events, extension UI requests, host-tool requests/cancellations\n\nPrimary implementation:\n\n- `src/modes/rpc/rpc-mode.ts`\n- `src/modes/rpc/rpc-types.ts`\n- `src/session/agent-session.ts`\n- `packages/agent/src/agent.ts`\n- `packages/agent/src/agent-loop.ts`\n\n## Startup\n\n```bash\nomp --mode rpc [regular CLI options]\n```\n\nBehavior notes:\n\n- `@file` CLI arguments are rejected in RPC mode.\n- RPC mode disables automatic session title generation by default to avoid an extra model call.\n- RPC mode resets workflow-altering `todo.*`, `task.*`, `async.*`, and `bash.autoBackground.*` settings to their built-in defaults instead of inheriting user overrides.\n- The process reads stdin as JSONL (`readJsonl(Bun.stdin.stream())`).\n- At startup it writes `{ \"type\": \"ready\" }` before processing commands.\n- When stdin closes, pending host-tool calls are rejected and the process exits with code `0`.\n- Responses/events are written as one JSON object per line.\n\n## Transport and Framing\n\nEach frame is a single JSON object followed by `\\n`.\n\nThere is no envelope beyond the object shape itself.\n\n### Outbound frame categories (stdout)\n\n1. Ready frame (`{ type: \"ready\" }`)\n2. `RpcResponse` (`{ type: \"response\", ... }`)\n3. `AgentSessionEvent` objects (`agent_start`, `message_update`, etc.)\n4. `RpcExtensionUIRequest` (`{ type: \"extension_ui_request\", ... }`)\n5. Host tool requests/cancellations (`host_tool_call`, `host_tool_cancel`)\n6. Host URI requests/cancellations (`host_uri_request`, `host_uri_cancel`)\n7. Extension errors (`{ type: \"extension_error\", extensionPath, event, error }`)\n\n### Inbound frame categories (stdin)\n\n1. `RpcCommand`\n2. `RpcExtensionUIResponse` (`{ type: \"extension_ui_response\", ... }`)\n3. Host tool updates/results (`host_tool_update`, `host_tool_result`)\n4. Host URI results (`host_uri_result`)\n\n## Request/Response Correlation\n\nAll commands accept optional `id?: string`.\n\n- If provided, normal command responses echo the same `id`.\n- `RpcClient` relies on this for pending-request resolution.\n\nImportant edge behavior from runtime:\n\n- Unknown command responses are emitted with `id: undefined` (even if the request had an `id`).\n- Parse/handler exceptions in the input loop emit `command: \"parse\"` with `id: undefined`.\n- `prompt` and `abort_and_prompt` return immediate success, then may emit a later error response with the **same** id if async prompt scheduling fails.\n\n## Command Schema (canonical)\n\n`RpcCommand` is defined in `src/modes/rpc/rpc-types.ts`:\n\n### Prompting\n\n- `{ id?, type: \"prompt\", message: string, images?: ImageContent[], streamingBehavior?: \"steer\" | \"followUp\" }`\n- `{ id?, type: \"steer\", message: string, images?: ImageContent[] }`\n- `{ id?, type: \"follow_up\", message: string, images?: ImageContent[] }`\n- `{ id?, type: \"abort\" }`\n- `{ id?, type: \"abort_and_prompt\", message: string, images?: ImageContent[] }`\n- `{ id?, type: \"new_session\", parentSession?: string }`\n\n### State\n\n- `{ id?, type: \"get_state\" }`\n- `{ id?, type: \"set_todos\", phases: TodoPhase[] }`\n- `{ id?, type: \"set_host_tools\", tools: RpcHostToolDefinition[] }`\n- `{ id?, type: \"set_host_uri_schemes\", schemes: RpcHostUriSchemeDefinition[] }`\n\n### Model\n\n- `{ id?, type: \"set_model\", provider: string, modelId: string }`\n- `{ id?, type: \"cycle_model\" }`\n- `{ id?, type: \"get_available_models\" }`\n\n### Thinking\n\n- `{ id?, type: \"set_thinking_level\", level: ThinkingLevel }`\n- `{ id?, type: \"cycle_thinking_level\" }`\n\n### Queue modes\n\n- `{ id?, type: \"set_steering_mode\", mode: \"all\" | \"one-at-a-time\" }`\n- `{ id?, type: \"set_follow_up_mode\", mode: \"all\" | \"one-at-a-time\" }`\n- `{ id?, type: \"set_interrupt_mode\", mode: \"immediate\" | \"wait\" }`\n\n### Compaction\n\n- `{ id?, type: \"compact\", customInstructions?: string }`\n- `{ id?, type: \"set_auto_compaction\", enabled: boolean }`\n\n### Retry\n\n- `{ id?, type: \"set_auto_retry\", enabled: boolean }`\n- `{ id?, type: \"abort_retry\" }`\n\n### Bash\n\n- `{ id?, type: \"bash\", command: string }`\n- `{ id?, type: \"abort_bash\" }`\n\n### Session\n\n- `{ id?, type: \"get_session_stats\" }`\n- `{ id?, type: \"export_html\", outputPath?: string }`\n- `{ id?, type: \"switch_session\", sessionPath: string }`\n- `{ id?, type: \"branch\", entryId: string }`\n- `{ id?, type: \"get_branch_messages\" }`\n- `{ id?, type: \"get_last_assistant_text\" }`\n- `{ id?, type: \"set_session_name\", name: string }`\n\n### Messages\n\n- `{ id?, type: \"get_messages\" }`\n\n## Response Schema\n\nAll command results use `RpcResponse`:\n\n- Success: `{ id?, type: \"response\", command: <command>, success: true, data?: ... }`\n- Failure: `{ id?, type: \"response\", command: string, success: false, error: string }`\n\nData payloads are command-specific and defined in `rpc-types.ts`.\n\n### `get_state` payload\n\n```json\n{\n \"model\": { \"provider\": \"...\", \"id\": \"...\" },\n \"thinkingLevel\": \"off|minimal|low|medium|high|xhigh\",\n \"isStreaming\": false,\n \"isCompacting\": false,\n \"steeringMode\": \"all|one-at-a-time\",\n \"followUpMode\": \"all|one-at-a-time\",\n \"interruptMode\": \"immediate|wait\",\n \"sessionFile\": \"...\",\n \"sessionId\": \"...\",\n \"sessionName\": \"...\",\n \"autoCompactionEnabled\": true,\n \"messageCount\": 0,\n \"queuedMessageCount\": 0,\n \"todoPhases\": [\n {\n \"id\": \"phase-1\",\n \"name\": \"Todos\",\n \"tasks\": [\n {\n \"id\": \"task-1\",\n \"content\": \"Map the tool surface\",\n \"status\": \"in_progress\"\n }\n ]\n }\n ],\n \"systemPrompt\": \"...\",\n \"dumpTools\": [\n {\n \"name\": \"read\",\n \"description\": \"Read files and URLs\",\n \"parameters\": {}\n }\n ]\n}\n```\n\n### `set_todos` payload\n\nReplaces the in-memory todo state for the current session and returns the normalized phase list:\n\n```json\n{\n \"id\": \"req_2\",\n \"type\": \"set_todos\",\n \"phases\": [\n {\n \"id\": \"phase-1\",\n \"name\": \"Evaluation\",\n \"tasks\": [\n {\n \"id\": \"task-1\",\n \"content\": \"Map the read tool surface\",\n \"status\": \"in_progress\"\n },\n {\n \"id\": \"task-2\",\n \"content\": \"Exercise edit operations\",\n \"status\": \"pending\"\n }\n ]\n }\n ]\n}\n```\n\nThis is useful for hosts that want to pre-seed a plan before the first prompt.\n\n### `set_host_tools` payload\n\nReplaces the current set of host-owned tools that the RPC server may call back\ninto over stdio:\n\n```json\n{\n \"id\": \"req_3\",\n \"type\": \"set_host_tools\",\n \"tools\": [\n {\n \"name\": \"echo_host\",\n \"label\": \"Echo Host\",\n \"description\": \"Echo a value from the embedding host\",\n \"parameters\": {\n \"type\": \"object\",\n \"properties\": {\n \"message\": { \"type\": \"string\" }\n },\n \"required\": [\"message\"],\n \"additionalProperties\": false\n }\n }\n ]\n}\n```\n\nThe response payload is:\n\n```json\n{\n \"toolNames\": [\"echo_host\"]\n}\n```\n\nThese tools are added to the active session tool registry before the next model\ncall. Re-sending `set_host_tools` replaces the previous host-owned set.\n\n### `set_host_uri_schemes` payload\n\nReplaces the current set of host-owned URL schemes the RPC server should\ndispatch reads/writes through:\n\n```json\n{\n \"id\": \"req_4\",\n \"type\": \"set_host_uri_schemes\",\n \"schemes\": [\n {\n \"scheme\": \"db\",\n \"description\": \"Virtual db row files\",\n \"writable\": true,\n \"immutable\": false\n }\n ]\n}\n```\n\nThe response payload is:\n\n```json\n{\n \"schemes\": [\"db\"]\n}\n```\n\nSchemes are case-insensitive on the wire and normalized to lowercase before\nthe response is sent. Re-sending `set_host_uri_schemes` replaces the entire\nprevious set — schemes missing from the new list are unregistered.\n\n## Event Stream Schema\n\nRPC mode forwards `AgentSessionEvent` objects from `AgentSession.subscribe(...)`.\n\nCommon event types:\n\n- `agent_start`, `agent_end`\n- `turn_start`, `turn_end`\n- `message_start`, `message_update`, `message_end`\n- `tool_execution_start`, `tool_execution_update`, `tool_execution_end`\n- `auto_compaction_start`, `auto_compaction_end`\n- `auto_retry_start`, `auto_retry_end`\n- `ttsr_triggered`\n- `todo_reminder`\n- `todo_auto_clear`\n\nExtension runner errors are emitted separately as:\n\n```json\n{\n \"type\": \"extension_error\",\n \"extensionPath\": \"...\",\n \"event\": \"...\",\n \"error\": \"...\"\n}\n```\n\n`message_update` includes streaming deltas in `assistantMessageEvent` (text/thinking/toolcall deltas).\n\n## Prompt/Queue Concurrency and Ordering\n\nThis is the most important operational behavior.\n\n### Immediate ack vs completion\n\n`prompt` and `abort_and_prompt` are **acknowledged immediately**:\n\n```json\n{ \"id\": \"req_1\", \"type\": \"response\", \"command\": \"prompt\", \"success\": true }\n```\n\nThat means:\n\n- command acceptance != run completion\n- final completion is observed via `agent_end`\n\n### While streaming\n\n`AgentSession.prompt()` requires `streamingBehavior` during active streaming:\n\n- `\"steer\"` => queued steering message (interrupt path)\n- `\"followUp\"` => queued follow-up message (post-turn path)\n\nIf omitted during streaming, prompt fails.\n\n### Queue defaults\n\nFrom `packages/agent/src/agent.ts` defaults:\n\n- `steeringMode`: `\"one-at-a-time\"`\n- `followUpMode`: `\"one-at-a-time\"`\n- `interruptMode`: `\"immediate\"`\n\n### Mode semantics\n\n- `set_steering_mode` / `set_follow_up_mode`\n - `\"one-at-a-time\"`: dequeue one queued message per turn\n - `\"all\"`: dequeue entire queue at once\n- `set_interrupt_mode`\n - `\"immediate\"`: tool execution checks steering between tool calls; pending steering can abort remaining tool calls in the turn\n - `\"wait\"`: defer steering until turn completion\n\n## Extension UI Sub-Protocol\n\nExtensions in RPC mode use request/response UI frames.\n\n### Outbound request\n\n`RpcExtensionUIRequest` (`type: \"extension_ui_request\"`) methods:\n\n- `select`, `confirm`, `input`, `editor`, `cancel`\n- `notify`, `setStatus`, `setWidget`, `setTitle`, `set_editor_text`\n\nRuntime note:\n\n- Automatic session title generation is disabled in RPC mode, and `setTitle` UI\n requests are also suppressed by default because most hosts do not have a\n meaningful terminal-title surface. Set `PI_RPC_EMIT_TITLE=1` to opt back in to\n the UI event only.\n\nExample:\n\n```json\n{\n \"type\": \"extension_ui_request\",\n \"id\": \"123\",\n \"method\": \"confirm\",\n \"title\": \"Confirm\",\n \"message\": \"Continue?\",\n \"timeout\": 30000\n}\n```\n\n### Inbound response\n\n`RpcExtensionUIResponse` (`type: \"extension_ui_response\"`):\n\n- `{ type: \"extension_ui_response\", id: string, value: string }`\n- `{ type: \"extension_ui_response\", id: string, confirmed: boolean }`\n- `{ type: \"extension_ui_response\", id: string, cancelled: true, timedOut?: boolean }`\n\nIf a dialog has a timeout, RPC mode resolves to a default value when timeout/abort fires.\n\n## Host Tool Sub-Protocol\n\nRPC hosts can expose custom tools to the agent by sending `set_host_tools`, then\nserving execution requests over the same transport.\n\n### Outbound request\n\nWhen the agent wants the host to execute one of those tools, RPC mode emits:\n\n```json\n{\n \"type\": \"host_tool_call\",\n \"id\": \"host_1\",\n \"toolCallId\": \"toolu_123\",\n \"toolName\": \"echo_host\",\n \"arguments\": { \"message\": \"hello\" }\n}\n```\n\nIf the tool execution is later aborted, RPC mode emits:\n\n```json\n{\n \"type\": \"host_tool_cancel\",\n \"id\": \"host_cancel_1\",\n \"targetId\": \"host_1\"\n}\n```\n\n### Inbound updates and completion\n\nHosts can optionally stream progress:\n\n```json\n{\n \"type\": \"host_tool_update\",\n \"id\": \"host_1\",\n \"partialResult\": {\n \"content\": [{ \"type\": \"text\", \"text\": \"working\" }]\n }\n}\n```\n\nCompletion uses:\n\n```json\n{\n \"type\": \"host_tool_result\",\n \"id\": \"host_1\",\n \"result\": {\n \"content\": [{ \"type\": \"text\", \"text\": \"done\" }]\n }\n}\n```\n\nSet top-level `isError: true` on `host_tool_result` to reject the pending host tool call and surface the returned text content as a tool error.\n\n## Host URI Sub-Protocol\n\nRPC hosts can also own custom URL schemes (virtual files). After\n`set_host_uri_schemes`, every read of `<scheme>://…` and write of\n`<scheme>://…` (when registered as `writable`) is bounced back to the host\nover the same transport.\n\n### Outbound request\n\nWhen a session tool resolves a host-owned URL, RPC mode emits:\n\n```json\n{\n \"type\": \"host_uri_request\",\n \"id\": \"uri_1\",\n \"operation\": \"read\",\n \"url\": \"db://users/42\"\n}\n```\n\nWrites look the same with `\"operation\": \"write\"` and an additional\n`\"content\": \"...\"` field carrying the full replacement bytes.\n\nIf the request is later aborted (caller cancels, session ends), RPC mode\nemits:\n\n```json\n{\n \"type\": \"host_uri_cancel\",\n \"id\": \"uri_cancel_1\",\n \"targetId\": \"uri_1\"\n}\n```\n\n### Inbound result\n\nFor successful reads:\n\n```json\n{\n \"type\": \"host_uri_result\",\n \"id\": \"uri_1\",\n \"content\": \"id=42\\nname=Alice\\n\",\n \"contentType\": \"text/plain\",\n \"notes\": [\"fresh from cache\"],\n \"immutable\": false\n}\n```\n\nFor successful writes, omit content:\n\n```json\n{ \"type\": \"host_uri_result\", \"id\": \"uri_1\" }\n```\n\nTo reject the request, set `isError: true` and either populate `error` with\na message or fall back to `content` for textual error surfacing:\n\n```json\n{\n \"type\": \"host_uri_result\",\n \"id\": \"uri_1\",\n \"isError\": true,\n \"error\": \"row 42 not found\"\n}\n```\n\n### Constraints\n\n- The agent's `edit` tool does not target host URIs. Hosts that want to\n mutate virtual files expose `write` and let the model use the `write` tool\n with replacement content.\n- Schemes are global to the process; `set_host_uri_schemes` replaces the\n previous set, unregistering anything not in the new list.\n- Schemes are normalized to lowercase before registration.\n\n## Error Model and Recoverability\n\n### Command-level failures\n\nFailures are `success: false` with string `error`.\n\n```json\n{\n \"id\": \"req_2\",\n \"type\": \"response\",\n \"command\": \"set_model\",\n \"success\": false,\n \"error\": \"Model not found: provider/model\"\n}\n```\n\n### Recoverability expectations\n\n- Most command failures are recoverable; process remains alive.\n- Malformed JSONL / parse-loop exceptions emit a `parse` error response and continue reading subsequent lines.\n- Empty `set_session_name` is rejected (`Session name cannot be empty`).\n- Extension UI responses with unknown `id` are ignored.\n- Process termination conditions are stdin close or explicit extension-triggered shutdown after the current command.\n\n## Compact Command Flows\n\n### 1) Prompt and stream\n\nstdin:\n\n```json\n{ \"id\": \"req_1\", \"type\": \"prompt\", \"message\": \"Summarize this repo\" }\n```\n\nstdout sequence (typical):\n\n```json\n{ \"id\": \"req_1\", \"type\": \"response\", \"command\": \"prompt\", \"success\": true }\n{ \"type\": \"agent_start\" }\n{ \"type\": \"message_update\", \"assistantMessageEvent\": { \"type\": \"text_delta\", \"delta\": \"...\" }, \"message\": { \"role\": \"assistant\", \"content\": [] } }\n{ \"type\": \"agent_end\", \"messages\": [] }\n```\n\n### 2) Prompt during streaming with explicit queue policy\n\nstdin:\n\n```json\n{\n \"id\": \"req_2\",\n \"type\": \"prompt\",\n \"message\": \"Also include risks\",\n \"streamingBehavior\": \"followUp\"\n}\n```\n\n### 3) Inspect and tune queue behavior\n\nstdin:\n\n```json\n{ \"id\": \"q1\", \"type\": \"get_state\" }\n{ \"id\": \"q2\", \"type\": \"set_steering_mode\", \"mode\": \"all\" }\n{ \"id\": \"q3\", \"type\": \"set_interrupt_mode\", \"mode\": \"wait\" }\n```\n\n### 4) Extension UI round trip\n\nstdout:\n\n```json\n{\n \"type\": \"extension_ui_request\",\n \"id\": \"ui_7\",\n \"method\": \"input\",\n \"title\": \"Branch name\",\n \"placeholder\": \"feature/...\"\n}\n```\n\nstdin:\n\n```json\n{ \"type\": \"extension_ui_response\", \"id\": \"ui_7\", \"value\": \"feature/rpc-host\" }\n```\n\n## Notes on `RpcClient` helper\n\n`src/modes/rpc/rpc-client.ts` is a convenience wrapper, not the protocol definition.\n\nCurrent helper characteristics:\n\n- Spawns `bun <cliPath> --mode rpc`\n- Correlates responses by generated `req_<n>` ids\n- Dispatches only recognized `AgentEvent` types to listeners\n- Supports host-owned custom tools via `setCustomTools()` and automatic handling of `host_tool_call` / `host_tool_cancel`\n- Does **not** expose helper methods for every protocol command (for example, `set_interrupt_mode` and `set_session_name` are in protocol types but not wrapped as dedicated methods)\n\nUse raw protocol frames if you need complete surface coverage.\n",
|
|
44
44
|
"rulebook-matching-pipeline.md": "# Rulebook Matching Pipeline\n\nThis document describes how coding-agent discovers rules from supported config formats, normalizes them into a single `Rule` shape, resolves precedence conflicts, and splits the result into:\n\n- **Rulebook rules** (available to the model via system prompt + `rule://` URLs)\n- **TTSR rules** (time-travel stream interruption rules)\n\nIt reflects the current implementation, including partial semantics and metadata that is parsed but not enforced.\n\n## Implementation files\n\n- [`../src/capability/rule.ts`](../packages/coding-agent/src/capability/rule.ts)\n- [`../src/capability/index.ts`](../packages/coding-agent/src/capability/index.ts)\n- [`../src/discovery/index.ts`](../packages/coding-agent/src/discovery/index.ts)\n- [`../src/discovery/helpers.ts`](../packages/coding-agent/src/discovery/helpers.ts)\n- [`../src/discovery/builtin.ts`](../packages/coding-agent/src/discovery/builtin.ts)\n- [`../src/discovery/cursor.ts`](../packages/coding-agent/src/discovery/cursor.ts)\n- [`../src/discovery/windsurf.ts`](../packages/coding-agent/src/discovery/windsurf.ts)\n- [`../src/discovery/cline.ts`](../packages/coding-agent/src/discovery/cline.ts)\n- [`../src/sdk.ts`](../packages/coding-agent/src/sdk.ts)\n- [`../src/system-prompt.ts`](../packages/coding-agent/src/system-prompt.ts)\n- [`../src/internal-urls/rule-protocol.ts`](../packages/coding-agent/src/internal-urls/rule-protocol.ts)\n- [`../src/utils/frontmatter.ts`](../packages/coding-agent/src/utils/frontmatter.ts)\n\n## 1. Canonical rule shape\n\nAll providers normalize source files into `Rule`:\n\n```ts\ninterface Rule {\n name: string;\n path: string;\n content: string;\n globs?: string[];\n alwaysApply?: boolean;\n description?: string;\n condition?: string[];\n scope?: string[];\n interruptMode?: \"never\" | \"prose-only\" | \"tool-only\" | \"always\";\n _source: SourceMeta;\n}\n```\n\nCapability identity is `rule.name` (`ruleCapability.key = rule => rule.name`).\n\nConsequence: precedence and deduplication are **name-based only**. Two different files with the same `name` are considered the same logical rule.\n\n## 2. Discovery sources and normalization\n\n`src/discovery/index.ts` auto-registers providers. For `rules`, current providers are:\n\n- `native` (priority `100`)\n- `cursor` (priority `50`)\n- `windsurf` (priority `50`)\n- `cline` (priority `40`)\n\n### Native provider (`builtin.ts`)\n\nLoads `.omp` rules from:\n\n- project: `<cwd>/.omp/rules/*.{md,mdc}`\n- user: `~/.omp/agent/rules/*.{md,mdc}`\n\nNormalization:\n\n- `name` = filename without `.md`/`.mdc`\n- frontmatter parsed via `parseFrontmatter`\n- `content` = body (frontmatter stripped)\n- `globs`, `alwaysApply`, `description`, `condition`/legacy `ttsr_trigger`, `scope`, and `interruptMode` are parsed by `buildRuleFromMarkdown`\n\nImportant caveat: `condition` values that look like file globs are converted into `tool:edit(...)` / `tool:write(...)` scope shorthands with catch-all condition `.*`.\n\n### Cursor provider (`cursor.ts`)\n\nLoads from:\n\n- user: `~/.cursor/rules/*.{mdc,md}`\n- project: `<cwd>/.cursor/rules/*.{mdc,md}`\n\nNormalization (`transformMDCRule`):\n\n- `description`: kept only if string\n- `alwaysApply`: only `true` is preserved (`false` becomes `undefined`)\n- `globs`: accepts array (string elements only) or single string\n- `condition`/legacy `ttsr_trigger`, `scope`, and `interruptMode` are parsed by shared rule helpers\n- `name` from filename without extension\n\n### Windsurf provider (`windsurf.ts`)\n\nLoads from:\n\n- user: `~/.codeium/windsurf/memories/global_rules.md` (fixed rule name `global_rules`)\n- project: `<cwd>/.windsurf/rules/*.md`\n\nNormalization:\n\n- `globs`: array-of-string or single string\n- `alwaysApply`, `description`, `condition`/legacy `ttsr_trigger`, `scope`, and `interruptMode` parsed by shared rule helpers\n- `name` is fixed to `global_rules` for the user global file and derived from filename for project rules\n\n### Cline provider (`cline.ts`)\n\nSearches upward from `cwd` for nearest `.clinerules`:\n\n- if directory: loads `*.md` inside it\n- if file: loads single file as rule named `clinerules`\n\nNormalization:\n\n- `globs`: array-of-string or single string\n- `alwaysApply`, `description`, `condition`/legacy `ttsr_trigger`, `scope`, and `interruptMode` parsed by shared rule helpers\n- `name` is fixed to `clinerules` for a `.clinerules` file and derived from filename for `.clinerules/*.md`\n\n## 3. Frontmatter parsing behavior and ambiguity\n\nAll providers use `parseFrontmatter` (`utils/frontmatter.ts`) with these semantics:\n\n1. Frontmatter is parsed only when content starts with `---` and has a closing `\\n---`.\n2. Body is trimmed after frontmatter extraction.\n3. If YAML parse fails:\n - warning is logged,\n - parser falls back to simple `key: value` line parsing (`^(\\w+):\\s*(.*)$`).\n\nAmbiguity consequences:\n\n- Fallback parser does not support arrays, nested objects, quoting rules, or hyphenated keys.\n- Fallback values become strings (for example `alwaysApply: true` becomes string `\"true\"`), so providers requiring boolean/string types may drop metadata.\n- `ttsr_trigger` works in fallback (underscore key); keys like `thinking-level` would not.\n- Files without valid frontmatter still load as rules with empty metadata and full content body.\n\n## 4. Provider precedence and deduplication\n\n`loadCapability(\"rules\")` (`capability/index.ts`) merges provider outputs and then deduplicates by `rule.name`.\n\n### Precedence model\n\n- Providers are ordered by priority descending.\n- Equal priority keeps registration order (`cursor` before `windsurf` from `discovery/index.ts`).\n- Dedup is first-wins: first encountered rule name is kept; later same-name items are marked `_shadowed` in `all` and excluded from `items`.\n\nEffective rule provider order is currently:\n\n1. `native` (100)\n2. `cursor` (50)\n3. `windsurf` (50)\n4. `cline` (40)\n\n### Intra-provider ordering caveat\n\nWithin a provider, item order comes from `loadFilesFromDir` glob result ordering plus explicit push order. This is deterministic enough for normal use but not explicitly sorted in code.\n\nNotable source-order differences:\n\n- `native` appends project then user config dirs.\n- `cursor` appends user then project results.\n- `windsurf` appends user `global_rules` first, then project rules.\n- `cline` loads only nearest `.clinerules` source.\n\n## 5. Split into Rulebook, Always-Apply, and TTSR buckets\n\nAfter rule discovery in `createAgentSession` (`sdk.ts`):\n\n1. All discovered rules are scanned.\n2. Rules with `condition` entries are registered into `TtsrManager`; legacy `ttsr_trigger` / `ttsrTrigger` are accepted during rule parsing as condition fallbacks.\n3. A separate `rulebookRules` list is built with this predicate:\n\n```ts\n!isTtsrRule && rule.alwaysApply !== true && !!rule.description;\n```\n\n4. An `alwaysApplyRules` list is built:\n\n```ts\n!isTtsrRule && rule.alwaysApply === true;\n```\n\n### Bucket behavior\n\n- **TTSR bucket**: any rule with a non-empty parsed `condition` that `TtsrManager.addRule(...)` accepts. Takes priority over other buckets.\n- **Always-apply bucket**: `alwaysApply === true`, not TTSR. Full content injected into system prompt. Resolvable via `rule://`.\n- **Rulebook bucket**: must have description, must not be TTSR, must not be `alwaysApply`. Listed in system prompt by name+description; content read on demand via `rule://`.\n- A rule with both `condition` and `alwaysApply` goes to TTSR only (TTSR takes priority).\n- A rule with both `alwaysApply` and `description` goes to always-apply only (not rulebook).\n\n## 6. How metadata affects runtime surfaces\n\n### `description`\n\n- Required for inclusion in rulebook.\n- Rendered in system prompt `<rules>` block.\n- Missing description means rule is not available via `rule://` and not listed in system prompt rules.\n\n### `globs`\n\n- Carried through on `Rule`.\n- Rendered as `<glob>...</glob>` entries in the system prompt rules block.\n- Exposed in rules UI state (`extensions` mode list).\n- **Not enforced for automatic matching in this pipeline.** There is no runtime glob matcher selecting rules by current file/tool target.\n\n### `alwaysApply`\n\n- Parsed and preserved by providers.\n- Used in UI display (`\"always\"` trigger label in extensions state manager).\n- Used as an exclusion condition from `rulebookRules`.\n- **Full rule content is auto-injected into the system prompt** (before the rulebook rules section).\n- Rule is also addressable via `rule://<name>` for re-reading.\n\n### `condition`, `scope`, and `interruptMode`\n\n- `condition` is the current TTSR trigger field; legacy `ttsr_trigger` / `ttsrTrigger` are accepted as fallback inputs during parsing.\n- `scope` narrows TTSR matching scope. A condition token that looks like a file glob becomes `tool:edit(<glob>)` and `tool:write(<glob>)` scope entries plus catch-all condition `.*`.\n- `interruptMode` can override the global TTSR interrupt mode for the rule.\n\n## 7. System prompt inclusion path\n\n`buildSystemPromptInternal` receives both `rules` (rulebook) and `alwaysApplyRules`.\n\nAlways-apply rules are rendered first, injecting their raw content directly into the prompt.\n\nRulebook rules are rendered in a `# Rules` section with:\n\n- `Read rule://<name> when working in matching domain`\n- Each rule's `name`, `description`, and optional `<glob>` list\n\nThis is advisory/contextual: prompt text asks the model to read applicable rules, but code does not enforce glob applicability.\n\n## 8. `rule://` internal URL behavior\n\n`RuleProtocolHandler` is registered with:\n\n```ts\nnew RuleProtocolHandler({\n getRules: () => [...rulebookRules, ...alwaysApplyRules],\n});\n```\n\nImplications:\n\n- `rule://<name>` resolves against both **rulebookRules** and **alwaysApplyRules**.\n- TTSR-only rules and rules with no description and no `alwaysApply` are not addressable via `rule://`.\n- Resolution is exact name match.\n- Unknown names return error listing available rule names.\n- Returned content is raw `rule.content` (frontmatter stripped), content type `text/markdown`.\n\n## 9. Known partial / non-enforced semantics\n\n1. Provider descriptions mention legacy files (`.cursorrules`, `.windsurfrules`), but current loader code paths do not actually read those files.\n2. `globs` metadata is surfaced to prompt/UI but not enforced by rule selection logic.\n3. Rule selection for `rule://` includes rulebook and always-apply rules, but not TTSR-only rules.\n4. Discovery warnings (`loadCapability(\"rules\").warnings`) are produced but `createAgentSession` does not currently surface/log them in this path.\n",
|
|
45
45
|
"sdk.md": "# SDK\n\nThe SDK is the in-process integration surface for `@oh-my-pi/pi-coding-agent`.\nUse it when you want direct access to agent state, event streaming, tool wiring, and session control from your own Bun/Node process.\n\nIf you need cross-language/process isolation, use RPC mode instead.\n\n## Installation\n\n```bash\nbun add @oh-my-pi/pi-coding-agent\n```\n\n## Entry points\n\n`@oh-my-pi/pi-coding-agent` exports the SDK APIs from the package root (and also via `@oh-my-pi/pi-coding-agent/sdk`).\n\nCore exports for embedders:\n\n- `createAgentSession`\n- `SessionManager`\n- `Settings`\n- `AuthStorage`\n- `ModelRegistry`\n- `discoverAuthStorage`\n- Discovery helpers (`discoverExtensions`, `discoverSkills`, `discoverContextFiles`, `discoverPromptTemplates`, `discoverSlashCommands`, `discoverCustomTSCommands`, `discoverMCPServers`)\n- Tool factory surface (`createTools`, `BUILTIN_TOOLS`, tool classes)\n\n## Quick start (auto-discovery defaults)\n\n```ts\nimport { createAgentSession } from \"@oh-my-pi/pi-coding-agent\";\n\nconst { session, modelFallbackMessage } = await createAgentSession();\n\nif (modelFallbackMessage) {\n process.stderr.write(`${modelFallbackMessage}\\n`);\n}\n\nconst unsubscribe = session.subscribe((event) => {\n if (\n event.type === \"message_update\" &&\n event.assistantMessageEvent.type === \"text_delta\"\n ) {\n process.stdout.write(event.assistantMessageEvent.delta);\n }\n});\n\nawait session.prompt(\"Summarize this repository in 3 bullets.\");\nunsubscribe();\nawait session.dispose();\n```\n\n## What `createAgentSession()` discovers by default\n\n`createAgentSession()` follows “provide to override, omit to discover”.\n\nIf omitted, it resolves:\n\n- `cwd`: `getProjectDir()`\n- `agentDir`: `~/.omp/agent` (via `getAgentDir()`)\n- `authStorage`: `discoverAuthStorage(agentDir)`\n- `modelRegistry`: `new ModelRegistry(authStorage)` + background `refreshInBackground()` when the registry is not provided\n- `settings`: `await Settings.init({ cwd, agentDir })`\n- `sessionManager`: `SessionManager.create(cwd)` (file-backed)\n- skills/context files/prompt templates/slash commands/extensions/custom TS commands\n- built-in tools via `createTools(...)`\n- MCP tools (enabled by default)\n- LSP integration (enabled by default)\n- `eventBus`: new `EventBus()` unless supplied\n\n### Required vs optional inputs\n\nTypically you must provide only what you want to control:\n\n- **Must provide**: nothing for a minimal session\n- **Usually provide explicitly** in embedders:\n - `sessionManager` (if you need in-memory or custom location)\n - `authStorage` + `modelRegistry` (if you own credential/model lifecycle)\n - `model` or `modelPattern` (if deterministic model selection matters)\n - `settings` (if you need isolated/test config)\n\n## Session manager behavior (persistent vs in-memory)\n\n`AgentSession` always uses a `SessionManager`; behavior depends on which factory you use.\n\n### File-backed (default)\n\n```ts\nimport { createAgentSession, SessionManager } from \"@oh-my-pi/pi-coding-agent\";\n\nconst { session } = await createAgentSession({\n sessionManager: SessionManager.create(process.cwd()),\n});\n\nconsole.log(session.sessionFile); // absolute .jsonl path\n```\n\n- Persists conversation/messages/state deltas to session files.\n- Supports resume/open/list/fork workflows.\n- `session.sessionFile` is defined.\n\n### In-memory\n\n```ts\nimport { createAgentSession, SessionManager } from \"@oh-my-pi/pi-coding-agent\";\n\nconst { session } = await createAgentSession({\n sessionManager: SessionManager.inMemory(),\n});\n\nconsole.log(session.sessionFile); // undefined\n```\n\n- No filesystem persistence.\n- Useful for tests, ephemeral workers, request-scoped agents.\n- Session methods still work, but persistence-specific behaviors (file resume/fork paths) are naturally limited.\n\n### Resume/open/list helpers\n\n```ts\nimport { SessionManager } from \"@oh-my-pi/pi-coding-agent\";\n\nconst recent = await SessionManager.continueRecent(process.cwd());\nconst listed = await SessionManager.list(process.cwd());\nconst opened = listed[0] ? await SessionManager.open(listed[0].path) : null;\n```\n\n## Model and auth wiring\n\n`createAgentSession()` uses `ModelRegistry` + `AuthStorage` for model selection and API key resolution.\n\n### Explicit wiring\n\n```ts\nimport {\n createAgentSession,\n discoverAuthStorage,\n ModelRegistry,\n SessionManager,\n} from \"@oh-my-pi/pi-coding-agent\";\n\nconst authStorage = await discoverAuthStorage();\nconst modelRegistry = new ModelRegistry(authStorage);\nawait modelRegistry.refresh();\n\nconst available = modelRegistry.getAvailable();\nif (available.length === 0)\n throw new Error(\"No authenticated models available\");\n\nconst { session } = await createAgentSession({\n authStorage,\n modelRegistry,\n model: available[0],\n thinkingLevel: \"medium\",\n sessionManager: SessionManager.inMemory(),\n});\n```\n\n### Selection order when `model` is omitted\n\nWhen no explicit `model`/`modelPattern` is provided:\n\n1. restore model from existing session (if restorable + key available)\n2. settings default model role (`default`)\n3. first available model with valid auth\n\nIf restore fails, `modelFallbackMessage` explains fallback.\n\n### Auth priority\n\n`AuthStorage.getApiKey(...)` resolves in this order:\n\n1. runtime override (`setRuntimeApiKey`)\n2. stored credentials in `agent.db`\n3. provider environment variables\n4. custom-provider resolver fallback (if configured)\n\n## Event subscription model\n\nSubscribe with `session.subscribe(listener)`; it returns an unsubscribe function.\n\n```ts\nconst unsubscribe = session.subscribe((event) => {\n switch (event.type) {\n case \"agent_start\":\n case \"turn_start\":\n case \"tool_execution_start\":\n break;\n case \"message_update\":\n if (event.assistantMessageEvent.type === \"text_delta\") {\n process.stdout.write(event.assistantMessageEvent.delta);\n }\n break;\n }\n});\n```\n\n`AgentSessionEvent` includes core `AgentEvent` plus session-level events:\n\n- `auto_compaction_start` / `auto_compaction_end`\n- `auto_retry_start` / `auto_retry_end`\n- `retry_fallback_applied` / `retry_fallback_succeeded`\n- `ttsr_triggered`\n- `todo_reminder` / `todo_auto_clear`\n- `irc_message`\n\n## Prompt lifecycle\n\n`session.prompt(text, options?)` is the primary entry point.\n\nBehavior:\n\n1. optional command/template expansion (`/` commands, custom commands, file slash commands, prompt templates)\n2. if currently streaming:\n - requires `streamingBehavior: \"steer\" | \"followUp\"`\n - queues instead of throwing work away\n3. if idle:\n - validates model + API key\n - appends user message\n - starts agent turn\n\nRelated APIs:\n\n- `sendUserMessage(content, { deliverAs? })`\n- `steer(text, images?)`\n- `followUp(text, images?)`\n- `sendCustomMessage({ customType, content, ... }, { deliverAs?, triggerTurn? })`\n- `abort()`\n\n## Tools and extension integration\n\n### Built-ins and filtering\n\n- Built-ins come from `createTools(...)` and `BUILTIN_TOOLS`.\n- `toolNames` acts as an allowlist for built-ins.\n- `customTools` and extension-registered tools are still included.\n- Hidden tools (for example `yield`) are opt-in unless required by options.\n\n```ts\nconst { session } = await createAgentSession({\n toolNames: [\"read\", \"grep\", \"find\", \"write\"],\n requireYieldTool: true,\n});\n```\n\n### Extensions\n\n- `extensions`: inline `ExtensionFactory[]`\n- `additionalExtensionPaths`: load extra extension files\n- `disableExtensionDiscovery`: disable automatic extension scanning\n- `preloadedExtensions`: reuse already loaded extension set\n\n### Runtime tool set changes\n\n`AgentSession` supports runtime activation updates:\n\n- `getActiveToolNames()`\n- `getAllToolNames()`\n- `setActiveToolsByName(names)`\n- `refreshMCPTools(mcpTools)`\n\nSystem prompt is rebuilt to reflect active tool changes.\n\n## Discovery helpers\n\nUse these when you want partial control without recreating internal discovery logic:\n\n- `discoverAuthStorage(agentDir?)`\n- `discoverExtensions(cwd?)`\n- `discoverSkills(cwd?, _agentDir?, settings?)`\n- `discoverContextFiles(cwd?, _agentDir?)`\n- `discoverPromptTemplates(cwd?, agentDir?)`\n- `discoverSlashCommands(cwd?)`\n- `discoverCustomTSCommands(cwd?, agentDir?)`\n- `discoverMCPServers(cwd?)`\n- `buildSystemPrompt(options?)`\n\n## Subagent-oriented options\n\nFor SDK consumers building orchestrators (similar to task executor flow):\n\n- `outputSchema`: passes structured output expectation into tool context\n- `requireYieldTool`: forces `yield` tool inclusion\n- `taskDepth`: recursion-depth context for nested task sessions\n- `parentTaskPrefix`: artifact naming prefix for nested task outputs\n\nThese are optional for normal single-agent embedding.\n\n## `createAgentSession()` return value\n\n```ts\ntype CreateAgentSessionResult = {\n session: AgentSession;\n extensionsResult: LoadExtensionsResult;\n setToolUIContext: (uiContext: ExtensionUIContext, hasUI: boolean) => void;\n mcpManager?: MCPManager;\n modelFallbackMessage?: string;\n lspServers?: Array<{\n name: string;\n status: \"ready\" | \"error\";\n fileTypes: string[];\n error?: string;\n }>;\n eventBus: EventBus;\n};\n```\n\nUse `setToolUIContext(...)` only if your embedder provides UI capabilities that tools/extensions should call into.\n\n## Minimal controlled embed example\n\n```ts\nimport {\n createAgentSession,\n discoverAuthStorage,\n ModelRegistry,\n SessionManager,\n Settings,\n} from \"@oh-my-pi/pi-coding-agent\";\n\nconst authStorage = await discoverAuthStorage();\nconst modelRegistry = new ModelRegistry(authStorage);\nawait modelRegistry.refresh();\n\nconst settings = Settings.isolated({\n \"compaction.enabled\": true,\n \"retry.enabled\": true,\n});\n\nconst { session } = await createAgentSession({\n authStorage,\n modelRegistry,\n settings,\n sessionManager: SessionManager.inMemory(),\n toolNames: [\"read\", \"grep\", \"find\", \"edit\", \"write\"],\n enableMCP: false,\n enableLsp: true,\n});\n\nsession.subscribe((event) => {\n if (\n event.type === \"message_update\" &&\n event.assistantMessageEvent.type === \"text_delta\"\n ) {\n process.stdout.write(event.assistantMessageEvent.delta);\n }\n});\n\nawait session.prompt(\"Find all TODO comments in this repo and propose fixes.\");\nawait session.dispose();\n```\n",
|
|
46
46
|
"secrets.md": "# Secret Obfuscation\n\nPrevents sensitive values (API keys, tokens, passwords) from being sent to LLM providers. When enabled, secrets are replaced with deterministic placeholders before leaving the process, and restored in tool call arguments returned by the model.\n\n## Enabling\n\nDisabled by default. Toggle via `/settings` UI or directly in `config.yml`:\n\n```yaml\nsecrets:\n enabled: true\n```\n\n## How it works\n\n1. On session startup, secrets are collected from two sources:\n - **Environment variables** whose names match common secret patterns (`KEY`, `SECRET`, `TOKEN`, `PASSWORD`, `PASS`, `AUTH`, `CREDENTIAL`, `PRIVATE`, `OAUTH`) with values >= 8 characters\n - **`secrets.yml` files** (see below)\n\n2. Outbound text messages to the LLM have secret values replaced with deterministic placeholders like `#AB12#`.\n\n3. Session context/tool arguments returned from the model are deep-walked and obfuscation placeholders are restored to original values before display or execution.\n\nTwo modes control what happens to each secret:\n\n| Mode | Behavior | Reversible |\n| --------------------- | ------------------------------------------------------- | ----------------------------------------------- |\n| `obfuscate` (default) | Replaced with deterministic placeholder `#[A-Z0-9]{4}#` | Yes (deobfuscated in tool args/session context) |\n| `replace` | Replaced with deterministic same-length string | No (one-way) |\n\n## secrets.yml\n\nDefine custom secret entries in YAML. Two locations are checked:\n\n| Level | Path | Purpose |\n| ------- | -------------------------- | --------------------------- |\n| Global | `~/.omp/agent/secrets.yml` | Secrets across all projects |\n| Project | `<cwd>/.omp/secrets.yml` | Project-specific secrets |\n\nProject entries override global entries with matching `content`.\n\n### Schema\n\nEach entry in the array has these fields:\n\n| Field | Type | Required | Description |\n| ------------- | ---------------------------- | -------- | ------------------------------------------------- |\n| `type` | `\"plain\"` or `\"regex\"` | Yes | Match strategy |\n| `content` | string | Yes | The secret value (plain) or regex pattern (regex) |\n| `mode` | `\"obfuscate\"` or `\"replace\"` | No | Default: `\"obfuscate\"` |\n| `replacement` | string | No | Custom replacement (replace mode only) |\n| `flags` | string | No | Regex flags (regex type only) |\n\n### Examples\n\n#### Plain secrets\n\n```yaml\n# Obfuscate a specific API key (default mode)\n- type: plain\n content: sk-proj-abc123def456\n\n# Replace a database password with a fixed string\n- type: plain\n content: hunter2\n mode: replace\n replacement: \"********\"\n```\n\n#### Regex secrets\n\n```yaml\n# Obfuscate any AWS-style key\n- type: regex\n content: \"AKIA[0-9A-Z]{16}\"\n\n# Case-insensitive match with explicit flags\n- type: regex\n content: \"api[_-]?key\\\\s*=\\\\s*\\\\w+\"\n flags: \"i\"\n\n# Regex literal syntax (pattern and flags in one string)\n- type: regex\n content: \"/bearer\\\\s+[a-zA-Z0-9._~+\\\\/=-]+/i\"\n```\n\nRegex entries always scan globally (the `g` flag is enforced automatically). The regex literal syntax `/pattern/flags` is supported as an alternative to separate `content` + `flags` fields. Escaped slashes within the pattern (`\\\\/`) are handled correctly.\n\n#### Replace mode with regex\n\n```yaml\n# One-way replace connection strings (not reversible)\n- type: regex\n content: \"postgres://[^\\\\s]+\"\n mode: replace\n replacement: \"postgres://***\"\n```\n\n## Interaction with env var detection\n\nEnvironment variables are collected first, then file-defined entries are appended. File entries can cover secrets that don't live in env vars (config files, hardcoded values, etc.). If the same plain value appears in both env and file entries, the env entry's obfuscate-mode mapping is used first.\n\n## Key files\n\n- `packages/coding-agent/src/secrets/index.ts` -- loading, merging, env var collection\n- `packages/coding-agent/src/secrets/obfuscator.ts` -- `SecretObfuscator` class, placeholder generation, message obfuscation\n- `packages/coding-agent/src/secrets/regex.ts` -- regex literal parsing and compilation\n- `packages/coding-agent/src/config/settings-schema.ts` -- `secrets.enabled` setting definition\n",
|
|
@@ -49,7 +49,7 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
|
|
|
49
49
|
"session-tree-plan.md": "# Session tree architecture (current)\n\nReference: [session.md](../docs/session.md)\n\nThis document describes how session tree navigation works today: in-memory tree model, leaf movement rules, branching behavior, and extension/event integration.\n\n## What this subsystem is\n\nThe session is stored as an append-only entry log, but runtime behavior is tree-based:\n\n- Every non-header entry has `id` and `parentId`.\n- The active position is `leafId` in `SessionManager`.\n- Appending an entry always creates a child of the current leaf.\n- Branching does **not** rewrite history; it only changes where the leaf points before the next append.\n\nKey files:\n\n- `src/session/session-manager.ts` — tree data model, traversal, leaf movement, branch/session extraction\n- `src/session/agent-session.ts` — `/tree` navigation flow, summarization, hook/event emission\n- `src/modes/components/tree-selector.ts` — interactive tree UI behavior and filtering\n- `src/modes/controllers/selector-controller.ts` — selector orchestration for `/tree` and `/branch`\n- `src/modes/controllers/input-controller.ts` — command routing (`/tree`, `/branch`, double-escape behavior)\n- `src/session/messages.ts` — conversion of `branch_summary`, `compaction`, and `custom_message` entries into LLM context messages\n\n## Tree data model in `SessionManager`\n\nRuntime indices:\n\n- `#byId: Map<string, SessionEntry>` — fast lookup for any entry\n- `#leafId: string | null` — current position in the tree\n- `#labelsById: Map<string, string>` — resolved labels by target entry id\n\nTree APIs:\n\n- `getBranch(fromId?)` walks parent links to root and returns root→node path\n- `getTree()` returns `SessionTreeNode[]` (`entry`, `children`, `label`)\n - parent links become children arrays\n - entries with missing parents are treated as roots\n - children are sorted oldest→newest by timestamp\n- `getChildren(parentId)` returns direct children\n- `getLabel(id)` resolves current label from `labelsById`\n\n`getTree()` is a runtime projection; persistence remains append-only JSONL entries.\n\n## Leaf movement semantics\n\nThere are three leaf movement primitives:\n\n1. `branch(entryId)`\n - Validates entry exists\n - Sets `leafId = entryId`\n - No new entry is written\n\n2. `resetLeaf()`\n - Sets `leafId = null`\n - Next append creates a new root entry (`parentId = null`)\n\n3. `branchWithSummary(branchFromId, summary, details?, fromExtension?)`\n - Accepts `branchFromId: string | null`\n - Sets `leafId = branchFromId`\n - Appends a `branch_summary` entry as child of that leaf\n - When `branchFromId` is `null`, `fromId` is persisted as `\"root\"`\n\n## `/tree` navigation behavior (same session file)\n\n`AgentSession.navigateTree()` is navigation, not file forking.\n\nFlow:\n\n1. Validate target and compute abandoned path (`collectEntriesForBranchSummary`)\n2. Emit `session_before_tree` with `TreePreparation`\n3. Optionally summarize abandoned entries (hook-provided summary or built-in summarizer)\n4. Compute new leaf target:\n - selecting a **user** message: leaf moves to its parent, and message text is returned for editor prefill\n - selecting a **custom_message**: same rule as user message (leaf = parent, text prefills editor)\n - selecting any other entry: leaf = selected entry id\n5. Apply leaf move:\n - with summary: `branchWithSummary(newLeafId, ...)`\n - without summary and `newLeafId === null`: `resetLeaf()`\n - otherwise: `branch(newLeafId)`\n6. Rebuild agent context from new leaf and emit `session_tree`\n\nImportant: summary entries are attached at the **new navigation position**, not on the abandoned branch tail.\n\n## `/branch` behavior (new session file)\n\n`/branch` and `/tree` are intentionally different:\n\n- `/tree` navigates within the current session file.\n- `/branch` creates a new session branch file (or in-memory replacement for non-persistent mode).\n\nUser-facing `/branch` flow (`SelectorController.showUserMessageSelector` → `AgentSession.branch`):\n\n- Branch source must be a **user message**.\n- Selected user text is extracted for editor prefill.\n- If selected user message is root (`parentId === null`): start a new session via `newSession({ parentSession: previousSessionFile })`.\n- Otherwise: `createBranchedSession(selectedEntry.parentId)` to fork history up to the selected prompt boundary.\n\n`SessionManager.createBranchedSession(leafId)` specifics:\n\n- Builds root→leaf path via `getBranch(leafId)`; throws if missing.\n- Excludes existing `label` entries from copied path.\n- Rebuilds fresh label entries from resolved `labelsById` for entries that remain in path.\n- Persistent mode: writes new JSONL file and switches manager to it; returns new file path.\n- In-memory mode: replaces in-memory entries; returns `undefined`.\n\n## Context reconstruction and summary/custom integration\n\n`buildSessionContext()` (in `session-manager.ts`) resolves the active root→leaf path and builds effective LLM context state:\n\n- Tracks latest thinking/model/service-tier/mode/TTSR/MCP-selection state on path.\n- Handles latest compaction on path:\n - emits compaction summary first\n - replays kept messages from `firstKeptEntryId` to compaction point\n - then replays post-compaction messages\n- Includes `branch_summary` and `custom_message` entries as `AgentMessage` objects.\n\n`session/messages.ts` then maps these message types for model input:\n\n- `branchSummary` and `compactionSummary` become user-role templated context messages\n- `custom`/`hookMessage` become user-role content messages\n\nSo tree movement changes context by changing the active leaf path, not by mutating old entries.\n\n## Labels and tree UI behavior\n\nLabel persistence:\n\n- `appendLabelChange(targetId, label?)` writes `label` entries on the current leaf chain.\n- `labelsById` is updated immediately (set or delete).\n- `getTree()` resolves current label onto each returned node.\n\nTree selector behavior (`tree-selector.ts`):\n\n- Flattens tree for navigation, keeps active-path highlighting, and prioritizes displaying the active branch first.\n- Supports filter modes: `default`, `no-tools`, `user-only`, `labeled-only`, `all`.\n- Supports free-text search over rendered semantic content.\n- `Shift+L` opens inline label editing and writes via `appendLabelChange`.\n\nCommand routing:\n\n- `/tree` always opens tree selector.\n- `/branch` opens user-message selector unless `doubleEscapeAction=tree`, in which case it also uses tree selector UX.\n\n## Extension and hook touchpoints for tree operations\n\nCommand-time extension API (`ExtensionCommandContext`):\n\n- `branch(entryId)` — create branched session file\n- `navigateTree(targetId, { summarize? })` — move within current tree/file\n\nEvents around tree navigation:\n\n- `session_before_tree`\n - receives `TreePreparation`:\n - `targetId`\n - `oldLeafId`\n - `commonAncestorId`\n - `entriesToSummarize`\n - `userWantsSummary`\n - may cancel navigation\n - may provide summary payload used instead of built-in summarizer\n - receives abort `signal` (Escape cancellation path)\n- `session_tree`\n - emits `newLeafId`, `oldLeafId`\n - includes `summaryEntry` when a summary was created\n - `fromExtension` indicates summary origin\n\nAdjacent but related lifecycle hooks:\n\n- `session_before_branch` / `session_branch` for `/branch` flow\n- `session_before_compact`, `session.compacting`, `session_compact` for compaction entries that later affect tree-context reconstruction\n\n## Real constraints and edge conditions\n\n- `branch()` cannot target `null`; use `resetLeaf()` for root-before-first-entry state.\n- `branchWithSummary()` supports `null` target and records `fromId: \"root\"`.\n- Selecting current leaf in tree selector is a no-op.\n- Summarization requires an active model; if absent, summarize navigation fails fast.\n- If summarization is aborted, navigation is cancelled and leaf is unchanged.\n- In-memory sessions never return a branch file path from `createBranchedSession`.\n- Tree context reconstruction includes service-tier and MCP tool-selection state, but those entries do not become LLM messages.\n\n## Legacy compatibility still present\n\nSession migrations still run on load:\n\n- v1→v2 adds `id`/`parentId` and converts compaction index anchor to id anchor\n- v2→v3 migrates legacy `hookMessage` role to `custom`\n\nCurrent runtime behavior is version-3 tree semantics after migration.\n",
|
|
50
50
|
"session.md": "# Session Storage and Entry Model\n\nThis document is the source of truth for how coding-agent sessions are represented, persisted, migrated, and reconstructed at runtime.\n\n## Scope\n\nCovers:\n\n- Session JSONL format and versioning\n- Entry taxonomy and tree semantics (`id`/`parentId` + leaf pointer)\n- Migration/compatibility behavior when loading old or malformed files\n- Context reconstruction (`buildSessionContext`)\n- Persistence guarantees, failure behavior, truncation/blob externalization\n- Storage abstractions (`FileSessionStorage`, `MemorySessionStorage`) and related utilities\n\nDoes not cover `/tree` UI rendering behavior beyond semantics that affect session data.\n\n## Implementation Files\n\n- [`src/session/session-manager.ts`](../packages/coding-agent/src/session/session-manager.ts)\n- [`src/session/messages.ts`](../packages/coding-agent/src/session/messages.ts)\n- [`src/session/session-storage.ts`](../packages/coding-agent/src/session/session-storage.ts)\n- [`src/session/history-storage.ts`](../packages/coding-agent/src/session/history-storage.ts)\n- [`src/session/blob-store.ts`](../packages/coding-agent/src/session/blob-store.ts)\n\n## On-Disk Layout\n\nDefault session file location:\n\n```text\n~/.omp/agent/sessions/--<cwd-encoded>--/<timestamp>_<sessionId>.jsonl\n```\n\n`<cwd-encoded>` is derived from the working directory by stripping leading slash and replacing `/`, `\\\\`, and `:` with `-`.\n\nBlob store location:\n\n```text\n~/.omp/agent/blobs/<sha256>\n```\n\nTerminal breadcrumb files are written under:\n\n```text\n~/.omp/agent/terminal-sessions/<terminal-id>\n```\n\nBreadcrumb content is two lines: original cwd, then session file path. `continueRecent()` prefers this terminal-scoped pointer before scanning most-recent mtime.\n\n## File Format\n\nSession files are JSONL: one JSON object per line.\n\n- Line 1 is always the session header (`type: \"session\"`).\n- Remaining lines are `SessionEntry` values.\n- Entries are append-only at runtime; branch navigation moves a pointer (`leafId`) rather than mutating existing entries.\n\n### Header (`SessionHeader`)\n\n```json\n{\n \"type\": \"session\",\n \"version\": 3,\n \"id\": \"1f9d2a6b9c0d1234\",\n \"timestamp\": \"2026-02-16T10:20:30.000Z\",\n \"cwd\": \"/work/pi\",\n \"title\": \"optional session title\",\n \"titleSource\": \"auto\",\n \"parentSession\": \"optional lineage marker\"\n}\n```\n\nNotes:\n\n- `version` is optional in v1 files; absence means v1.\n- `parentSession` is an opaque lineage string. Current code writes either a session id or a session path depending on flow (`fork`, `forkFrom`, `createBranchedSession`, or explicit `newSession({ parentSession })`). Treat as metadata, not a typed foreign key.\n\n### Entry Base (`SessionEntryBase`)\n\nAll non-header entries include:\n\n```json\n{\n \"type\": \"...\",\n \"id\": \"8-char-id\",\n \"parentId\": \"previous-or-branch-parent\",\n \"timestamp\": \"2026-02-16T10:20:30.000Z\"\n}\n```\n\n`parentId` can be `null` for a root entry (first append, or after `resetLeaf()`).\n\n## Entry Taxonomy\n\n`SessionEntry` is the union of:\n\n- `message`\n- `thinking_level_change`\n- `service_tier_change`\n- `compaction`\n- `branch_summary`\n- `custom`\n- `custom_message`\n- `label`\n- `ttsr_injection`\n- `session_init`\n- `mode_change`\n- `mcp_tool_selection`\n\n### `message`\n\nStores an `AgentMessage` directly.\n\n```json\n{\n \"type\": \"message\",\n \"id\": \"a1b2c3d4\",\n \"parentId\": null,\n \"timestamp\": \"2026-02-16T10:21:00.000Z\",\n \"message\": {\n \"role\": \"assistant\",\n \"provider\": \"anthropic\",\n \"model\": \"claude-sonnet-4-5\",\n \"content\": [{ \"type\": \"text\", \"text\": \"Done.\" }],\n \"usage\": {\n \"input\": 100,\n \"output\": 20,\n \"cacheRead\": 0,\n \"cacheWrite\": 0,\n \"cost\": {\n \"input\": 0,\n \"output\": 0,\n \"cacheRead\": 0,\n \"cacheWrite\": 0,\n \"total\": 0\n }\n },\n \"timestamp\": 1760000000000\n }\n}\n```\n\n### `model_change`\n\n```json\n{\n \"type\": \"model_change\",\n \"id\": \"b1c2d3e4\",\n \"parentId\": \"a1b2c3d4\",\n \"timestamp\": \"2026-02-16T10:21:30.000Z\",\n \"model\": \"openai/gpt-4o\",\n \"role\": \"default\"\n}\n```\n\n`role` is optional; missing is treated as `default` in context reconstruction.\n\n### `service_tier_change`\n\n```json\n{\n \"type\": \"service_tier_change\",\n \"id\": \"c1d2e3f4\",\n \"parentId\": \"b1c2d3e4\",\n \"timestamp\": \"2026-02-16T10:21:45.000Z\",\n \"serviceTier\": \"flex\"\n}\n```\n\n`serviceTier` can also be `null`.\n\n### `thinking_level_change`\n\n```json\n{\n \"type\": \"thinking_level_change\",\n \"id\": \"c1d2e3f4\",\n \"parentId\": \"b1c2d3e4\",\n \"timestamp\": \"2026-02-16T10:22:00.000Z\",\n \"thinkingLevel\": \"high\"\n}\n```\n\n### `compaction`\n\n```json\n{\n \"type\": \"compaction\",\n \"id\": \"d1e2f3a4\",\n \"parentId\": \"c1d2e3f4\",\n \"timestamp\": \"2026-02-16T10:23:00.000Z\",\n \"summary\": \"Conversation summary\",\n \"shortSummary\": \"Short recap\",\n \"firstKeptEntryId\": \"a1b2c3d4\",\n \"tokensBefore\": 42000,\n \"details\": { \"readFiles\": [\"src/a.ts\"] },\n \"preserveData\": { \"hookState\": true },\n \"fromExtension\": false\n}\n```\n\n### `branch_summary`\n\n```json\n{\n \"type\": \"branch_summary\",\n \"id\": \"e1f2a3b4\",\n \"parentId\": \"a1b2c3d4\",\n \"timestamp\": \"2026-02-16T10:24:00.000Z\",\n \"fromId\": \"a1b2c3d4\",\n \"summary\": \"Summary of abandoned path\",\n \"details\": { \"note\": \"optional\" },\n \"fromExtension\": true\n}\n```\n\nIf branching from root (`branchFromId === null`), `fromId` is the literal string `\"root\"`.\n\n### `custom`\n\nExtension state persistence; ignored by `buildSessionContext`.\n\n```json\n{\n \"type\": \"custom\",\n \"id\": \"f1a2b3c4\",\n \"parentId\": \"e1f2a3b4\",\n \"timestamp\": \"2026-02-16T10:25:00.000Z\",\n \"customType\": \"my-extension\",\n \"data\": { \"state\": 1 }\n}\n```\n\n### `custom_message`\n\nExtension-provided message that does participate in LLM context. `content` can be a string or text/image content blocks, and `attribution` records whether the user or agent initiated it.\n\n```json\n{\n \"type\": \"custom_message\",\n \"id\": \"a2b3c4d5\",\n \"parentId\": \"f1a2b3c4\",\n \"timestamp\": \"2026-02-16T10:26:00.000Z\",\n \"customType\": \"my-extension\",\n \"content\": \"Injected context\",\n \"display\": true,\n \"details\": { \"debug\": false },\n \"attribution\": \"agent\"\n}\n```\n\n### `label`\n\n```json\n{\n \"type\": \"label\",\n \"id\": \"b2c3d4e5\",\n \"parentId\": \"a2b3c4d5\",\n \"timestamp\": \"2026-02-16T10:27:00.000Z\",\n \"targetId\": \"a1b2c3d4\",\n \"label\": \"checkpoint\"\n}\n```\n\n`label: undefined` clears a label for `targetId`.\n\n### `ttsr_injection`\n\n```json\n{\n \"type\": \"ttsr_injection\",\n \"id\": \"c2d3e4f5\",\n \"parentId\": \"b2c3d4e5\",\n \"timestamp\": \"2026-02-16T10:28:00.000Z\",\n \"injectedRules\": [\"ruleA\", \"ruleB\"]\n}\n```\n\n### `mcp_tool_selection`\n\n```json\n{\n \"type\": \"mcp_tool_selection\",\n \"id\": \"d2e3f4a5\",\n \"parentId\": \"c2d3e4f5\",\n \"timestamp\": \"2026-02-16T10:28:30.000Z\",\n \"selectedToolNames\": [\"server.tool\"]\n}\n```\n\n### `session_init`\n\n```json\n{\n \"type\": \"session_init\",\n \"id\": \"d2e3f4a5\",\n \"parentId\": \"c2d3e4f5\",\n \"timestamp\": \"2026-02-16T10:29:00.000Z\",\n \"systemPrompt\": \"...\",\n \"task\": \"...\",\n \"tools\": [\"read\", \"edit\"],\n \"outputSchema\": { \"type\": \"object\" }\n}\n```\n\n### `mode_change`\n\n```json\n{\n \"type\": \"mode_change\",\n \"id\": \"e2f3a4b5\",\n \"parentId\": \"d2e3f4a5\",\n \"timestamp\": \"2026-02-16T10:30:00.000Z\",\n \"mode\": \"plan\",\n \"data\": { \"planFile\": \"/tmp/plan.md\" }\n}\n```\n\n## Versioning and Migration\n\nCurrent session version: `3`.\n\n### v1 -> v2\n\nApplied when header `version` is missing or `< 2`:\n\n- Adds `id` and `parentId` to each non-header entry.\n- Reconstructs a linear parent chain using file order.\n- Migrates compaction field `firstKeptEntryIndex` -> `firstKeptEntryId` when present.\n- Sets header `version = 2`.\n\n### v2 -> v3\n\nApplied when header `version < 3`:\n\n- For `message` entries: rewrites legacy `message.role === \"hookMessage\"` to `\"custom\"`.\n- Sets header `version = 3`.\n\n### Migration Trigger and Persistence\n\n- Migrations run during session load (`setSessionFile`).\n- If any migration ran, the entire file is rewritten to disk immediately.\n- Migration mutates in-memory entries first, then persists rewritten JSONL.\n\n## Load and Compatibility Behavior\n\n`loadEntriesFromFile(path)` behavior:\n\n- Missing file (`ENOENT`) -> returns `[]`.\n- Non-parseable lines are handled by lenient JSONL parser (`parseJsonlLenient`).\n- If first parsed entry is not a valid session header (`type !== \"session\"` or missing string `id`) -> returns `[]`.\n\n`SessionManager.setSessionFile()` behavior:\n\n- `[]` from loader is treated as empty/nonexistent session and replaced with a new initialized session file at that path.\n- Valid files are loaded, migrated if needed, blob refs resolved, then indexed.\n\n## Tree and Leaf Semantics\n\nThe underlying model is append-only tree + mutable leaf pointer:\n\n- Every append method creates exactly one new entry whose `parentId` is current `leafId`.\n- The new entry becomes the new `leafId`.\n- `branch(entryId)` moves only `leafId`; existing entries remain unchanged.\n- `resetLeaf()` sets `leafId = null`; next append creates a new root entry (`parentId: null`).\n- `branchWithSummary()` sets leaf to branch target and appends a `branch_summary` entry.\n\n`getEntries()` returns all non-header entries in insertion order. Existing entries are not deleted in normal operation; rewrites preserve logical history while updating representation (migrations, move, targeted rewrite helpers).\n\n## Context Reconstruction (`buildSessionContext`)\n\n`buildSessionContext(entries, leafId, byId?)` resolves what is sent to the model.\n\nAlgorithm:\n\n1. Determine leaf:\n - `leafId === null` -> return empty context.\n - explicit `leafId` -> use that entry if found.\n - otherwise fallback to last entry.\n2. Walk `parentId` chain from leaf to root and reverse to root->leaf path.\n3. Derive runtime state across path:\n - `thinkingLevel` from latest `thinking_level_change` (default `\"off\"`)\n - `serviceTier` from latest `service_tier_change`\n - model map from `model_change` entries (`role ?? \"default\"`)\n - fallback `models.default` from assistant message provider/model if no explicit model change\n - deduplicated `injectedTtsrRules` from all `ttsr_injection` entries\n - selected MCP discovery tools from latest `mcp_tool_selection`\n - mode/modeData from latest `mode_change` (default mode `\"none\"`)\n4. Build message list:\n - `message` entries pass through\n - `custom_message` entries become `custom` AgentMessages via `createCustomMessage`\n - `branch_summary` entries become `branchSummary` AgentMessages via `createBranchSummaryMessage`\n - if a `compaction` exists on path:\n - emit compaction summary first (`createCompactionSummaryMessage`)\n - emit path entries starting at `firstKeptEntryId` up to the compaction boundary\n - emit entries after the compaction boundary\n\n`custom`, `session_init`, `service_tier_change`, `mcp_tool_selection`, and `ttsr_injection` entries do not inject model context directly.\n\n## Persistence Guarantees and Failure Model\n\n### Persist vs in-memory\n\n- `SessionManager.create/open/continueRecent/forkFrom` -> persistent mode (`persist = true`).\n- `SessionManager.inMemory` -> non-persistent mode (`persist = false`) with `MemorySessionStorage`.\n\n### Write pipeline\n\nWrites are serialized through an internal promise chain (`#persistChain`) and `NdjsonFileWriter`.\n\n- `append*` updates in-memory state immediately.\n- Persistence is deferred until at least one assistant message exists.\n - Before first assistant: entries are retained in memory; no file append occurs.\n - When first assistant exists: full in-memory session is flushed to file.\n - Afterwards: new entries append incrementally.\n\nRationale in code: avoid persisting sessions that never produced an assistant response.\n\n### Durability operations\n\n- `flush()` flushes writer and calls `fsync()`.\n- Atomic full rewrites (`#rewriteFile`) write to temp file, flush+fsync, close, then rename over target.\n- Used for migrations, `setSessionName`, `rewriteEntries`, move operations, and tool-call arg rewrites.\n\n### Error behavior\n\n- Persistence errors are latched (`#persistError`) and rethrown on subsequent operations.\n- First error is logged once with session file context.\n- Writer close is best-effort but propagates the first meaningful error.\n\n## Data Size Controls and Blob Externalization\n\nBefore persisting entries:\n\n- Large strings are truncated to `MAX_PERSIST_CHARS` (500,000 chars) with notice:\n - `\"[Session persistence truncated large content]\"`\n- Transient fields `partialJson` and `jsonlEvents` are removed.\n- If object has both `content` and `lineCount`, line count is recomputed after truncation.\n- Image blocks in `content` arrays with base64 length >= 1024 are externalized to blob refs:\n - stored as `blob:sha256:<hash>`\n - raw bytes written to blob store (`BlobStore.put`)\n\nOn load, blob refs are resolved back to base64 for message/custom_message image blocks.\n\n## Storage Abstractions\n\n`SessionStorage` interface provides all filesystem operations used by `SessionManager`:\n\n- sync: `ensureDirSync`, `existsSync`, `writeTextSync`, `statSync`, `listFilesSync`\n- async: `exists`, `readText`, `readTextPrefix`, `writeText`, `rename`, `unlink`, `openWriter`\n\nImplementations:\n\n- `FileSessionStorage`: real filesystem (Bun + node fs)\n- `MemorySessionStorage`: map-backed in-memory implementation for tests/non-persistent sessions\n\n`SessionStorageWriter` exposes `writeLine`, `flush`, `fsync`, `close`, `getError`.\n\n## Session Discovery Utilities\n\nDefined in `session-manager.ts`:\n\n- `getRecentSessions(sessionDir, limit)` -> lightweight metadata for UI/session picker\n- `findMostRecentSession(sessionDir)` -> newest by mtime\n- `list(cwd, sessionDir?)` -> sessions in one project scope\n- `listAll()` -> sessions across all project scopes under `~/.omp/agent/sessions`\n\nMetadata extraction reads only a prefix (`readTextPrefix(..., 4096)`) where possible.\n\n## Related but Distinct: Prompt History Storage\n\n`HistoryStorage` (`history-storage.ts`) is a separate SQLite subsystem for prompt recall/search, not session replay.\n\n- DB: `~/.omp/agent/history.db`\n- Table: `history(id, prompt, created_at, cwd)`\n- FTS5 index: `history_fts` with trigger-maintained sync\n- Deduplicates consecutive identical prompts using in-memory last-prompt cache\n- Async insertion (`setImmediate`) so prompt capture does not block turn execution\n\nUse session files for conversation graph/state replay; use `HistoryStorage` for prompt history UX.\n",
|
|
51
51
|
"skills.md": "# Skills\n\nSkills are file-backed capability packs discovered at startup and exposed to the model as:\n\n- lightweight metadata in the system prompt (name + description)\n- on-demand content via the `read` tool against `skill://...`\n- optional interactive `/skill:<name>` commands\n\nThis document covers current runtime behavior in `src/extensibility/skills.ts`, `src/discovery/builtin.ts`, `src/internal-urls/skill-protocol.ts`, and `src/discovery/agents-md.ts`.\n\n## What a skill is in this codebase\n\nA discovered skill is represented as:\n\n- `name`\n- `description`\n- `filePath` (the `SKILL.md` path)\n- `baseDir` (skill directory)\n- source metadata (`provider`, `level`, path)\n\nThe runtime only requires `name` and `path` for validity. In practice, matching quality depends on `description` being meaningful.\n\n## Required layout and SKILL.md expectations\n\n### Directory layout\n\nFor provider-based discovery (native/Claude/Codex/Agents/plugin providers), skills are discovered as **one level under `skills/`**:\n\n- `<skills-root>/<skill-name>/SKILL.md`\n\nNested patterns like `<skills-root>/group/<skill>/SKILL.md` are not discovered by provider loaders.\n\nFor `skills.customDirectories`, scanning uses the same non-recursive layout (`*/SKILL.md`).\n\n```text\nProvider-discovered layout (non-recursive under skills/):\n\n<root>/skills/\n ├─ postgres/\n │ └─ SKILL.md ✅ discovered\n ├─ pdf/\n │ └─ SKILL.md ✅ discovered\n └─ team/\n └─ internal/\n └─ SKILL.md ❌ not discovered by provider loaders\n\nCustom-directory scanning is also non-recursive, so nested paths are ignored unless you point `customDirectories` at that nested parent.\n```\n\n### `SKILL.md` frontmatter\n\nSupported frontmatter fields on the skill type:\n\n- `name?: string`\n- `description?: string`\n- `globs?: string[]`\n- `alwaysApply?: boolean`\n- additional keys are preserved as unknown metadata\n\nCurrent runtime behavior:\n\n- `name` defaults to the skill directory name\n- `description` is required for:\n - native `.omp` provider skill discovery (`requireDescription: true`)\n - `skills.customDirectories` scans via `scanSkillsFromDir` in `src/discovery/helpers.ts` (non-recursive)\n- non-native providers can load skills without description\n\n## Discovery pipeline\n\n`discoverSkills()` in `src/extensibility/skills.ts` does two passes:\n\n1. **Capability providers** via `loadCapability(\"skills\")`\n2. **Custom directories** via `scanSkillsFromDir(..., { requireDescription: true })` (one-level directory enumeration)\n\nIf `skills.enabled` is `false`, discovery returns no skills.\n\n### Built-in skill providers and precedence\n\nProvider ordering is priority-first (higher wins), then registration order for ties.\n\nCurrent registered skill providers:\n\n1. `native` (priority 100) — `.omp` user/project skills via `src/discovery/builtin.ts`\n2. `claude` (priority 80)\n3. priority 70 group (in registration order):\n - `claude-plugins`\n - `agents`\n - `codex`\n4. `opencode` (priority 55)\n Dedup key is skill name. First item with a given name wins.\n\n### Source toggles and filtering\n\n`discoverSkills()` applies these controls:\n\n- source toggles: `enableCodexUser`, `enableClaudeUser`, `enableClaudeProject`, `enablePiUser`, `enablePiProject`\n- `disabledExtensions` entries with `skill:<name>`\n- `ignoredSkills` (exclude)\n- `includeSkills` (include allowlist; empty means include all)\n\nFilter order is:\n\n1. not disabled by `disabledExtensions`\n2. source enabled\n3. not ignored\n4. included (if include list present)\n For providers other than codex/claude/native (for example `agents`, `claude-plugins`, `opencode`), enablement currently falls back to: enabled if **any** built-in source toggle is enabled.\n\n### Collision and duplicate handling\n\n- Capability dedup already keeps first skill per name (highest-precedence provider)\n- `extensibility/skills.ts` additionally:\n - de-duplicates identical files by `realpath` (symlink-safe)\n - emits collision warnings when a later skill name conflicts\n - keeps the convenience `discoverSkillsFromDir({ dir, source })` API as a thin adapter over `scanSkillsFromDir`\n- Custom-directory skills are merged after provider skills and follow the same collision behavior\n\n## Runtime usage behavior\n\n### System prompt exposure\n\nSystem prompt construction (`src/system-prompt.ts`) uses discovered skills as follows:\n\n- if `read` tool is available:\n - include discovered skills list in prompt\n- otherwise:\n - omit discovered list\n\nTask tool subagents receive the session's discovered/provided skills list via normal session creation; there is no per-task skill pinning override.\n\n### Interactive `/skill:<name>` commands\n\nIf `skills.enableSkillCommands` is true, interactive mode registers one slash command per discovered skill.\n\n`/skill:<name> [args]` behavior:\n\n- reads the skill file directly from `filePath`\n- strips frontmatter\n- injects skill body as a custom message\n- delivery mode follows the **submission keybinding**:\n - **Enter** → invokes the skill on the `steer` queue while streaming (matches free-text Enter, which also steers), or as a normal idle prompt when the agent is not streaming\n - **Ctrl+Enter** (`app.message.followUp`) → invokes the skill on the `followUp` queue while streaming, or as a normal idle prompt when the agent is not streaming\n- appends metadata (`Skill: <path>`, optional `User: <args>`)\n\nThere is no flag, mode-selector, or frontmatter knob to override this — the keybinding *is* the choice, identical to how free text is routed during streaming (`input-controller.ts:243-249` for Enter, `input-controller.ts:462-500` for Ctrl+Enter; both dispatch through `#invokeSkillCommand`).\n\n## `skill://` URL behavior\n\n`src/internal-urls/skill-protocol.ts` supports:\n\n- `skill://<name>` → resolves to that skill's `SKILL.md`\n- `skill://<name>/<relative-path>` → resolves inside that skill directory\n\n```text\nskill:// URL resolution\n\nskill://pdf\n -> <pdf-base>/SKILL.md\n\nskill://pdf/references/tables.md\n -> <pdf-base>/references/tables.md\n\nGuards:\n- reject absolute paths\n- reject `..` traversal\n- reject any resolved path escaping <pdf-base>\n```\n\nResolution details:\n\n- skill name must match exactly\n- relative paths are URL-decoded\n- absolute paths are rejected\n- path traversal (`..`) is rejected\n- resolved path must remain within `baseDir`\n- missing files return an explicit `File not found` error\n\nContent type:\n\n- `.md` => `text/markdown`\n- everything else => `text/plain`\n\nNo fallback search is performed for missing assets.\n\n## Skills vs AGENTS.md, commands, tools, hooks\n\n### Skills vs AGENTS.md\n\n- **Skills**: named, optional capability packs selected by task context or explicitly requested\n- **AGENTS.md/context files**: persistent instruction files loaded as context-file capability and merged by level/depth rules\n\n`src/discovery/agents-md.ts` specifically walks ancestor directories from `cwd` to discover standalone `AGENTS.md` files (up to depth 20), excluding hidden-directory segments.\n\n### Skills vs slash commands\n\n- **Skills**: model-readable knowledge/workflow content\n- **Slash commands**: user-invoked command entry points\n- `/skill:<name>` is a convenience wrapper that injects skill text; it does not change skill discovery semantics\n\n### Skills vs custom tools\n\n- **Skills**: documentation/workflow content loaded through prompt context and `read`\n- **Custom tools**: executable tool APIs callable by the model with schemas and runtime side effects\n\n### Skills vs hooks\n\n- **Skills**: passive content\n- **Hooks**: event-driven runtime interceptors that can block/modify behavior during execution\n\n## Practical authoring guidance tied to discovery logic\n\n- Put each skill in its own directory: `<skills-root>/<skill-name>/SKILL.md`\n- Always include explicit `name` and `description` frontmatter\n- Keep referenced assets under the same skill directory and access with `skill://<name>/...`\n- For nested taxonomy (`team/domain/skill`), point `skills.customDirectories` to the nested parent directory; scanning itself remains non-recursive\n- Avoid duplicate skill names across sources; first match wins by provider precedence\n",
|
|
52
|
-
"skills/authoring-extensions.md": "---\nname: authoring-extensions\ndescription: Use when creating a new omp extension. Covers ExtensionAPI, factory signature, tool/command/event registration, and local-dev testing.\n---\n\n# Authoring Extensions\n\nExtensions are the primary way to add capabilities to `oh-my-pi`. A single extension module can register tools the LLM can call, slash commands users can invoke, and event handlers that run throughout the session lifecycle — all from one TypeScript file.\n\n## Minimum viable extension\n\n```ts\nimport type { ExtensionAPI } from \"@oh-my-pi/pi-coding-agent\";\n\nexport default function (pi: ExtensionAPI) {\n pi.on(\"session_start\", async (_event, ctx) => {\n ctx.ui.notify(\"My extension loaded!\", \"info\");\n });\n}\n```\n\nThat is a working extension. Drop it into `~/.omp/agent/extensions/hello.ts` and restart omp to see the notification.\n\n## Full example\n\nThe following extension registers a slash command, a tool, and a session-start hook:\n\n```ts\nimport type { ExtensionAPI } from \"@oh-my-pi/pi-coding-agent\";\n\nexport default function myExtension(pi: ExtensionAPI) {\n const
|
|
52
|
+
"skills/authoring-extensions.md": "---\nname: authoring-extensions\ndescription: Use when creating a new omp extension. Covers ExtensionAPI, factory signature, tool/command/event registration, and local-dev testing.\n---\n\n# Authoring Extensions\n\nExtensions are the primary way to add capabilities to `oh-my-pi`. A single extension module can register tools the LLM can call, slash commands users can invoke, and event handlers that run throughout the session lifecycle — all from one TypeScript file.\n\n## Minimum viable extension\n\n```ts\nimport type { ExtensionAPI } from \"@oh-my-pi/pi-coding-agent\";\n\nexport default function (pi: ExtensionAPI) {\n pi.on(\"session_start\", async (_event, ctx) => {\n ctx.ui.notify(\"My extension loaded!\", \"info\");\n });\n}\n```\n\nThat is a working extension. Drop it into `~/.omp/agent/extensions/hello.ts` and restart omp to see the notification.\n\n## Full example\n\nThe following extension registers a slash command, a tool, and a session-start hook:\n\n```ts\nimport type { ExtensionAPI } from \"@oh-my-pi/pi-coding-agent\";\n\nexport default function myExtension(pi: ExtensionAPI) {\n const z = pi.zod;\n\n // Runs once when the session loads\n pi.on(\"session_start\", async (_event, ctx) => {\n ctx.ui.notify(`Session ready in ${ctx.cwd}`, \"info\");\n });\n\n // Slash command: /greet\n pi.registerCommand(\"greet\", {\n description: \"Send a greeting into the conversation\",\n handler: async (args, ctx) => {\n const name = args.trim() || \"world\";\n pi.sendMessage(\n {\n customType: \"greeting\",\n content: `Hello, ${name}!`,\n display: true,\n attribution: \"user\",\n },\n { triggerTurn: false }\n );\n ctx.ui.notify(`Greeted ${name}`, \"info\");\n },\n });\n\n // LLM-callable tool\n pi.registerTool({\n name: \"word_count\",\n label: \"Word Count\",\n description: \"Count the words in a string\",\n parameters: z.object({\n text: z.string().describe(\"Text to count\"),\n }),\n async execute(_id, params, _signal, _onUpdate, _ctx) {\n const count = params.text.split(/\\s+/).filter(Boolean).length;\n return {\n content: [{ type: \"text\", text: String(count) }],\n details: { count },\n };\n },\n });\n}\n```\n\n## Discovery path\n\nomp discovers extension modules in this order:\n\n1. **Project-scoped auto-discovery** — `<cwd>/.omp/extensions/`\n2. **User-scoped auto-discovery** — `~/.omp/agent/extensions/`\n3. **Marketplace-installed plugins** — `~/.omp/plugins/node_modules/` (extensions shipped inside installed plugin packages)\n4. **CLI flag** — `omp --extension ./my-ext.ts` (also `-e`; `--hook` is treated as an alias)\n5. **Settings `extensions` array** — paths listed in `~/.omp/agent/config.yml` or `<cwd>/.omp/settings.json`\n\nWithin each scope, de-duplication is by resolved absolute path — first seen wins.\n\nWhen a path points to a directory, omp resolves the entry point in this order:\n\n1. `package.json` with `omp.extensions` (or legacy `pi.extensions`) field\n2. `index.ts`\n3. `index.js`\n4. One-level scan for `*.ts` / `*.js` files and subdir `index.*` / `package.json` manifests\n\n## package.json manifest\n\nTo package an extension as an installable plugin, add an `omp` field to `package.json`:\n\n```json\n{\n \"name\": \"my-omp-extension\",\n \"omp\": {\n \"extensions\": [\"./src/main.ts\"]\n }\n}\n```\n\nThe legacy `pi` key is also accepted for backwards compatibility:\n\n```json\n{\n \"pi\": {\n \"extensions\": [\"./index.ts\"]\n }\n}\n```\n\nMultiple entry points are supported:\n\n```json\n{\n \"omp\": {\n \"extensions\": [\"./src/safety.ts\", \"./src/tools.ts\"]\n }\n}\n```\n\n## Registering commands\n\n```ts\npi.registerCommand(\"my-cmd\", {\n description: \"What the command does\",\n handler: async (args, ctx) => {\n // args: everything the user typed after /my-cmd\n // ctx: ExtensionCommandContext — includes ctx.ui, ctx.cwd, session controls\n ctx.ui.notify(\"Running!\", \"info\");\n await ctx.waitForIdle();\n await ctx.newSession();\n },\n});\n```\n\n`ExtensionCommandContext` session-control methods (safe to call from commands only):\n\n| Method | Effect |\n|---|---|\n| `waitForIdle()` | Wait for the agent to finish streaming |\n| `newSession(opts?)` | Open a fresh session |\n| `switchSession(path)` | Switch to an existing session file |\n| `branch(entryId)` | Fork from a specific history entry |\n| `navigateTree(id, opts?)` | Jump to a different point in the session tree |\n| `reload()` | Reload the session runtime |\n| `compact(opts?)` | Compact the current context |\n\n## Registering tools\n\nTools are called by the LLM. Parameters use [Zod](https://zod.dev) schemas, available at `pi.zod`:\n\n```ts\nconst z = pi.zod;\n\npi.registerTool({\n name: \"search_notes\", // snake_case, unique\n label: \"Search Notes\", // human-readable label for TUI\n description: \"Full-text search through project notes\",\n parameters: z.object({\n query: z.string().describe(\"Search query\"),\n limit: z.number().default(10).describe(\"Max results\").optional(),\n }),\n async execute(toolCallId, params, signal, onUpdate, ctx) {\n if (signal?.aborted) {\n return { content: [{ type: \"text\", text: \"Cancelled\" }] };\n }\n onUpdate?.({ content: [{ type: \"text\", text: \"Searching...\" }] });\n // ... do work ...\n return {\n content: [{ type: \"text\", text: `Found N results for \"${params.query}\"` }],\n details: { query: params.query, count: 0 },\n };\n },\n});\n```\n\n## Subscribing to events\n\n```ts\npi.on(\"tool_call\", async (event, ctx) => {\n // event.toolName, event.input, event.toolCallId\n if (event.toolName !== \"bash\") return;\n\n const command = String((event.input as { command?: unknown }).command ?? \"\");\n if (command.includes(\"rm -rf /\")) {\n return { block: true, reason: \"Blocked by safety policy\" };\n }\n});\n\npi.on(\"turn_end\", async (_event, ctx) => {\n ctx.ui.setStatus(\"tokens\", `~${ctx.getContextUsage()?.tokens ?? \"?\"} tokens`);\n});\n```\n\nFull event catalog: see [hooks authoring guide](./authoring-hooks.md).\n\n## Extension vs hook — when to use which\n\n| Need | Use |\n|---|---|\n| Tools + commands + events in one module | **Extension** (`ExtensionAPI`) |\n| Pure event interception (policy, redaction) | **Extension** or **Hook** (both work; extension is preferred) |\n| Legacy hook module already exists | **Hook** (`HookAPI` from `@oh-my-pi/pi-coding-agent/extensibility/hooks`) |\n| Registering provider / custom message renderer | **Extension only** |\n| Shipping as a marketplace plugin | **Extension** (use `package.json` manifest) |\n\nExtensions are a strict superset of hooks. New authoring should use `ExtensionAPI`.\n\n## Debugging\n\nStart omp with `--log-level debug` to see extension load messages:\n\n```\nomp --log-level debug\n```\n\nWatch for lines like:\n\n```\n[extension-loader] loading /home/you/.omp/agent/extensions/my-ext.ts\n[extension-loader] loaded: my-ext (1 tool, 1 command, 2 handlers)\n```\n\nTo temporarily disable a specific extension by name without removing the file:\n\n```yaml\n# ~/.omp/agent/config.yml\ndisabledExtensions:\n - extension-module:my-ext\n```\n\nThe derived name is the filename stem (or directory name for `index.ts`-style entries): `/path/to/my-ext.ts` → `my-ext`.\n\n## Important constraints\n\n- **Do not call runtime actions during load.** Methods like `pi.sendMessage()` throw `ExtensionRuntimeNotInitializedError` if called synchronously during module evaluation (before a session is active). Register handlers/tools/commands during load; perform runtime actions only from event handlers, tools, or commands.\n- **`tool_call` errors are fail-closed.** If a `tool_call` handler throws, the tool is blocked.\n- **Command names must not clash with built-ins.** Conflicts are skipped with a diagnostic log.\n- **Reserved shortcuts are ignored** (`ctrl+c`, `ctrl+d`, `ctrl+z`, `ctrl+k`, `ctrl+p`, `ctrl+l`, `ctrl+o`, `ctrl+t`, `ctrl+g`, `shift+tab`, `shift+ctrl+p`, `alt+enter`, `escape`, `enter`).\n\n## Further reading\n\n- `docs/extensions.md` — runtime internals and full API surface reference\n- `docs/extension-loading.md` — detailed path resolution rules\n- `docs/hooks.md` — hook subsystem internals\n- `docs/skills/examples/hello-extension/` — complete working example\n",
|
|
53
53
|
"skills/authoring-hooks.md": "---\nname: authoring-hooks\ndescription: Use when creating a new omp hook. Covers HookAPI, event catalog, blocking/overriding tool calls, and context modification.\n---\n\n# Authoring Hooks\n\nHooks are event-driven interceptors that run alongside the agent loop. They are best used for cross-cutting concerns: safety policy, secret redaction, context pruning, audit logging. A hook module registers handlers via `pi.on(event, handler)` and can block tool execution, override tool output, or rewrite the message context before each LLM call.\n\n> **Relationship to extensions:** The hook subsystem (`HookAPI`) is the legacy API. The extension runner now handles everything hooks can do plus more. `ExtensionAPI` supports the hook event model plus extension-only events. Use `ExtensionAPI` for new work; use `HookAPI` only if you are maintaining an existing hook module.\n\n## Factory signature\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\nexport default function myHook(omp: HookAPI): void {\n omp.on(\"tool_call\", async (event, ctx) => {\n // intercept every tool call\n });\n}\n```\n\nThe default export must be a plain function (not async, not a class). It receives a `HookAPI` instance and must register all handlers synchronously during execution.\n\nAlternatively, using `ExtensionAPI` (preferred):\n\n```ts\nimport type { ExtensionAPI } from \"@oh-my-pi/pi-coding-agent\";\n\nexport default function myExtension(pi: ExtensionAPI): void {\n pi.on(\"tool_call\", async (event, ctx) => { /* ... */ });\n}\n```\n\n## Event catalog\n\n### Tool lifecycle\n\n| Event | Fires | Can return |\n|---|---|---|\n| `tool_call` | Before every tool execution | `{ block?: boolean; reason?: string }` |\n| `tool_result` | After every tool execution | `{ content?; details?; isError?: boolean }` |\n\n### Session lifecycle\n\n| Event | Fires | Can return |\n|---|---|---|\n| `session_start` | On initial session load | — |\n| `session_before_switch` | Before session switch | `{ cancel?: boolean }` |\n| `session_switch` | After session switch | — |\n| `session_before_branch` | Before session branch | `{ cancel?: boolean; skipConversationRestore?: boolean }` |\n| `session_branch` | After session branch | — |\n| `session_before_compact` | Before compaction | `{ cancel?: boolean; compaction?: CompactionResult }` |\n| `session.compacting` | During compaction (inject context) | `{ context?: string[]; prompt?: string; preserveData?: Record<string, unknown> }` |\n| `session_compact` | After compaction | — |\n| `session_before_tree` | Before tree navigation | `{ cancel?: boolean; summary?: { summary: string; details?: unknown } }` |\n| `session_tree` | After tree navigation | — |\n| `session_shutdown` | On session shutdown | — |\n\n### Agent/turn lifecycle\n\n| Event | Fires | Can return |\n|---|---|---|\n| `before_agent_start` | Before agent starts a turn | `{ message?: { customType; content; display; details; attribution? } }` |\n| `agent_start` | Agent streaming starts | — |\n| `agent_end` | Agent streaming ends | — |\n| `turn_start` | Start of a user→agent turn | — |\n| `turn_end` | End of a user→agent turn | — |\n| `context` | Before each LLM API call | `{ messages?: Message[] }` |\n| `auto_compaction_start` | Auto-compaction begins | — |\n| `auto_compaction_end` | Auto-compaction ends | — |\n| `auto_retry_start` | Auto-retry begins | — |\n| `auto_retry_end` | Auto-retry ends | — |\n| `ttsr_triggered` | TTSR (too-short response) triggered | — |\n| `todo_reminder` | Todo reminder fires | — |\n\nExtension-only events such as `tool_execution_start`, `tool_execution_update`, `tool_execution_end`, `input`, `user_bash`, and `user_python` require `ExtensionAPI`.\n\n## Pre-tool blocking contract\n\nReturn `{ block: true, reason: \"...\" }` from a `tool_call` handler to prevent execution:\n\n```ts\nomp.on(\"tool_call\", async (event, ctx) => {\n if (event.toolName === \"bash\") {\n const cmd = String(event.input.command ?? \"\");\n if (/\\brm\\s+-rf\\s+\\//.test(cmd)) {\n return { block: true, reason: \"Refusing to delete root filesystem\" };\n }\n }\n});\n```\n\nContract:\n\n- If **any** handler returns `{ block: true }`, execution stops immediately.\n- `reason` is returned to the LLM as the tool error text.\n- If a handler **throws**, the tool is also blocked (fail-closed).\n- Last non-blocking return wins for non-blocking results; first `block: true` short-circuits.\n\n## Post-tool override contract\n\nReturn `{ content, details, isError }` from a `tool_result` handler to patch what the LLM sees:\n\n```ts\nomp.on(\"tool_result\", async (event, ctx) => {\n if (event.toolName === \"read\" && !event.isError) {\n const redacted = event.content.map(chunk => {\n if (chunk.type !== \"text\") return chunk;\n return {\n ...chunk,\n text: chunk.text.replace(/(?:sk|pk)-[a-zA-Z0-9]{20,}/g, \"[REDACTED_API_KEY]\"),\n };\n });\n return { content: redacted };\n }\n});\n```\n\nContract:\n\n- Handlers run in registration order. For `HookAPI`, each handler receives the original tool result event, and the last returned override wins.\n- `content` replaces the full content array for the LLM.\n- `details` replaces the structured details object.\n- `isError` overrides the error flag (typed, but note: `HookToolWrapper` behavior for error path rethrows regardless).\n- On a tool failure, `tool_result` is still emitted with `isError: true`; the original error is rethrown after handlers complete.\n\n## Context modification contract\n\nReturn `{ messages: [...] }` from a `context` handler to rewrite the message list before each LLM API call:\n\n```ts\nomp.on(\"context\", async (event, ctx) => {\n // Remove debug-only custom messages from LLM context\n const filtered = event.messages.filter(\n msg => !(msg.role === \"custom\" && msg.customType === \"debug-only\")\n );\n return { messages: filtered };\n});\n```\n\nContract:\n\n- `event.messages` is the current accumulated list.\n- Handlers run in order; each receives the output of the previous handler.\n- Return `undefined` (or nothing) to pass messages through unmodified.\n\n## Three complete examples\n\n### 1. rm-rf blocker\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\nexport default function rmRfBlocker(omp: HookAPI): void {\n omp.on(\"tool_call\", async (event, ctx) => {\n if (event.toolName !== \"bash\") return;\n\n const cmd = String(event.input.command ?? \"\");\n if (!/\\brm\\s+-rf\\s+\\//.test(cmd)) return;\n\n // Allow if user explicitly confirms (interactive mode only)\n if (ctx.hasUI) {\n const allow = await ctx.ui.confirm(\n \"Dangerous command\",\n `This command deletes from root:\\n${cmd}\\n\\nProceed?`\n );\n if (allow) return;\n }\n\n return { block: true, reason: \"rm -rf / blocked by safety policy\" };\n });\n}\n```\n\n### 2. API-key redactor\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\n// Matches common API key patterns: sk-..., pk-..., AKIA..., ghp_..., etc.\nconst SECRET_PATTERNS = [\n /\\b(sk|pk)-[a-zA-Z0-9]{20,}\\b/g,\n /\\bAKIA[A-Z0-9]{16}\\b/g,\n /\\bghp_[a-zA-Z0-9]{36}\\b/g,\n /\\b[a-zA-Z0-9_-]{20,}\\s*=\\s*[\"']?[a-zA-Z0-9._/+=-]{20,}[\"']?/g,\n];\n\nexport default function apiKeyRedactor(omp: HookAPI): void {\n omp.on(\"tool_result\", async (event) => {\n if (event.isError) return;\n\n let changed = false;\n const redacted = event.content.map(chunk => {\n if (chunk.type !== \"text\") return chunk;\n let text = chunk.text;\n for (const pattern of SECRET_PATTERNS) {\n const next = text.replace(pattern, \"[REDACTED]\");\n if (next !== text) { changed = true; text = next; }\n }\n return { ...chunk, text };\n });\n\n if (changed) return { content: redacted };\n });\n}\n```\n\n### 3. Context filter\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\nexport default function contextFilter(omp: HookAPI): void {\n omp.on(\"context\", async (event) => {\n const MAX_TOOL_OUTPUT_CHARS = 8_000;\n\n const trimmed = event.messages.map(msg => {\n // Truncate very large tool results to keep context manageable\n if (msg.role !== \"tool\") return msg;\n const content = msg.content.map(chunk => {\n if (chunk.type !== \"text\" || chunk.text.length <= MAX_TOOL_OUTPUT_CHARS) return chunk;\n return {\n ...chunk,\n text: chunk.text.slice(0, MAX_TOOL_OUTPUT_CHARS) + \"\\n[... truncated by context-filter hook]\",\n };\n });\n return { ...msg, content };\n });\n\n return { messages: trimmed };\n });\n}\n```\n\n## UI methods in hook context\n\n`ctx.ui` is a `HookUIContext`. Available methods:\n\n| Method | Description |\n|---|---|\n| `notify(message, type?)` | Show an in-app notification |\n| `setStatus(key, text)` | Set footer status text (keyed, sorted by key) |\n| `select(title, options)` | Show a selection dialog |\n| `confirm(title, message)` | Show a yes/no dialog |\n| `input(title, placeholder?)` | Show a text input dialog |\n| `editor(title, prefill?, { signal }?)` | Show a multi-line editor |\n| `setEditorText(text)` | Set the input editor content |\n| `getEditorText()` | Get current input editor content |\n| `custom(factory)` | Render a custom TUI component |\n| `theme` | Current theme object |\n\n`ctx.hasUI` is `false` in headless/print/subagent mode — always guard interactive calls.\n\n## Further reading\n\n- `docs/hooks.md` — hook subsystem internals, ordering rules, error propagation\n- `docs/extensions.md` — `ExtensionAPI` (superset of `HookAPI`)\n- `docs/skills/examples/safety-hook/` — complete working example\n",
|
|
54
54
|
"skills/authoring-marketplaces.md": "---\nname: authoring-marketplaces\ndescription: Use when creating a new omp marketplace. Covers marketplace.json schema, source types, install commands, and publishing.\n---\n\n# Authoring Marketplaces\n\nA marketplace is a Git repository (or local directory) that contains a catalog file at `.claude-plugin/marketplace.json`. Anyone can author one. Users add it with `/marketplace add owner/repo` and then install individual plugins from it.\n\n## Minimum viable marketplace\n\n```\nmy-marketplace/\n .claude-plugin/\n marketplace.json\n plugins/\n my-plugin/\n package.json\n index.ts\n```\n\n```json\n{\n \"name\": \"my-marketplace\",\n \"owner\": { \"name\": \"Your Name\" },\n \"plugins\": [\n {\n \"name\": \"my-plugin\",\n \"description\": \"What it does\",\n \"source\": \"./plugins/my-plugin\"\n }\n ]\n}\n```\n\nPush to GitHub. Users install with:\n\n```\n/marketplace add your-github-username/my-marketplace\n/marketplace install my-plugin@my-marketplace\n```\n\n## marketplace.json schema\n\nThe catalog file must live at `.claude-plugin/marketplace.json` in the repository root.\n\n### Top-level fields\n\n| Field | Required | Description |\n|---|---|---|\n| `name` | yes | Marketplace name. Lowercase alphanumeric, hyphens, dots. Must start and end with alphanumeric. Max 64 chars. |\n| `owner` | yes | Object with at minimum `owner.name` (string) |\n| `owner.name` | yes | Marketplace owner name |\n| `owner.email` | no | Owner contact email |\n| `description` | no | Short description of the marketplace |\n| `plugins` | yes | Array of plugin entries (see below) |\n| `metadata.pluginRoot` | no | String prepended to all relative plugin source paths |\n\n### Plugin entry fields\n\n| Field | Required | Description |\n|---|---|---|\n| `name` | yes | Plugin name (same naming rules as marketplace name) |\n| `source` | yes | Where to find the plugin — string or object (see source types below) |\n| `description` | no | Short plugin description |\n| `version` | no | Version string |\n| `author` | no | `{ name, email? }` |\n| `homepage` | no | URL |\n| `category` | no | e.g. `development`, `productivity`, `security` |\n| `tags` | no | Array of string tags |\n\n### Full catalog example\n\n```json\n{\n \"$schema\": \"https://anthropic.com/claude-code/marketplace.schema.json\",\n \"name\": \"acme-plugins\",\n \"owner\": {\n \"name\": \"Acme Corp\",\n \"email\": \"plugins@acme.example\"\n },\n \"description\": \"Official Acme plugins for oh-my-pi\",\n \"plugins\": [\n {\n \"name\": \"acme-linter\",\n \"description\": \"Enforce Acme coding standards\",\n \"category\": \"development\",\n \"source\": \"./plugins/linter\"\n },\n {\n \"name\": \"acme-deploy\",\n \"description\": \"One-command deploy to Acme cloud\",\n \"category\": \"devops\",\n \"source\": {\n \"source\": \"github\",\n \"repo\": \"acme-corp/omp-deploy-plugin\",\n \"ref\": \"main\"\n }\n }\n ]\n}\n```\n\n## Plugin source types\n\n### 1. Relative path string\n\nPoints to a subdirectory inside the marketplace repository itself. Must start with `./`.\n\n```json\n\"source\": \"./plugins/my-plugin\"\n```\n\nThe path is resolved relative to the marketplace repository root. Path traversal outside the repo root is rejected.\n\nUse `metadata.pluginRoot` to avoid repeating a common prefix:\n\n```json\n{\n \"metadata\": { \"pluginRoot\": \"./plugins\" },\n \"plugins\": [\n { \"name\": \"plugin-a\", \"source\": \"./plugin-a\" },\n { \"name\": \"plugin-b\", \"source\": \"./plugin-b\" }\n ]\n}\n```\n\n### 2. Git URL\n\nA full Git repository URL. Optionally pin to a branch/tag (`ref`) or exact commit (`sha`):\n\n```json\n\"source\": {\n \"source\": \"url\",\n \"url\": \"https://github.com/org/my-plugin.git\",\n \"ref\": \"main\",\n \"sha\": \"a1b2c3d4...\"\n}\n```\n\n### 3. GitHub shorthand\n\nShorthand for GitHub repositories. Functionally equivalent to a Git URL but more concise:\n\n```json\n\"source\": {\n \"source\": \"github\",\n \"repo\": \"org/my-plugin\",\n \"ref\": \"v2.1.0\",\n \"sha\": \"a1b2c3d4...\"\n}\n```\n\n### 4. Git subdirectory (monorepo)\n\nFor plugins living inside a subdirectory of a larger repository. `url` accepts a full HTTPS URL or a GitHub `owner/repo` shorthand:\n\n```json\n\"source\": {\n \"source\": \"git-subdir\",\n \"url\": \"https://github.com/org/monorepo.git\",\n \"path\": \"packages/my-plugin\",\n \"ref\": \"main\",\n \"sha\": \"a1b2c3d4...\"\n}\n```\n\nThe `path` must resolve inside the cloned repository — directory escape is rejected.\n\n### 5. NPM package\n\nDeclares the plugin as an npm package. `version` is optional:\n\n```json\n\"source\": {\n \"source\": \"npm\",\n \"package\": \"@acme/omp-plugin\",\n \"version\": \"1.2.0\"\n}\n```\n\n> Note: npm plugin sources are declared in the schema but installation support is not yet fully implemented. Use Git-based sources for plugins that need to work today.\n\n## Plugin structure\n\nEach plugin directory (regardless of source type) should contain:\n\n```\nmy-plugin/\n package.json ← required: declares omp.extensions entry points\n src/\n main.ts ← extension factory\n README.md ← recommended: description + usage\n```\n\nMinimum `package.json`:\n\n```json\n{\n \"name\": \"my-plugin\",\n \"omp\": {\n \"extensions\": [\"./src/main.ts\"]\n }\n}\n```\n\n## Install command\n\n```\n/marketplace install name@marketplace-name\n/marketplace install --force name@marketplace-name # reinstall\n/marketplace install --scope project name@marketplace # project-scoped\n```\n\nCLI equivalent:\n\n```\nomp plugin marketplace add owner/repo\nomp plugin install name@marketplace-name\n```\n\nScope behavior:\n\n- **user** (default) — installed in `~/.omp/plugins/installed_plugins.json`, available in all projects\n- **project** — installed in `<project>/.omp/plugins/installed_plugins.json`, available only in that project\n\nProject-scoped installs shadow user-scoped installs of the same plugin name.\n\n## Naming rules\n\nMarketplace names and plugin names must:\n\n- Contain only lowercase letters, digits, hyphens (`-`), and dots (`.`)\n- Start and end with a lowercase letter or digit\n- Be at most 64 characters\n\nPlugin IDs (`name@marketplace`) must be at most 128 characters total.\n\nValid: `my-plugin`, `code-review`, `acme.tools`, `ai-v2`\nInvalid: `-bad-start`, `bad-end-`, `.dot-start`, `Under_score`, `HAS_CAPS`\n\n## Publishing workflow\n\n1. Create `marketplace.json` at `.claude-plugin/marketplace.json` in a new Git repo.\n2. Add plugin entries pointing to subdirectories (or external sources).\n3. Push to GitHub.\n4. Share the `owner/repo` string. Users add it with `/marketplace add owner/repo`.\n5. When you update the catalog, users run `/marketplace update your-marketplace-name` to pull the latest.\n\nTo test locally before publishing:\n\n```\n/marketplace add ./path/to/my-marketplace\n```\n\nLocal path sources also accept `~/` and absolute paths.\n\n## Further reading\n\n- `docs/marketplace.md` — marketplace system internals, on-disk layout, command reference\n- `docs/skills/authoring-extensions.md` — how to author the extension modules inside plugins\n- `docs/skills/examples/mini-marketplace/` — minimal working marketplace example\n",
|
|
55
55
|
"skills/examples/hello-extension/README.md": "# hello-extension\n\nA minimal `oh-my-pi` extension that demonstrates the two most common authoring patterns: subscribing to `session_start` to notify on load, and registering a `/hello` slash command that sends a greeting into the conversation. It is intentionally small — use it as a copy-paste starting point for your own extension.\n\n## Install\n\n**Option A — drop into user extensions directory:**\n\n```\ncp -r . ~/.omp/agent/extensions/hello-extension\n```\n\nRestart `omp`. You will see the startup notification immediately.\n\n**Option B — point the settings `extensions` array at it:**\n\n```yaml\n# ~/.omp/agent/config.yml\nextensions:\n - /path/to/hello-extension\n```\n\n**Option C — load once via CLI flag:**\n\n```\nomp --extension ./hello-extension\n```\n\n## Usage\n\nAfter loading, type `/hello` in the omp prompt to trigger the notification.\n\n## What it demonstrates\n\n- Default export factory receiving `ExtensionAPI`\n- `pi.on(\"session_start\", ...)` — session lifecycle hook\n- `pi.registerCommand(...)` — slash command registration\n- `ctx.ui.notify(...)` — user-facing notification\n- `package.json` with `omp.extensions` manifest field\n",
|
|
@@ -61,27 +61,27 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
|
|
|
61
61
|
"tools/ask.md": "# ask\n\n> Prompts the interactive user for one or more choices or free-form answers.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ask.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ask.md`\n- Key collaborators:\n - `packages/coding-agent/src/config/settings-schema.ts` — `ask.timeout` / `ask.notify` defaults\n - `packages/coding-agent/src/modes/theme/theme.ts` — checkbox and tree glyphs for TUI rendering\n - `packages/coding-agent/src/tui.ts` — status-line rendering\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `questions` | `Question[]` | Yes | One or more questions. Empty arrays are rejected by schema and also guarded at runtime. |\n\n### `Question`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `id` | `string` | Yes | Stable identifier used in multi-question results. |\n| `question` | `string` | Yes | Prompt text shown to the user. |\n| `options` | `{ label: string }[]` | Yes | Explicit options. The UI always appends `Other (type your own)`; callers must not include it. |\n| `multi` | `boolean` | No | Enables multi-select mode. Default: `false`. |\n| `recommended` | `number` | No | Zero-based recommended option index. In single-select mode the label gets ` (Recommended)` appended in the UI. |\n\n## Outputs\n- Single-shot result.\n- `content[0].text` is plain text:\n - single question: `User selected: ...` and/or `User provided custom input: ...`\n - multiple questions: `User answers:` followed by one line per `id`\n- `details`:\n - single question: `{ question, options, multi, selectedOptions, customInput? }`\n - multiple questions: `{ results: QuestionResult[] }`, where each item includes `id`, `question`, `options`, `multi`, `selectedOptions`, and optional `customInput`\n- Cancellation and headless cases throw instead of returning a structured success result.\n\n## Flow\n1. `AskTool.createIf()` only registers the tool when `session.hasUI` is true; headless sessions never get it.\n2. `execute()` requires `context.ui`; if missing it aborts the context and throws `ToolAbortError(\"Ask tool requires interactive mode\")`.\n3. It reads `ask.timeout` from settings, converts seconds to milliseconds, and disables timeout entirely while plan mode is enabled (`packages/coding-agent/src/tools/ask.ts`).\n4. If `ask.notify` is not `off`, it sends a terminal notification: `Waiting for input`.\n5. For each question, `askSingleQuestion()` drives either:\n - single-select list + optional editor for `Other`\n - multi-select checkbox loop + `Done selecting` sentinel + optional editor for `Other`\n6. In multi-question mode, left/right arrow handlers enable back/forward navigation between questions and preserve prior selections.\n7. If a timeout fires before any selection/custom input, the tool auto-selects the recommended option, or the first option when no valid `recommended` index exists.\n8. If the user cancels without timeout, `execute()` aborts the tool context and throws `ToolAbortError(\"Ask tool was cancelled by the user\")`.\n9. On success it formats human-readable text plus structured `details`; the TUI renderer uses `details` for rich display.\n\n## Modes / Variants\n- Single question: returns flattened `details` fields for one question.\n- Multiple questions: returns `details.results[]` and allows back/forward navigation across questions.\n- Single-select: one option or custom input.\n- Multi-select: toggled checkbox list, `Done selecting` sentinel only when forward navigation is not active.\n\n## Side Effects\n- User-visible prompts / interactive UI\n - Opens a selection dialog via `context.ui.select(...)`.\n - Opens a text editor dialog via `context.ui.editor(...)` for `Other`.\n - Sends a terminal notification unless `ask.notify=off`.\n- Session state\n - Reads plan-mode state to disable timeouts.\n - Calls `context.abort()` on headless use or user cancellation.\n- Background work / cancellation\n - Wraps UI waits in `untilAborted(...)` so abort signals interrupt pending dialogs.\n\n## Limits & Caps\n- `questions` must contain at least 1 item (`askSchema` in `packages/coding-agent/src/tools/ask.ts`).\n- `ask.timeout` default is `30` seconds; `0` disables timeout (`packages/coding-agent/src/config/settings-schema.ts`).\n- Prompt guidance says provide 2-5 options, but code does not enforce that (`packages/coding-agent/src/prompts/tools/ask.md`).\n- Timeout only applies to the option picker; once the user chooses `Other`, the editor has no timeout (`packages/coding-agent/src/prompts/tools/ask.md`).\n\n## Errors\n- Missing interactive UI: throws `ToolAbortError(\"Ask tool requires interactive mode\")`.\n- User cancels picker/editor without timeout: throws `ToolAbortError(\"Ask tool was cancelled by the user\")`.\n- Abort signal during input: converted to `ToolAbortError(\"Ask input was cancelled\")`.\n- Empty `questions` at runtime returns a text error payload instead of throwing: `Error: questions must not be empty`.\n\n## Notes\n- `recommended` is only a UI hint; invalid indexes are ignored.\n- In single-select mode the returned `selectedOptions` value strips the appended ` (Recommended)` suffix.\n- Multi-select results preserve selection order by `Set` insertion order, not original option order after arbitrary toggles.\n- Option labels and prompt text are returned verbatim in `details`; the tool does not interpret them beyond UI affordances like `Other` and ` (Recommended)`.\n",
|
|
62
62
|
"tools/ast-edit.md": "# ast_edit\n\n> Preview and apply structural rewrites over source files via native ast-grep.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ast-edit.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ast-edit.md`\n- Key collaborators:\n - `crates/pi-natives/src/ast.rs` — native rewrite planning and file mutation\n - `crates/pi-natives/src/language/mod.rs` — language aliases and extension inference\n - `packages/coding-agent/src/tools/path-utils.ts` — path/glob parsing and multi-path resolution\n - `packages/coding-agent/src/tools/resolve.ts` — preview/apply queueing\n - `packages/coding-agent/src/tools/render-utils.ts` — parse-error dedupe and display caps\n - `packages/coding-agent/src/utils/file-display-mode.ts` — hashline vs line-number diff references\n - `packages/coding-agent/src/hashline/hash.ts` — stable hashline diff anchors\n - `packages/natives/native/index.d.ts` — JS-visible native binding contract\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `ops` | `{ pat: string; out: string }[]` | Yes | One or more rewrite rules. `pat` must be non-empty. Duplicate `pat` values fail before native execution. Empty `out` deletes the matched node. |\n| `paths` | `string[]` | Yes | One or more files, directories, globs, or internal URLs with backing files. Empty entries are rejected. Globs are forbidden for internal URLs. |\n\nShared AST pattern grammar and language catalog: see [`ast_grep`](./ast-grep.md#inputs).\n\n- `ast_edit` uses the same `$NAME`, `$_`, `$$$NAME`, and `$$$` metavariable semantics.\n- The tool prompt adds rewrite-specific constraints:\n - metavariable names must be uppercase and must stand for whole AST nodes,\n - captures from `pat` are substituted into `out`,\n - each rewrite is a 1:1 structural substitution; one capture cannot expand into multiple sibling nodes unless the grammar itself permits that expansion at that position.\n\n## Outputs\n- Single-shot preview result from `ast_edit` itself.\n- Model-facing `content` is one text block showing proposed edits, grouped by file for directory/multi-file runs.\n - Each change renders as two lines: `-REF|before` and `+REF|after` in hashline mode, or `-LINE:COLUMN before` / `+LINE:COLUMN after` when hashlines are off.\n - Only the first line of each `before`/`after` snippet is shown, truncated to 120 characters in the wrapper.\n - `Limit reached; narrow paths.` and formatted parse issues are appended when applicable.\n- If no rewrites match, text is `No replacements made` plus formatted parse issues when present.\n- `details` includes aggregate preview metadata:\n - `totalReplacements`, `filesTouched`, `filesSearched`, `applied`, `limitReached`\n - optional `parseErrors`, `scopePath`, `files`, `fileReplacements`, `displayContent`, `meta`\n- The tool always previews first (`applied: false` in the direct result). Actual file writes happen only later through `resolve(action: \"apply\", ...)`.\n- When preview produced replacements, `ast_edit` also queues a pending `resolve` action. Successful apply returns a separate `resolve` result, not another `ast_edit` result.\n\n## Flow\n1. `AstEditTool.execute()` validates each op in `packages/coding-agent/src/tools/ast-edit.ts`:\n - empty `pat` fails,\n - at least one op is required,\n - duplicate `pat` values fail,\n - ops are converted to a `Record<pattern, replacement>`.\n2. The wrapper reads `PI_MAX_AST_FILES` via `$envpos(..., 1000)` and uses that as the native `maxFiles` cap for both preview and apply.\n3. Path normalization, internal URL handling, missing-path partitioning, and multi-path resolution follow the same `path-utils.ts` flow as `ast_grep`.\n4. The wrapper stats the resolved base path to decide whether to render grouped directory output.\n5. `runAstEditOnce(...)` always runs native `astEdit(...)` with `dryRun: true` and `failOnParseError: false` on the first pass.\n6. Native `ast_edit` in `crates/pi-natives/src/ast.rs`:\n - normalizes the rewrite map and sorts rules by pattern string,\n - resolves strictness (`smart` by default),\n - collects candidate files from a file or gitignore-aware directory scan,\n - infers a single language for the whole call unless `lang` was supplied,\n - compiles every rewrite pattern for that language,\n - parses each file, skips files with syntax-error trees, collects `replace_by(...)` edits for every match, enforces replacement and file caps, and returns textual before/after slices plus source ranges.\n7. The TS wrapper deduplicates parse errors, groups changes by file, and renders preview diff lines.\n8. If preview found replacements and `applied` is false, `queueResolveHandler(...)` registers a forced `resolve` action and injects a `resolve-reminder` steering message.\n9. On `resolve(action: \"apply\")`, the queued callback reruns the same rewrite set with `dryRun: false`, recomputes counts, and rejects the apply as an error if the live result no longer matches the preview (`stalePreview`).\n10. On a non-stale apply, the callback returns `Applied N replacements in M files.`; on discard, `resolve` returns a discard message without mutating files.\n\n## Modes / Variants\n- Single file: preview or apply against one file.\n- Directory + optional glob: native scan walks the directory, then filters by compiled glob.\n- Multiple explicit paths/globs: wrapper unions them into one synthetic scope or runs per-target native calls when paths only meet at root.\n- Internal URL inputs: only supported when the router resolves them to a backing file path.\n- Preview mode: always the direct `ast_edit` tool result.\n- Apply mode: only reachable through the queued `resolve` callback after a preview.\n- Hashline output mode vs plain line/column mode: controlled by `resolveFileDisplayMode()`.\n\n## Side Effects\n- Filesystem\n - Preview reads files and scans directories.\n - Apply rewrites files in place with `std::fs::write(...)`, but only when the computed output differs from the original source.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Queues a one-shot forced `resolve` tool choice through `queueResolveHandler(...)`.\n - Adds a `resolve-reminder` steering message.\n- User-visible prompts / interactive UI\n - Direct `ast_edit` results are previews.\n - Follow-up apply/discard is exposed through the hidden `resolve` tool.\n- Background work / cancellation\n - Native preview/apply work runs on a blocking worker via `task::blocking(...)`.\n - Cancellation and optional native timeout are cooperative through `CancelToken::heartbeat()`.\n\n## Limits & Caps\n- File cap exposed by the wrapper: `PI_MAX_AST_FILES`, default `1000`, in `packages/coding-agent/src/tools/ast-edit.ts`.\n- Native `maxFiles` and `maxReplacements` are both clamped to at least `1` when provided in `crates/pi-natives/src/ast.rs`.\n- The wrapper never sets `maxReplacements`; native behavior therefore defaults to effectively unbounded replacements for a run.\n- Parse issues are rendered with at most `PARSE_ERRORS_LIMIT = 20` lines in `packages/coding-agent/src/tools/render-utils.ts`; `details.parseErrors` is deduplicated but not capped.\n- Directory scans use `include_hidden: true`, `use_gitignore: true`, and skip `node_modules` unless the glob text explicitly mentions `node_modules` in `crates/pi-natives/src/ast.rs`.\n- No separate glob-expansion count cap exists. Candidate count is whatever the resolved path/glob expands to after gitignore filtering, then native `maxFiles` stops mutations after the configured number of touched files.\n- Preview text truncates each rendered `before` and `after` first line to 120 characters in `packages/coding-agent/src/tools/ast-edit.ts`.\n\n## Errors\n- TS wrapper throws `ToolError` for empty patterns, duplicate rewrite patterns, empty path entries, unsupported internal-URL globs, internal URLs without `sourcePath`, and missing paths.\n- Native code returns hard errors for:\n - inability to infer one language across all candidates when `lang` is absent,\n - unsupported explicit `lang`,\n - bad glob compilation or unreadable search roots,\n - overlapping computed edits (`Overlapping replacements detected; refine pattern to avoid ambiguous edits`),\n - out-of-bounds edit ranges or non-UTF-8 replacement text,\n - write failures during apply,\n - cancellation or timeout.\n- With `failOnParseError: false` (the wrapper always uses this), pattern compile failures and file parse failures become `parseErrors` instead of aborting the whole run.\n- If every rewrite pattern fails to compile, native `ast_edit` returns a successful zero-replacement result with `parseErrors` populated.\n- Files containing tree-sitter error nodes are skipped for rewriting; they do not get partial edits.\n- Apply can fail after a successful preview if the preview becomes stale. The resolve callback compares replacement totals and per-file counts and returns an error result rather than applying a mismatched preview silently.\n\n## Notes\n- `ast_edit` does not expose the native `lang`, `strictness`, `selector`, `maxReplacements`, `failOnParseError`, or `timeoutMs` fields to the model. The runtime fixes the call shape to a preview-first, smart-strictness, best-effort parse mode.\n- Because the wrapper does not expose `lang`, mixed-language rewrites only succeed when every candidate infers to the same canonical language. This is stricter than `ast_grep`.\n- Idempotency is not enforced syntactically. A rewrite like `foo($A) -> foo($A)` previews zero changes because output equals input; a rewrite that keeps matching its own output may still produce replacements on repeated calls.\n- Rewrites are accumulated per file, then applied from the end of the file backward after an overlap check. Independent matches can coexist; overlapping matches abort the run.\n- Native rewrite rule order is by pattern-string sort, not by the original `ops` array order, because `normalize_rewrite_map(...)` sorts the `(pattern, rewrite)` pairs.\n- Preview/apply parity is validated only by totals and per-file counts, not by a byte-for-byte diff of every replacement payload.",
|
|
63
63
|
"tools/ast-grep.md": "# ast_grep\n\n> Structural code search over supported source files via native ast-grep.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ast-grep.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ast-grep.md`\n- Key collaborators:\n - `crates/pi-natives/src/ast.rs` — native scan, parse, match engine\n - `crates/pi-natives/src/language/mod.rs` — language aliases and extension inference\n - `packages/coding-agent/src/tools/path-utils.ts` — path/glob parsing and multi-path resolution\n - `packages/coding-agent/src/tools/render-utils.ts` — parse-error dedupe and display caps\n - `packages/coding-agent/src/tools/match-line-format.ts` — anchor-prefixed match rendering\n - `packages/coding-agent/src/utils/file-display-mode.ts` — hashline vs line-number output mode\n - `packages/natives/native/index.d.ts` — JS-visible native binding contract\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `pat` | `string` | Yes | Single AST pattern. The wrapper trims it and rejects empty strings. |\n| `paths` | `string[]` | Yes | One or more files, directories, globs, or internal URLs with backing files. Empty entries are rejected. Globs are forbidden for internal URLs. |\n| `skip` | `number` | No | Match offset. Defaults to `0`, then `Math.floor(...)`; negatives and non-finite values fail. |\n\nPattern grammar and language support exposed to the model:\n- `$NAME` — capture one AST node.\n- `$_` — match one AST node without binding.\n- `$$$NAME` — capture zero or more AST nodes; ast-grep stops lazily at the next satisfiable node.\n- `$$$` — match zero or more AST nodes without binding.\n- Metavariable names must be uppercase and must stand for whole AST nodes, not partial tokens or string fragments.\n- Reusing the same metavariable requires identical code at each occurrence.\n- Patterns must parse as one valid AST node for the inferred target language.\n- Supported canonical languages come from `SupportLang::all_langs()` in `crates/pi-natives/src/language/mod.rs`: `astro`, `bash`, `c`, `cmake`, `cpp`, `csharp`, `dart`, `clojure`, `css`, `diff`, `dockerfile`, `elixir`, `erlang`, `go`, `graphql`, `haskell`, `hcl`, `html`, `ini`, `java`, `javascript`, `json`, `just`, `julia`, `kotlin`, `lua`, `make`, `markdown`, `nix`, `objc`, `ocaml`, `odin`, `perl`, `php`, `powershell`, `protobuf`, `python`, `r`, `regex`, `ruby`, `rust`, `scala`, `solidity`, `sql`, `starlark`, `svelte`, `swift`, `toml`, `tlaplus`, `tsx`, `typescript`, `verilog`, `vue`, `xml`, `yaml`, `zig`.\n\n## Outputs\n- Single-shot tool result.\n- Model-facing `content` is one text block:\n - grouped by file for directory/multi-file searches,\n - match lines rendered as `*LINE+HASH|text` in hashline mode or `*LINE|text` otherwise,\n - continuation lines for multi-line matches rendered with a leading space,\n - optional `meta: NAME=value` lines when ast-grep captured metavariables.\n- If no matches are found, text is `No matches found` or `No matches found. Parse issues mean the query may be mis-scoped; narrow paths before concluding absence.` plus formatted parse issues.\n- If the wrapper truncates visible results, the text ends with `Result limit reached; narrow paths or increase limit.`\n- `details` includes counts and metadata, not full match payloads:\n - `matchCount`, `fileCount`, `filesSearched`, `limitReached`\n - optional `parseErrors`, `scopePath`, `files`, `fileMatches`, `displayContent`, `meta`\n- Native ranges (`byteStart`, `byteEnd`, `startLine`, `startColumn`, `endLine`, `endColumn`) exist only inside the native result; the wrapper does not emit them directly to the model.\n\n## Flow\n1. `AstGrepTool.execute()` validates `pat`, normalizes `skip`, and normalizes each `paths` entry in `packages/coding-agent/src/tools/ast-grep.ts`.\n2. Internal URLs are resolved through `session.internalRouter`; entries without `sourcePath` fail, and internal-URL globs fail early.\n3. For multiple path inputs, `partitionExistingPaths()` drops missing bases only when at least one surviving base remains; if all bases are missing the call fails.\n4. `parseSearchPath()` splits a single path into `basePath` plus optional `glob`. `resolveExplicitSearchPaths()` collapses multiple inputs into a common base plus a brace-union glob, or separate `targets` when the only common base is a filesystem root.\n5. The wrapper stats the resolved base path to decide whether output should be grouped as a directory result.\n6. Execution dispatches to either:\n - one native `astGrep(...)` call for a single resolved base, or\n - `runMultiTargetAstGrep(...)`, which calls the native binding once per target, rebases paths back to the common root, sorts globally, then applies `skip` and the wrapper limit.\n7. Native `ast_grep` in `crates/pi-natives/src/ast.rs`:\n - normalizes and deduplicates patterns,\n - resolves a `MatchStrictness` (`smart` by default),\n - collects candidate files from a file or gitignore-aware directory scan,\n - infers language per candidate from extension unless `lang` was provided,\n - compiles the pattern separately for each language present,\n - reads each file, reports syntax-error trees as parse issues, runs `find_all`, and optionally captures metavariable bindings.\n8. Native results are sorted by path and source position, then paged by `offset`/`limit`.\n9. The TS wrapper normalizes parse-error strings, deduplicates them, groups matches by formatted path, renders anchor lines, appends limit/parse notices, and returns `toolResult(...).text(...).done()`.\n\n## Modes / Variants\n- Single file: native path is the file; output is a flat list of rendered match lines.\n- Directory + optional glob: native scan walks the directory, then filters by compiled glob.\n- Multiple explicit paths/globs: wrapper unions them into one synthetic scope or runs per-target native calls when paths only meet at root.\n- Internal URL inputs: only supported when the router can resolve them to a backing file path.\n- Hashline output mode vs plain line-number mode: controlled by `resolveFileDisplayMode()`; hashline mode requires the edit tool and non-raw, mutable sources.\n\n## Side Effects\n- Filesystem\n - Stats input paths in the TS wrapper.\n - Native code reads matched files and scans directories through `fs_cache`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - None beyond normal tool transcript/result metadata.\n- Background work / cancellation\n - Native work runs on a blocking worker via `task::blocking(...)`.\n - Cancellation and optional native timeout are cooperative through `CancelToken::heartbeat()`.\n\n## Limits & Caps\n- Wrapper-visible result cap: `DEFAULT_AST_LIMIT = 50` in `packages/coding-agent/src/tools/ast-grep.ts`.\n - Single-target calls rely on the native default limit of 50 in `crates/pi-natives/src/ast.rs`.\n - Multi-target calls fetch `skip + 50 + 1` matches per target, then re-page after global sort.\n- Native `limit` is clamped to at least `1`; omitted `offset` defaults to `0` in `crates/pi-natives/src/ast.rs`.\n- Parse issues are rendered with at most `PARSE_ERRORS_LIMIT = 20` lines in `packages/coding-agent/src/tools/render-utils.ts`; `details.parseErrors` itself is only deduplicated, not capped.\n- Directory scans use `include_hidden: true`, `use_gitignore: true`, and skip `node_modules` unless the glob text explicitly mentions `node_modules` in `crates/pi-natives/src/ast.rs`.\n- No hard file-count cap is applied by the wrapper or native `ast_grep`; candidate count is whatever the resolved path/glob expands to after gitignore filtering.\n- Multi-path union deduplicates identical path inputs before resolution in `resolveExplicitSearchPaths()`.\n\n## Errors\n- TS wrapper throws `ToolError` for empty patterns, invalid `skip`, empty path entries, unsupported internal-URL globs, internal URLs without `sourcePath`, and missing paths.\n- Native code returns hard errors for:\n - unsupported explicit `lang`,\n - inability to infer language for a candidate when `lang` is not supplied,\n - invalid AST pattern compilation for every relevant language,\n - unreadable search roots or bad glob compilation,\n - cancellation (`Aborted: Signal`) or timeout (`Aborted: Timeout`).\n- File-level parse failures and many per-language pattern compile failures are non-fatal: they are accumulated in `parseErrors` and surfaced alongside successful matches.\n- `no matches` is not an error, even when parse issues were recorded.\n\n## Notes\n- `pat` is always wrapped into a one-element `patterns` array by the TS tool; the model cannot send multiple patterns through `ast_grep` even though the native binding supports it.\n- `ast_grep` can search mixed-language trees because native compilation happens per discovered language, but the prompt still tells the model to keep calls single-language when possible to reduce parse noise.\n- Pattern compilation is per language present in the candidate set. One pattern can succeed for some languages and generate per-file parse errors for others in the same run.\n- A file with tree-sitter error nodes still gets searched; the syntax warning is additive, not a skip condition.\n- For glob semantics, `*.ts` matches only direct children while `**/*.ts` recurses; this is covered by native tests in `crates/pi-natives/src/ast.rs`.\n- Output anchors are intended for follow-up tools, but the exact anchor format depends on session edit mode (`hashline` vs line-number mode).",
|
|
64
|
-
"tools/bash.md": "# bash\n\n> Execute a shell command in the session workspace, with optional PTY or background-job handling.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/bash.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/bash.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/bash-interactive.ts` — PTY/TUI execution path.\n - `packages/coding-agent/src/tools/bash-interceptor.ts` — blocks tool-better shell patterns.\n - `packages/coding-agent/src/tools/bash-skill-urls.ts` — expands internal URLs to paths.\n - `packages/coding-agent/src/exec/bash-executor.ts` — non-PTY shell execution.\n - `packages/coding-agent/src/session/streaming-output.ts` — tail buffer, truncation, artifact spill.\n - `packages/coding-agent/src/tools/tool-timeouts.ts` — timeout clamp bounds.\n - `packages/coding-agent/src/config/settings-schema.ts` — default interceptor rules.\n - `docs/bash-tool-runtime.md` — deeper executor/runtime notes; use as the companion doc for shell-session internals.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `command` | `string` | Yes | Shell command text to execute. A leading `cd <path> && ...` is rewritten into `cwd` only when `cwd` was omitted. |\n| `env` | `Record<string, string>` | No | Extra environment variables. Keys must match `^[A-Za-z_][A-Za-z0-9_]*$` or the tool throws. Values also go through internal-URL expansion. |\n| `timeout` | `number` | No | Timeout in seconds. Default `300`; clamped to `1..3600` by `clampTimeout(\"bash\", ...)`. |\n| `cwd` | `string` | No | Working directory, resolved against `session.cwd` via `resolveToCwd`. Must exist and be a directory. |\n| `pty` | `boolean` | No | Request PTY mode. Default `false`. PTY is used only when `pty: true`, `PI_NO_PTY !== \"1\"`, and the tool context has a UI. |\n| `async` | `boolean` | No | Background execution request. Present only when `async.enabled` is true for the session. Returns immediately with a job id instead of waiting. |\n\n## Outputs\nThe tool returns a single `text` content block plus optional `details`.\n\n- Success, foreground:\n - `content[0].text`: command output, or `(no output)` when the command produced nothing.\n - `details.timeoutSeconds`: effective timeout after clamping.\n - `details.requestedTimeoutSeconds`: only present when the requested timeout was clamped.\n - `details.meta.truncation`: present when output was truncated in memory; includes `artifactId` when full output spilled to an artifact.\n- Success, background start (`async: true` or auto-background):\n - `content[0].text`: optional preview tail, timeout notice if any, then `Background job <id> started: <label>` with follow-up instructions.\n - `details.async`: `{ state: \"running\", jobId, type: \"bash\" }`.\n- Background progress / completion:\n - delivered through `onUpdate` / async job manager, not the initial return.\n - running updates contain tail text and `details.async.state: \"running\"` only after the job is considered backgrounded.\n - completion/failure updates carry final text and `details.async.state: \"completed\" | \"failed\"`.\n- Failure:\n - the tool throws `ToolError` / `ToolAbortError`; non-zero exits are surfaced as errors, not success results.\n\nStdout and stderr are merged before the model sees them. Non-zero exit codes are appended to the thrown error text as `Command exited with code <n>`.\n\n## Flow\n1. `BashTool.execute()` in `packages/coding-agent/src/tools/bash.ts` reads `command`, normalizes `env`, and defaults `timeout` to `300`.\n2. If `cwd` is absent, it rewrites a leading `cd <path> && ...` into the structured `cwd` field and strips that prefix from `command`.\n3. If `async: true` is requested while `async.enabled` is off, it throws `ToolError` before any execution.\n4. If `bashInterceptor.enabled` is on, `checkBashInterception()` runs against both the original command and the `cd`-stripped command. A matching enabled rule throws before URL expansion or execution.\n5. `expandInternalUrls()` rewrites supported internal URLs inside `command`, each `env` value, and protocol-looking `cwd` values. Command/env replacements are shell-escaped unless `noEscape` is requested by the caller path.\n6. `resolveToCwd()` resolves `cwd` against `session.cwd`; `fs.stat()` verifies that the target exists and is a directory.\n7. `clampTimeout(\"bash\", requestedTimeoutSec)` enforces `TOOL_TIMEOUTS.bash` (`default: 300`, `min: 1`, `max: 3600`). When clamped, `#buildCompletedResult()` / `#buildBackgroundStartResult()` append a notice line.\n8. Execution path splits:\n 1. `async: true` -> `#startManagedBashJob()` registers a session async job and returns immediately.\n 2. Non-PTY with `bash.autoBackground.enabled` and an async job manager -> starts a managed job, waits up to `min(thresholdMs, timeoutMs - 1000)`, and either returns the completed result or converts the run into a background job.\n 3. Otherwise runs foreground execution.\n9. Foreground non-PTY calls `executeBash()` from `packages/coding-agent/src/exec/bash-executor.ts`.\n10. Foreground PTY calls `runInteractiveBashPty()` from `packages/coding-agent/src/tools/bash-interactive.ts`.\n11. Both paths allocate an output artifact first when `session.allocateOutputArtifact` is available. The artifact path/id are passed into the sink so large output can spill to disk.\n12. `executeBash()` loads shell settings, optional shell snapshot, and shell minimizer settings, then runs via a persistent native `Shell` session or one-shot `executeShell()`. `docs/bash-tool-runtime.md` covers that path in detail.\n13. `runInteractiveBashPty()` creates a `PtySession`, overlays an xterm-backed console UI, forwards user key input into the PTY, captures output through `OutputSink`, and kills the PTY on dismiss/dispose.\n14. On completion, `#buildCompletedResult()` formats `(no output)` when needed, attaches truncation metadata from the `OutputSink` summary, and re-checks exit status / timeout / cancellation before returning.\n15. On non-zero exit, timeout, missing exit status, or cancellation, `#buildResultText()` throws with the captured output included in the error message.\n\n## Modes / Variants\n1. Foreground non-PTY\n - Default path.\n - Uses `executeBash()`.\n - Streams tail-only updates through `streamTailUpdates()` and `TailBuffer(DEFAULT_MAX_BYTES)`.\n2. Foreground PTY\n - Requires `pty: true`, UI context, and `PI_NO_PTY !== \"1\"`.\n - Uses `runInteractiveBashPty()` and a `PtySession` overlay.\n - Supports interactive input; `Esc` kills the session from the overlay.\n3. Explicit background job\n - Requires `async: true` and `async.enabled`.\n - Registers a job with `session.asyncJobManager` and returns `{ state: \"running\", jobId }` immediately.\n4. Auto-backgrounded non-PTY job\n - Requires `bash.autoBackground.enabled`, no PTY, and an async job manager.\n - Starts like a foreground managed job, then backgrounds it when it outlives the wait window.\n5. Intercepted command\n - No subprocess created.\n - Returns a `ToolError` pointing the model at `read`, `search`, `find`, `edit`, or `write`.\n\n## Side Effects\n- Filesystem\n - Validates `cwd` with `fs.stat()`.\n - May allocate and write artifact files for full output (`bash`) and minimizer-preserved raw output (`bash-original`).\n - `expandInternalUrls(..., { ensureLocalParentDirs: true })` creates parent directories for `local://` paths before execution.\n- Subprocesses / native bindings\n - Non-PTY uses native shell execution via `@oh-my-pi/pi-natives` (`Shell.run()` or `executeShell()`).\n - PTY uses native `PtySession.start()`.\n- Session state\n - Reads session settings for async, auto-background, interceptor, tool availability, and shell configuration.\n - Registers jobs with `session.asyncJobManager` for explicit/auto background runs.\n - Uses `session.getSessionId()` to isolate shell reuse and async session keys.\n - Uses `session.allocateOutputArtifact()` for spill files.\n- User-visible prompts / interactive UI\n - PTY mode opens a TUI overlay titled `Console` and forwards input to the PTY.\n - Background start messages direct the agent to `job` and to read `jobs://<id>`.\n- Background work / cancellation\n - Async and auto-background jobs continue after the initial tool return.\n - Cancellation aborts the native run; PTY overlay dismissal also kills the PTY.\n\n## Limits & Caps\n- Default timeout: `300s` (`TOOL_TIMEOUTS.bash.default` in `packages/coding-agent/src/tools/tool-timeouts.ts`).\n- Timeout clamp: `1..3600s` (`TOOL_TIMEOUTS.bash.min/max`).\n- Auto-background default threshold: `60_000ms` (`DEFAULT_AUTO_BACKGROUND_THRESHOLD_MS` in `packages/coding-agent/src/tools/bash.ts`), further capped to `timeoutMs - 1000` by `#resolveAutoBackgroundWaitMs()`.\n- Hard kill grace beyond requested timeout in non-PTY executor: `5_000ms` (`HARD_TIMEOUT_GRACE_MS` in `packages/coding-agent/src/exec/bash-executor.ts`).\n- In-memory output tail cap: `50 * 1024` bytes (`DEFAULT_MAX_BYTES` in `packages/coding-agent/src/session/streaming-output.ts`). Once exceeded, the sink keeps only the tail window in memory.\n- Streaming callback throttle in `executeBash()`: `50ms` between `onChunk` calls when streaming is enabled.\n- TUI collapsed preview: `10` visual lines (`BASH_DEFAULT_PREVIEW_LINES`) when rendered inline in the agent UI; this is a renderer cap, not a tool output cap.\n\n## Errors\n- Input validation:\n - invalid env key -> `ToolError(\"Invalid bash env name: <key>\")`.\n - async requested while disabled -> `ToolError(\"Async bash execution is disabled...\")`.\n - missing async job manager -> `ToolError(\"Async job manager unavailable for this session.\")`.\n - missing/bad `cwd` -> `ToolError(\"Working directory does not exist: ...\")` or `ToolError(\"Working directory is not a directory: ...\")`.\n- Interceptor:\n - matched command -> `ToolError` with `Blocked: <rule.message>` and the original command.\n - invalid interceptor regexes are silently skipped by `compileRules()`.\n- Internal URL expansion:\n - unsupported scheme, unknown skill, path traversal, missing router support, or router resolution failures all throw `ToolError` from `packages/coding-agent/src/tools/bash-skill-urls.ts`.\n- Execution:\n - non-zero exit -> thrown `ToolError` containing captured output plus `Command exited with code <n>`.\n - missing exit code -> thrown `ToolError` with `Command failed: missing exit status`.\n - timeout -> thrown `ToolError`; PTY uses `Command timed out after <n> seconds`, non-PTY executor returns cancelled output that `BashTool` converts to an error.\n - user abort -> `ToolAbortError` when the caller signal is aborted.\n- Artifact allocation / artifact save failures are swallowed in `saveBashOriginalArtifact()` and `OutputSink.#createFileSink()`; execution continues without that artifact.\n\n## Notes\n- `strict = true` and `concurrency = \"exclusive\"` are set on `BashTool`; the tool does not run concurrently with another bash tool call in the same session.\n- `command` and `env` URL expansions shell-escape replacements; `cwd` expansion uses `noEscape: true` because it becomes a filesystem path argument, not shell text.\n- `checkBashInterception()` blocks only when the matching rule's `tool` name is present in `ctx.toolNames`; missing tools disable their corresponding rule.\n- Default interceptor rules come from `DEFAULT_BASH_INTERCEPTOR_RULES` in `packages/coding-agent/src/config/settings-schema.ts`:\n - `cat|head|tail|less|more` -> `read`\n - `grep|rg|ripgrep|ag|ack` -> `search`\n - `find|fd|locate` with name/type/glob flags -> `find`\n - `sed -i`, `perl -i`, `awk -i inplace` -> `edit`\n - `echo|printf|cat <<` with redirection -> `write`\n- PTY mode is ignored in non-UI contexts and when `PI_NO_PTY=1`; the tool silently falls back to non-PTY execution.\n- Non-PTY runs merge `NON_INTERACTIVE_ENV` with `env`; PTY runs also prepend `NON_INTERACTIVE_ENV` before custom env values.\n- When the shell minimizer rewrites output inside `executeBash()`, the visible output is replaced with minimized text and a `[raw output: artifact://<id>]` footer may be appended if `onMinimizedSave` persisted the original text.\n- The TUI renderer parses partial JSON to recover `env` assignments early in streaming previews; that behavior is display-only.\n- For executor internals that are not tool-specific — shell session reuse keys, snapshots, prefix handling, and native timeout behavior — see `docs/bash-tool-runtime.md`.\n",
|
|
64
|
+
"tools/bash.md": "# bash\n\n> Execute a shell command in the session workspace, with optional PTY or background-job handling.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/bash.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/bash.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/bash-interactive.ts` — PTY/TUI execution path.\n - `packages/coding-agent/src/tools/bash-interceptor.ts` — blocks tool-better shell patterns.\n - `packages/coding-agent/src/tools/bash-skill-urls.ts` — expands internal URLs to paths.\n - `packages/coding-agent/src/exec/bash-executor.ts` — non-PTY shell execution.\n - `packages/coding-agent/src/session/streaming-output.ts` — tail buffer, truncation, artifact spill.\n - `packages/coding-agent/src/tools/tool-timeouts.ts` — timeout clamp bounds.\n - `packages/coding-agent/src/config/settings-schema.ts` — default interceptor rules.\n - `docs/bash-tool-runtime.md` — deeper executor/runtime notes; use as the companion doc for shell-session internals.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `command` | `string` | Yes | Shell command text to execute. A leading `cd <path> && ...` is rewritten into `cwd` only when `cwd` was omitted. |\n| `env` | `Record<string, string>` | No | Extra environment variables. Keys must match `^[A-Za-z_][A-Za-z0-9_]*$` or the tool throws. Values also go through internal-URL expansion. |\n| `timeout` | `number` | No | Timeout in seconds. Default `300`; clamped to `1..3600` by `clampTimeout(\"bash\", ...)`. |\n| `cwd` | `string` | No | Working directory, resolved against `session.cwd` via `resolveToCwd`. Must exist and be a directory. |\n| `pty` | `boolean` | No | Request PTY mode. Default `false`. PTY is used only when `pty: true`, `PI_NO_PTY !== \"1\"`, and the tool context has a UI. |\n| `async` | `boolean` | No | Background execution request. Present only when `async.enabled` is true for the session. Returns immediately with a job id instead of waiting. |\n\n## Outputs\nThe tool returns a single `text` content block plus optional `details`.\n\n- Success, foreground:\n - `content[0].text`: command output, or `(no output)` when the command produced nothing.\n - `details.timeoutSeconds`: effective timeout after clamping.\n - `details.requestedTimeoutSeconds`: only present when the requested timeout was clamped.\n - `details.meta.truncation`: present when output was truncated in memory; includes `artifactId` when full output spilled to an artifact.\n- Success, background start (`async: true` or auto-background):\n - `content[0].text`: optional preview tail, timeout notice if any, then `Background job <id> started: <label>` with follow-up instructions.\n - `details.async`: `{ state: \"running\", jobId, type: \"bash\" }`.\n- Background progress / completion:\n - delivered through `onUpdate` / async job manager, not the initial return.\n - running updates contain tail text and `details.async.state: \"running\"` only after the job is considered backgrounded.\n - completion/failure updates carry final text and `details.async.state: \"completed\" | \"failed\"`.\n- Failure:\n - the tool throws `ToolError` / `ToolAbortError`; non-zero exits are surfaced as errors, not success results.\n\nStdout and stderr are merged before the model sees them. Non-zero exit codes are appended to the thrown error text as `Command exited with code <n>`.\n\n## Flow\n1. `BashTool.execute()` in `packages/coding-agent/src/tools/bash.ts` reads `command`, normalizes `env`, and defaults `timeout` to `300`.\n2. If `cwd` is absent, it rewrites a leading `cd <path> && ...` into the structured `cwd` field and strips that prefix from `command`.\n3. If `async: true` is requested while `async.enabled` is off, it throws `ToolError` before any execution.\n4. If `bashInterceptor.enabled` is on, `checkBashInterception()` runs against both the original command and the `cd`-stripped command. A matching enabled rule throws before URL expansion or execution.\n5. `expandInternalUrls()` rewrites supported internal URLs inside `command`, each `env` value, and protocol-looking `cwd` values. Command/env replacements are shell-escaped unless `noEscape` is requested by the caller path.\n6. `resolveToCwd()` resolves `cwd` against `session.cwd`; `fs.stat()` verifies that the target exists and is a directory.\n7. `clampTimeout(\"bash\", requestedTimeoutSec)` enforces `TOOL_TIMEOUTS.bash` (`default: 300`, `min: 1`, `max: 3600`). When clamped, `#buildCompletedResult()` / `#buildBackgroundStartResult()` append a notice line.\n8. Execution path splits:\n 1. `async: true` -> `#startManagedBashJob()` registers a session async job and returns immediately.\n 2. Non-PTY with `bash.autoBackground.enabled` and an async job manager -> starts a managed job, waits up to `min(thresholdMs, timeoutMs - 1000)`, and either returns the completed result or converts the run into a background job.\n 3. Otherwise runs foreground execution.\n9. Foreground non-PTY calls `executeBash()` from `packages/coding-agent/src/exec/bash-executor.ts`.\n10. Foreground PTY calls `runInteractiveBashPty()` from `packages/coding-agent/src/tools/bash-interactive.ts`.\n11. Both paths allocate an output artifact first when `session.allocateOutputArtifact` is available. The artifact path/id are passed into the sink so large output can spill to disk.\n12. `executeBash()` loads shell settings, optional shell snapshot, and shell minimizer settings, then runs via a persistent native `Shell` session or one-shot `executeShell()`. `docs/bash-tool-runtime.md` covers that path in detail.\n13. `runInteractiveBashPty()` creates a `PtySession`, overlays an xterm-backed console UI, forwards user key input into the PTY, captures output through `OutputSink`, and kills the PTY on dismiss/dispose.\n14. On completion, `#buildCompletedResult()` formats `(no output)` when needed, attaches truncation metadata from the `OutputSink` summary, and re-checks exit status / timeout / cancellation before returning.\n15. On non-zero exit, timeout, missing exit status, or cancellation, `#buildResultText()` throws with the captured output included in the error message.\n\n## Modes / Variants\n1. Foreground non-PTY\n - Default path.\n - Uses `executeBash()`.\n - Streams tail-only updates through `streamTailUpdates()` and `TailBuffer(DEFAULT_MAX_BYTES)`.\n2. Foreground PTY\n - Requires `pty: true`, UI context, and `PI_NO_PTY !== \"1\"`.\n - Uses `runInteractiveBashPty()` and a `PtySession` overlay.\n - Supports interactive input; `Esc` kills the session from the overlay.\n3. Explicit background job\n - Requires `async: true` and `async.enabled`.\n - Registers a job with `session.asyncJobManager` and returns `{ state: \"running\", jobId }` immediately.\n4. Auto-backgrounded non-PTY job\n - Requires `bash.autoBackground.enabled`, no PTY, and an async job manager.\n - Starts like a foreground managed job, then backgrounds it when it outlives the wait window.\n5. Intercepted command\n - No subprocess created.\n - Returns a `ToolError` pointing the model at `read`, `search`, `find`, `edit`, or `write`.\n\n## Side Effects\n- Filesystem\n - Validates `cwd` with `fs.stat()`.\n - May allocate and write artifact files for full output (`bash`) and minimizer-preserved raw output (`bash-original`).\n - `expandInternalUrls(..., { ensureLocalParentDirs: true })` creates parent directories for `local://` paths before execution.\n- Subprocesses / native bindings\n - Non-PTY uses native shell execution via `@oh-my-pi/pi-natives` (`Shell.run()` or `executeShell()`).\n - PTY uses native `PtySession.start()`.\n- Session state\n - Reads session settings for async, auto-background, interceptor, tool availability, and shell configuration.\n - Registers jobs with `session.asyncJobManager` for explicit/auto background runs.\n - Uses `session.getSessionId()` to isolate shell reuse and async session keys.\n - Uses `session.allocateOutputArtifact()` for spill files.\n- User-visible prompts / interactive UI\n - PTY mode opens a TUI overlay titled `Console` and forwards input to the PTY.\n - Background start messages direct the agent to the `job` tool (use `list: true` for a snapshot, or pass `poll: [id]` to wait).\n- Background work / cancellation\n - Async and auto-background jobs continue after the initial tool return.\n - Cancellation aborts the native run; PTY overlay dismissal also kills the PTY.\n\n## Limits & Caps\n- Default timeout: `300s` (`TOOL_TIMEOUTS.bash.default` in `packages/coding-agent/src/tools/tool-timeouts.ts`).\n- Timeout clamp: `1..3600s` (`TOOL_TIMEOUTS.bash.min/max`).\n- Auto-background default threshold: `60_000ms` (`DEFAULT_AUTO_BACKGROUND_THRESHOLD_MS` in `packages/coding-agent/src/tools/bash.ts`), further capped to `timeoutMs - 1000` by `#resolveAutoBackgroundWaitMs()`.\n- Hard kill grace beyond requested timeout in non-PTY executor: `5_000ms` (`HARD_TIMEOUT_GRACE_MS` in `packages/coding-agent/src/exec/bash-executor.ts`).\n- In-memory output tail cap: `50 * 1024` bytes (`DEFAULT_MAX_BYTES` in `packages/coding-agent/src/session/streaming-output.ts`). Once exceeded, the sink keeps only the tail window in memory.\n- Streaming callback throttle in `executeBash()`: `50ms` between `onChunk` calls when streaming is enabled.\n- TUI collapsed preview: `10` visual lines (`BASH_DEFAULT_PREVIEW_LINES`) when rendered inline in the agent UI; this is a renderer cap, not a tool output cap.\n\n## Errors\n- Input validation:\n - invalid env key -> `ToolError(\"Invalid bash env name: <key>\")`.\n - async requested while disabled -> `ToolError(\"Async bash execution is disabled...\")`.\n - missing async job manager -> `ToolError(\"Async job manager unavailable for this session.\")`.\n - missing/bad `cwd` -> `ToolError(\"Working directory does not exist: ...\")` or `ToolError(\"Working directory is not a directory: ...\")`.\n- Interceptor:\n - matched command -> `ToolError` with `Blocked: <rule.message>` and the original command.\n - invalid interceptor regexes are silently skipped by `compileRules()`.\n- Internal URL expansion:\n - unsupported scheme, unknown skill, path traversal, missing router support, or router resolution failures all throw `ToolError` from `packages/coding-agent/src/tools/bash-skill-urls.ts`.\n- Execution:\n - non-zero exit -> thrown `ToolError` containing captured output plus `Command exited with code <n>`.\n - missing exit code -> thrown `ToolError` with `Command failed: missing exit status`.\n - timeout -> thrown `ToolError`; PTY uses `Command timed out after <n> seconds`, non-PTY executor returns cancelled output that `BashTool` converts to an error.\n - user abort -> `ToolAbortError` when the caller signal is aborted.\n- Artifact allocation / artifact save failures are swallowed in `saveBashOriginalArtifact()` and `OutputSink.#createFileSink()`; execution continues without that artifact.\n\n## Notes\n- `strict = true` and `concurrency = \"exclusive\"` are set on `BashTool`; the tool does not run concurrently with another bash tool call in the same session.\n- `command` and `env` URL expansions shell-escape replacements; `cwd` expansion uses `noEscape: true` because it becomes a filesystem path argument, not shell text.\n- `checkBashInterception()` blocks only when the matching rule's `tool` name is present in `ctx.toolNames`; missing tools disable their corresponding rule.\n- Default interceptor rules come from `DEFAULT_BASH_INTERCEPTOR_RULES` in `packages/coding-agent/src/config/settings-schema.ts`:\n - `cat|head|tail|less|more` -> `read`\n - `grep|rg|ripgrep|ag|ack` -> `search`\n - `find|fd|locate` with name/type/glob flags -> `find`\n - `sed -i`, `perl -i`, `awk -i inplace` -> `edit`\n - `echo|printf|cat <<` with redirection -> `write`\n- PTY mode is ignored in non-UI contexts and when `PI_NO_PTY=1`; the tool silently falls back to non-PTY execution.\n- Non-PTY runs merge `NON_INTERACTIVE_ENV` with `env`; PTY runs also prepend `NON_INTERACTIVE_ENV` before custom env values.\n- When the shell minimizer rewrites output inside `executeBash()`, the visible output is replaced with minimized text and a `[raw output: artifact://<id>]` footer may be appended if `onMinimizedSave` persisted the original text.\n- The TUI renderer parses partial JSON to recover `env` assignments early in streaming previews; that behavior is display-only.\n- For executor internals that are not tool-specific — shell session reuse keys, snapshots, prefix handling, and native timeout behavior — see `docs/bash-tool-runtime.md`.\n",
|
|
65
65
|
"tools/browser.md": "# browser\n\n> Open, reuse, close, and script Puppeteer tabs against headless Chromium or CDP-attached apps.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/browser.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/browser.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/browser/tab-supervisor.ts` — global tab registry; worker lifecycle; run/close coordination.\n - `packages/coding-agent/src/tools/browser/tab-worker.ts` — executes `run` code; implements the `tab` helper API.\n - `packages/coding-agent/src/tools/browser/tab-worker-entry.ts` — worker-thread transport bootstrap.\n - `packages/coding-agent/src/tools/browser/registry.ts` — browser-handle registry keyed by browser kind.\n - `packages/coding-agent/src/tools/browser/launch.ts` — Puppeteer loading, Chromium resolution/download, headless launch, stealth injection.\n - `packages/coding-agent/src/tools/browser/attach.ts` — CDP attach/reuse, target picking, spawned-app process handling.\n - `packages/coding-agent/src/tools/browser/tab-protocol.ts` — worker init/run/result message schema.\n - `packages/coding-agent/src/tools/browser/readable.ts` — `tab.extract()` readability extraction.\n - `packages/coding-agent/src/tools/browser/render.ts` — TUI rendering for `open`/`close` status lines and `run` JS cells.\n - `packages/coding-agent/src/tools/puppeteer/00_stealth_tampering.txt` — mask patched functions/descriptors as native.\n - `packages/coding-agent/src/tools/puppeteer/01_stealth_activity.txt` — synthesize visibility/focus/scroll activity.\n - `packages/coding-agent/src/tools/puppeteer/02_stealth_hairline.txt` — fix Modernizr hairline detection.\n - `packages/coding-agent/src/tools/puppeteer/03_stealth_botd.txt` — spoof `navigator.webdriver`, `window.chrome`, and Chrome fingerprint surfaces.\n - `packages/coding-agent/src/tools/puppeteer/04_stealth_iframe.txt` — patch iframe `contentWindow`/`srcdoc` behavior.\n - `packages/coding-agent/src/tools/puppeteer/05_stealth_webgl.txt` — spoof WebGL vendor/renderer/precision.\n - `packages/coding-agent/src/tools/puppeteer/06_stealth_screen.txt` — normalize screen/viewport/device-pixel-ratio values.\n - `packages/coding-agent/src/tools/puppeteer/07_stealth_fonts.txt` — spoof local fonts and perturb canvas text rendering.\n - `packages/coding-agent/src/tools/puppeteer/08_stealth_audio.txt` — spoof audio latency/sample-rate and perturb offline rendering.\n - `packages/coding-agent/src/tools/puppeteer/09_stealth_locale.txt` — force locale/languages/timezone/date strings.\n - `packages/coding-agent/src/tools/puppeteer/10_stealth_plugins.txt` — synthesize `navigator.plugins`/`navigator.mimeTypes`.\n - `packages/coding-agent/src/tools/puppeteer/11_stealth_hardware.txt` — spoof `navigator.hardwareConcurrency`.\n - `packages/coding-agent/src/tools/puppeteer/12_stealth_codecs.txt` — spoof media codec support.\n - `packages/coding-agent/src/tools/puppeteer/13_stealth_worker.txt` — carry UA/platform spoofing into `Worker`/`SharedWorker`.\n\n## Inputs\n\n### Shared fields\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `action` | `\"open\" \\| \"close\" \\| \"run\"` | Yes | Dispatches to the open/close/run path. |\n| `name` | `string` | No | Tab id. Defaults to `\"main\"`. Tabs live in a process-global map, so the same name is reused across later calls and in-process subagents until closed. |\n| `timeout` | `number` | No | Tool wall-clock timeout in seconds. Defaults to `30`; clamped to the browser tool range before execution. |\n\n### `action: \"open\"`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `url` | `string` | No | Navigate after the tab is ready. Existing reusable tabs also navigate when `url` is supplied. |\n| `viewport` | `{ width: number; height: number; scale?: number }` | No | Requested viewport. For headless launch this becomes the initial viewport; for a page it is applied with `page.setViewport()`. `scale` maps to Puppeteer `deviceScaleFactor`. |\n| `wait_until` | `\"load\" \\| \"domcontentloaded\" \\| \"networkidle0\" \\| \"networkidle2\"` | No | Navigation wait condition. Defaults to `\"networkidle2\"` where omitted. |\n| `dialogs` | `\"accept\" \\| \"dismiss\"` | No | Installs a page `dialog` handler that auto-accepts or auto-dismisses dialogs. Omitted means no handler. |\n| `app` | `{ path?: string; cdp_url?: string; args?: string[]; target?: string }` | No | Selects browser kind. No `app` uses the session `browser.headless` setting. `app.path` is resolved against the session cwd and used as the executable path for spawn/attach reuse. `app.cdp_url` connects to an existing CDP endpoint. `args` are appended only when spawning `app.path`. `target` is only used for attached/spawned-app page selection. |\n\n### `action: \"close\"`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `all` | `boolean` | No | Close every known tab. Omitted closes only `name`. |\n| `kill` | `boolean` | No | When a tab release drops a spawned-app browser handle to refcount 0, also terminate its process tree. Has no effect on headless shutdown and only disconnects connected CDP browsers. |\n\n### `action: \"run\"`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `code` | `string` | Yes | Async-function body executed in a VM context with `page`, `browser`, `tab`, `display`, `assert`, `wait`, `console`, timers, `URL`, `TextEncoder`, `TextDecoder`, and `Buffer` in scope. |\n\n## Outputs\nThe tool returns one result per call; no streaming partial output is emitted from the browser implementation itself.\n\n- `open`: text content with `Opened` or `Reused`, browser description, URL, and optional title. `details` includes `action`, `name`, `browser`, `url`, `viewport`, and the same text in `details.result`.\n- `close`: text content with either `Closed ...` or `No tab named ...`. `details` includes `action`, `name`, and `details.result`.\n- `run`: ordered `content` array built as:\n 1. every `display(value)` call in execution order,\n 2. final return value, JSON-stringified unless already a string,\n 3. or `Ran code on tab \"...\"` if nothing else was produced.\n- `display(value)` coercion in `packages/coding-agent/src/tools/browser/tab-worker.ts`:\n - `{ type: \"image\", data: string, mimeType: string }` becomes image content,\n - `string` becomes text content,\n - other values become pretty JSON text when serializable, else `String(value)`.\n- `tab.screenshot()` also appends text plus an image content item unless `silent: true`; `details.screenshots` records persisted screenshot metadata `{ dest, mimeType, bytes, width, height }`.\n- `run` `details` includes `action`, `name`, current `browser`/`url` when the tab exists, optional `screenshots`, and `details.result` containing only the concatenated text outputs.\n\n## Flow\n1. `BrowserTool.execute()` (`packages/coding-agent/src/tools/browser.ts`) abort-checks, clamps `timeout` via `clampTimeout(\"browser\", ...)`, defaults `name` to `\"main\"`, and dispatches on `action`.\n2. `open` resolves browser kind with `resolveBrowserKind()`:\n - `app.cdp_url` → `{ kind: \"connected\" }` after trimming trailing slashes.\n - `app.path` → `{ kind: \"spawned\" }` after resolving against session cwd.\n - otherwise → `{ kind: \"headless\", headless: session.settings.get(\"browser.headless\") }`.\n3. `open` rejects reusing the same tab name across different browser kinds (`sameBrowserKind()`); callers must close first.\n4. `open` acquires a browser handle through `acquireBrowser()` (`packages/coding-agent/src/tools/browser/registry.ts`):\n - existing connected handle is reused by browser-kind key;\n - stale disconnected handles are disposed and recreated;\n - headless launches via `launchHeadlessBrowser()`;\n - `connected` waits for `${cdpUrl}/json/version`, then `puppeteer.connect()`;\n - `spawned` first tries `findReusableCdp()`, else kills same-path processes, allocates a free loopback port, spawns the executable with `--remote-debugging-port=<port>`, waits for CDP, then connects.\n5. `open` acquires a tab through `acquireTab()` (`packages/coding-agent/src/tools/browser/tab-supervisor.ts`):\n - same-name + same-browser + alive tab is reused unless `dialogs` changed;\n - same-name but different browser handle, dead state, or changed dialog policy forces release and recreation;\n - reusing with a new `url` navigates by issuing `await tab.goto(...)` through the worker.\n6. New tabs build a `WorkerInitPayload` in `buildInitPayload()`:\n - headless mode sends `url`, `waitUntil`, `viewport`, `dialogs`, and timeout;\n - attach mode resolves a page with `pickElectronTarget()`, gets its target id, and sends `targetId` plus `dialogs`.\n7. `acquireTab()` spawns a dedicated Bun `Worker` from `tab-worker-entry.ts`; if that fails it falls back to inline execution in the main thread (`spawnInlineWorker()`), preserving behavior but losing protection against synchronous infinite loops.\n8. `WorkerCore.#init()` (`packages/coding-agent/src/tools/browser/tab-worker.ts`) connects back to the browser websocket endpoint. Headless mode opens a new page, applies stealth patches, applies viewport, installs dialog handling if requested, and optionally navigates. Attach mode resolves the requested target page and optionally installs dialog handling.\n9. On success the worker sends `ready` with `{ url, title, viewport, targetId }`; the supervisor stores a `TabSession`, increments browser-handle refcount with `holdBrowser()`, and keeps the tab in a process-global `Map<string, TabSession>`.\n10. `run` requires non-empty `code`, looks up the tab with `getTab()`, then delegates to `runInTab()`.\n11. `runInTabWithSnapshot()` rejects dead tabs and concurrent runs (`Tab ... is busy`), captures session cwd plus optional `browser.screenshotDir`, registers an abort hook, sends a `run` message to the worker, and races the result against `timeoutMs + 750` ms. Timeouts force-kill the tab worker and, for headless tabs, close the orphaned page target.\n12. `WorkerCore.#run()` creates a VM context, exposes the raw Puppeteer `page`/`browser` plus a synthetic `tab` API, and executes `(async () => { ...code... })()` via `vm.runInContext()`.\n13. The `tab` helper API implemented in `#createTabApi()` is:\n - `tab.name: string`\n - `tab.page: Page`\n - `tab.signal?: AbortSignal`\n - `tab.url(): string`\n - `tab.title(): Promise<string>`\n - `tab.goto(url, { waitUntil? })`\n - `tab.observe({ includeAll?, viewportOnly? })`\n - `tab.screenshot({ selector?, fullPage?, save?, silent? })`\n - `tab.extract(format = \"markdown\")`\n - `tab.click(selector)`\n - `tab.type(selector, text)`\n - `tab.fill(selector, value)`\n - `tab.press(key, { selector? })`\n - `tab.scroll(deltaX, deltaY)`\n - `tab.drag(from, to)`\n - `tab.waitFor(selector)`\n - `tab.evaluate(fn, ...args)`\n - `tab.scrollIntoView(selector)`\n - `tab.select(selector, ...values)`\n - `tab.uploadFile(selector, ...filePaths)`\n - `tab.waitForUrl(pattern, { timeout? })`\n - `tab.waitForResponse(pattern, { timeout? })`\n - `tab.id(n)`\n14. Selector handling in `normalizeSelector()` accepts plain CSS and Puppeteer query handlers, and rewrites legacy Playwright-style prefixes `p-text/`, `p-xpath/`, `p-pierce/`, `p-aria/`; other `p-*` prefixes throw a `ToolError`.\n15. `tab.observe()` clears the element cache, takes a Puppeteer accessibility snapshot, filters to interactive nodes unless `includeAll`, optionally filters to viewport-visible nodes, assigns numeric ids, caches `ElementHandle`s, and returns URL/title/viewport/scroll metadata plus `elements`.\n16. `tab.id(n)` resolves the cached `ElementHandle`, verifies `el.isConnected`, and throws a stale-id error after cache invalidation if the DOM changed or the cache was cleared.\n17. `tab.goto()` clears the cached element ids before navigating. Any new `tab.observe()` also clears and rebuilds the cache.\n18. `tab.click()` uses a custom retry loop for `text/...` selectors to find an actionable visible match; other selectors use `page.locator(...).click()` with the run timeout.\n19. `tab.screenshot()` captures either the whole page or a selector PNG, downsizes a copy for model output, chooses a persistence path, writes the image to disk, records metadata, and optionally emits text + image display entries.\n20. `display()` calls accumulate in an array. After code finishes, the worker posts `{ displays, returnValue, screenshots }`; `BrowserTool.#run()` appends the return value as trailing text content when not `undefined`.\n21. `close` releases one tab or all tabs via `releaseTab()` / `releaseAllTabs()`. Each tab aborts pending runs, asks the worker to close, waits up to `750` ms for a `closed` ack, terminates the worker, decrements browser refcount, and disposes the browser handle when refcount reaches zero.\n\n## Modes / Variants\n- **Action dispatch**\n - `open` — acquire/reuse browser + tab.\n - `close` — release one tab or all tabs.\n - `run` — execute JS inside the tab worker.\n- **Browser kind**\n - **Headless**: launches local Chromium with Puppeteer, applies stealth patches, and creates a fresh page per tab.\n - **Spawned app (`app.path`)**: reuses an existing CDP-enabled process for that executable when possible; otherwise kills same-path processes, spawns the executable with remote debugging enabled, then attaches. No stealth patches are injected.\n - **Connected browser (`app.cdp_url`)**: attaches to an already-running CDP endpoint. No process ownership; close only disconnects.\n- **Target selection for attached/spawned browsers**\n - With `app.target`, `pickElectronTarget()` returns the first page whose URL or title contains the case-insensitive substring.\n - Without `app.target`, it skips titles/URLs matching `request handler|devtools|background page|background host|service worker` and otherwise falls back to the first page.\n- **Worker mode**\n - **Dedicated worker**: normal path; user code runs off the main thread and can be aborted even when it blocks synchronously.\n - **Inline fallback**: activated when Bun worker spawn fails; behavior matches, but synchronous infinite loops on user code cannot be interrupted.\n- **Dialog policy**\n - No `dialogs` field: no auto-handler.\n - `accept`/`dismiss`: page `dialog` events are handled automatically.\n - Changing dialog policy on an existing live tab forces tab recreation instead of mutating the worker in place.\n- **Screenshot persistence**\n - `save` provided: persist full-resolution PNG at the resolved cwd-relative or absolute path.\n - `browser.screenshotDir` session setting set: persist full-resolution PNG under that directory with a timestamped filename.\n - Neither set: persist the resized image to a temp-file path under the OS temp dir.\n\n## Side Effects\n- Filesystem\n - `loadPuppeteer()` writes `{}` to `<puppeteer-safe-dir>/package.json` before importing `puppeteer-core`.\n - First headless launch may download Chromium into the Puppeteer cache directory returned by `getPuppeteerDir()`.\n - `tab.screenshot()` creates parent directories and writes image files.\n - `tab.uploadFile()` resolves supplied paths against the session cwd.\n- Network\n - CDP attach paths poll `http://127.0.0.1:<port>/json/version` or the supplied `cdp_url` `/json/version`.\n - Headless/browser-attach sessions create CDP websocket connections.\n - Headless first-use Chromium download uses `@puppeteer/browsers`.\n - User `page` / `tab` operations perform normal browser network traffic.\n- Subprocesses / native bindings\n - Headless mode launches Chromium through Puppeteer.\n - `app.path` mode may spawn the target executable via `Bun.spawn()`.\n - `killExistingByPath()` / `gracefulKillTreeOnce()` use `@oh-my-pi/pi-natives` process inspection/termination.\n - Worker mode uses Bun `Worker`; fallback mode does not.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Browser handles are cached in a process-global `Map` keyed by browser kind in `packages/coding-agent/src/tools/browser/registry.ts`.\n - Tabs are cached in a process-global `Map` keyed by `name` in `packages/coding-agent/src/tools/browser/tab-supervisor.ts`.\n - `run` captures session cwd and optional `browser.screenshotDir` for screenshot/save path resolution.\n - `restartForModeChange()` drops only headless tabs.\n- User-visible prompts / interactive UI\n - None beyond normal tool output. Dialog auto-handling is invisible unless it fails and emits debug logs.\n- Background work / cancellation\n - `open`, `run`, CDP waits, and browser actions thread through abort signals.\n - A timed-out `run` aborts the worker execution path and can tear down the tab.\n\n## Limits & Caps\n- Tool timeout clamp: default `30` s, min `1` s, max `30` s (`TOOL_TIMEOUTS.browser` in `packages/coding-agent/src/tools/tool-timeouts.ts`).\n- Supervisor grace period around init/run/close: `750` ms (`GRACE_MS` in `packages/coding-agent/src/tools/browser/tab-supervisor.ts`).\n- Puppeteer protocol timeout for launch/connect operations: `60_000` ms (`BROWSER_PROTOCOL_TIMEOUT_MS` in `packages/coding-agent/src/tools/browser/launch.ts`).\n- Connected-browser CDP readiness wait: `5_000` ms before `puppeteer.connect()` (`packages/coding-agent/src/tools/browser/registry.ts`).\n- Spawned-app CDP readiness wait after spawn: `30_000` ms (`packages/coding-agent/src/tools/browser/registry.ts`).\n- CDP polling cadence: 150 ms in `waitForCdp()` (`packages/coding-agent/src/tools/browser/attach.ts`).\n- Headless default viewport: `1365x768` at `deviceScaleFactor: 1.25` (`DEFAULT_VIEWPORT` in `packages/coding-agent/src/tools/browser/launch.ts`).\n- Screenshot model-attachment resize cap: `maxWidth 1024`, `maxHeight 1024`, `maxBytes 150 * 1024`, `jpegQuality 70` (`packages/coding-agent/src/tools/browser/tab-worker.ts`).\n- `tab.waitForUrl()` polling interval: `200` ms (`packages/coding-agent/src/tools/browser/tab-worker.ts`).\n- Drag simulation uses `12` mouse-move steps (`packages/coding-agent/src/tools/browser/tab-worker.ts`).\n\n## Errors\n- `BrowserTool.execute()` converts DOM-style `AbortError` into `ToolAbortError`; other errors propagate.\n- `run` hard-fails on missing code: `Missing required parameter 'code' for action 'run'.`\n- `open` fails when reusing a name across browser kinds: `Tab \"...\" is bound to a different browser (...). Close it first.`\n- `runInTabWithSnapshot()` fails when the tab is absent/dead (`Tab \"...\" is not alive. Reopen it.`) or already running (`Tab \"...\" is busy`).\n- Worker init failures and run failures are serialized through `RunErrorPayload`; `ToolError` and abort state are reconstructed on the host side by `errorFromPayload()`.\n- Attached-target mismatches surface as:\n - `No page targets available on the attached browser`\n - `No page target matched \"...\". Available pages:\\n...`\n - `Target ... is no longer available on the attached browser`\n- Spawned-app path validation requires an absolute executable path, not an app bundle path.\n- Spawn/attach failures are wrapped into `ToolError`s such as `Timed out waiting for CDP endpoint ...`, `Failed to attach to ...`, or `Connected to ... but puppeteer.connect failed: ...`.\n- `tab` helper errors are user-visible `ToolError`s, including unsupported selector prefix, stale/unknown element id, invalid drag target, missing upload files, non-`<select>` for `tab.select()`, non-file-input for `tab.uploadFile()`, and screenshot selector misses.\n- On run timeout, the worker reports `Browser code execution timed out after <ms>ms`; the supervisor may escalate to `Browser code execution hung past grace; tab killed` if the worker does not respond after the grace window.\n\n## Notes\n- `loadPuppeteer()` and `loadPuppeteerInWorker()` temporarily redirect `cwd` to a safe Puppeteer directory before importing `puppeteer-core`, because Puppeteer probes the current working directory during module load.\n- Headless launch prefers a detected system Chrome/Chromium, then `PUPPETEER_EXECUTABLE_PATH`, and only then downloads Chromium.\n- Headless launch always passes `--no-sandbox`, `--disable-setuid-sandbox`, `--disable-blink-features=AutomationControlled`, and a `--window-size=...` matching the initial viewport. It also ignores Puppeteer default args `--disable-extensions`, `--disable-default-apps`, and `--disable-component-extensions-with-background-pages`.\n- Proxy-related env vars only affect headless launch: `PUPPETEER_PROXY`, `PUPPETEER_PROXY_BYPASS_LOOPBACK`, and `PUPPETEER_PROXY_IGNORE_CERT_ERRORS`.\n- Stealth patches are applied only in headless mode. Spawned or externally connected browsers are intentionally left untouched.\n- `applyStealthPatches()` also strips Puppeteer's `//# sourceURL=__puppeteer_evaluation_script__` suffix from CDP `Runtime.evaluate` / `Runtime.callFunctionOn` payloads.\n- `tab.extract()` reads `page.content()`, runs Readability first, then falls back to `main article`/`article`/`main`/`[role='main']`/`body`, and returns `null` if neither extraction path yields content.\n- `close(all: true, kill: false)` disconnects from spawned/connected browsers when the last tab closes but leaves spawned app processes running.\n- Headless orphan cleanup is best-effort: if a worker dies before closing its page, the supervisor searches browser targets by `targetId` and closes that page.\n- Console methods inside `run` do not appear in tool output; they are forwarded as debug/warn/error logs through the worker transport.",
|
|
66
66
|
"tools/calc.md": "# calc\n\n> Evaluates one or more arithmetic expressions and returns formatted numeric results.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/calculator.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/calculator.md`\n- Key collaborators:\n - `packages/coding-agent/src/tui.ts` — status lines and tree-list rendering\n - `packages/coding-agent/src/tools/render-utils.ts` — preview limits and formatting helpers\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `calculations` | `Calculation[]` | Yes | Batch of expressions to evaluate in order. |\n\n### `Calculation`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `expression` | `string` | Yes | Arithmetic expression string. |\n| `prefix` | `string` | Yes | Prepended verbatim to the rendered numeric result. |\n| `suffix` | `string` | Yes | Appended verbatim to the rendered numeric result. |\n\n## Outputs\n- Single-shot result.\n- `content[0].text` is the newline-joined `prefix + value + suffix` string for each calculation.\n- `details.results` is an array of `{ expression, value, output }`.\n- On renderer fallback, if `details` is missing but `content[0].text` exists, the TUI tries to pair each output line with the original expressions from call args.\n\n## Flow\n1. `execute()` wraps evaluation in `untilAborted(...)`.\n2. For each entry, `evaluateExpression(...)` tokenizes the expression, parses it with a recursive-descent parser, rejects non-finite outputs, and normalizes `-0` to `0`.\n3. `tokenizeExpression(...)` accepts whitespace, parentheses, operators, and number literals; any other character throws immediately.\n4. `ExpressionParser` applies precedence in this order: `+ -`, `* / %`, unary `+ -`, exponentiation `**`, parentheses/literals.\n5. Exponentiation is right-associative (`2 ** 3 ** 2` parses as `2 ** (3 ** 2)`).\n6. Each numeric result is formatted with `String(value)` and wrapped with the provided `prefix` and `suffix`.\n7. The tool returns text output plus structured `details`.\n\n## Side Effects\n- Background work / cancellation\n - Supports abort via `untilAborted(...)`.\n- Session state\n - None.\n- Filesystem / Network / Subprocesses\n - None.\n\n## Limits & Caps\n- Supported operators: `+`, `-`, `*`, `/`, `%`, `**` (`packages/coding-agent/src/tools/calculator.ts`).\n- Supported numeric literals:\n - decimal integers/floats, including leading-dot forms like `.5`\n - scientific notation like `1e10`, `2.5E-3`\n - hexadecimal `0x...`\n - binary `0b...`\n - octal `0o...`\n- Results must be finite; `Infinity` and `NaN` are rejected.\n- The renderer collapses long result lists using `PREVIEW_LIMITS.COLLAPSED_ITEMS` from `packages/coding-agent/src/tools/render-utils.ts`.\n\n## Errors\n- Invalid characters: e.g. `Invalid character \"x\" in expression`.\n- Malformed numbers: invalid prefixed literal, invalid exponent, invalid number.\n- Syntax errors: `Unexpected token in expression`, `Unexpected end of expression`, `Missing closing parenthesis`, `Expression is empty`.\n- Non-finite arithmetic: `Expression result is not a finite number`.\n- Any evaluation error aborts the whole batch; the tool does not return partial successes.\n\n## Notes\n- Despite the schema example showing `sqrt(16)`, the parser does not support functions, identifiers, units, or constants; only numeric literals, operators, and parentheses are accepted.\n- Precision is plain JavaScript `number` semantics throughout, including floating-point rounding behavior.\n- `/` and `%` use JavaScript numeric operators directly; there is no integer-only mode or unit handling.\n- Unary operators bind tighter than `*`/`/`/`%` but looser than exponentiation because unary parsing delegates to `#parsePower()`.\n",
|
|
67
67
|
"tools/checkpoint.md": "# checkpoint\n\n> Mark the current top-level conversation state so later `rewind` can collapse exploratory context into a report.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/checkpoint.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/checkpoint.md`\n- Key collaborators:\n - `packages/coding-agent/src/session/agent-session.ts` — captures the active checkpoint after tool success.\n - `packages/coding-agent/src/session/session-manager.ts` — persists the normal session entry stream; not the active checkpoint marker.\n - `packages/coding-agent/src/tools/index.ts` — registers the tool and gates it behind `checkpoint.enabled`.\n - `packages/coding-agent/src/config/settings-schema.ts` — defines the disabled-by-default feature flag.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `goal` | `string` | Yes | Investigation goal. Required by the TypeBox schema and echoed in the tool result. |\n\n## Outputs\nThe tool returns a single text result plus structured details:\n\n- text body:\n - `Checkpoint created.`\n - `Goal: <goal>`\n - `Run your investigation, then call rewind with a concise report.`\n- `details`:\n - `goal: string`\n - `startedAt: string` — ISO timestamp created inside `CheckpointTool.execute()`\n\nNo checkpoint ID, artifact URI, job handle, file path, or restore token is returned.\n\n## Flow\n1. `CheckpointTool.createIf()` in `packages/coding-agent/src/tools/checkpoint.ts` returns `null` for subagents by checking `session.taskDepth`; only top-level sessions can see the tool.\n2. `CheckpointTool.execute()` rejects subagent calls again with `ToolError(\"Checkpoint not available in subagents.\")`.\n3. It rejects nested checkpoints with `ToolError(\"Checkpoint already active.\")` when `session.getCheckpointState?.()` is already set.\n4. It creates `startedAt = new Date().toISOString()` and returns a normal `toolResult()` payload. The tool itself does not persist anything.\n5. On the later `tool_execution_end` event, `AgentSession` in `packages/coding-agent/src/session/agent-session.ts` detects successful `checkpoint` execution and captures three in-memory fields:\n - `checkpointMessageCount` — current `agent.state.messages.length`, after the checkpoint tool result has already been appended\n - `checkpointEntryId` — `sessionManager.getEntries().at(-1)?.id ?? null`, i.e. the last persisted session entry ID at checkpoint time\n - `startedAt` — copied from tool details or regenerated\n6. `AgentSession` stores that object in its private `#checkpointState` field and clears `#pendingRewindReport`.\n\n## Side Effects\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Sets `AgentSession.#checkpointState` in memory.\n - Records the checkpoint boundary as a message count plus a session entry ID.\n - Enables the later yield guard: if a checkpoint is active and no rewind report is pending, `#enforceRewindBeforeYield()` injects a developer-role warning and schedules another turn.\n- User-visible prompts / interactive UI\n - The tool result tells the model to call `rewind` after the investigation.\n - If the agent tries to `yield` first, `AgentSession` injects:\n\n```text\n<system-warning>\nYou are in an active checkpoint. You MUST call rewind with your investigation findings before yielding. Do NOT yield without completing the checkpoint.\n</system-warning>\n```\n\n## Limits & Caps\n- Availability is gated by `checkpoint.enabled`, default `false`, in `packages/coding-agent/src/config/settings-schema.ts`.\n- The tool is registered as discoverable in `packages/coding-agent/src/tools/index.ts`.\n- Only one active checkpoint is allowed per top-level session.\n- Checkpoint state is not persisted as a dedicated session entry. If the process exits, a resumed session can reload the conversation history, but not the live `#checkpointState` guard.\n- Session persistence still applies to the ordinary checkpoint tool call message. Global session persistence truncation is `MAX_PERSIST_CHARS = 500_000` in `packages/coding-agent/src/session/session-manager.ts`.\n\n## Errors\n- `ToolError(\"Checkpoint not available in subagents.\")` — thrown for subagent sessions.\n- `ToolError(\"Checkpoint already active.\")` — thrown when a prior checkpoint has not been rewound or cleared.\n- The tool body has no local `try/catch`; unexpected exceptions propagate.\n\n## Notes\n- Despite the summary string `Create a git-based checkpoint to save and restore session state`, the implementation does not call git and does not snapshot filesystem state.\n- Captured state is conversation/session metadata only:\n - in-memory message count\n - session entry ID in the session tree\n - timestamp\n- Not captured:\n - working tree contents\n - staged changes\n - artifacts\n - blob-store contents\n - SQLite history rows from `packages/coding-agent/src/session/history-storage.ts`\n - auth or agent records from `packages/coding-agent/src/session/agent-storage.ts`\n- If the turn ends with `stopReason === \"aborted\"` while a checkpoint is active, `AgentSession` clears `#checkpointState` and `#pendingRewindReport` instead of preserving a half-finished checkpoint.\n",
|
|
68
68
|
"tools/debug.md": "# debug\n\n> Drive one DAP debug session; adjacent debug UI code reuses the same subsystem for logs, raw SSE capture, reports, profiling, and system diagnostics.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/debug.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/debug.md`\n- Key collaborators:\n - `packages/coding-agent/src/dap/session.ts` — session lifecycle, breakpoint/state cache\n - `packages/coding-agent/src/dap/client.ts` — adapter process/socket transport, DAP message loop\n - `packages/coding-agent/src/dap/config.ts` — adapter resolution and auto-selection\n - `packages/coding-agent/src/dap/defaults.json` — built-in adapter definitions\n - `packages/coding-agent/src/dap/types.ts` — request/response/capability shapes\n - `packages/coding-agent/src/tools/tool-timeouts.ts` — per-tool timeout clamp\n - `packages/coding-agent/src/debug/index.ts` — interactive debug selector menu\n - `packages/coding-agent/src/debug/log-viewer.ts` — recent-log TUI viewer\n - `packages/coding-agent/src/debug/raw-sse.ts` — raw SSE TUI viewer\n - `packages/coding-agent/src/debug/raw-sse-buffer.ts` — bounded SSE capture buffer\n - `packages/coding-agent/src/debug/profiler.ts` — CPU/heap profiling helpers\n - `packages/coding-agent/src/debug/report-bundle.ts` — `.tar.gz` report bundling, log source, cache cleanup\n - `packages/coding-agent/src/debug/system-info.ts` — system snapshot collection and env redaction\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `action` | `\"launch\" \\| \"attach\" \\| \"set_breakpoint\" \\| \"remove_breakpoint\" \\| \"set_instruction_breakpoint\" \\| \"remove_instruction_breakpoint\" \\| \"data_breakpoint_info\" \\| \"set_data_breakpoint\" \\| \"remove_data_breakpoint\" \\| \"continue\" \\| \"step_over\" \\| \"step_in\" \\| \"step_out\" \\| \"pause\" \\| \"evaluate\" \\| \"stack_trace\" \\| \"threads\" \\| \"scopes\" \\| \"variables\" \\| \"disassemble\" \\| \"read_memory\" \\| \"write_memory\" \\| \"modules\" \\| \"loaded_sources\" \\| \"custom_request\" \\| \"output\" \\| \"terminate\" \\| \"sessions\"` | Yes | Dispatch key for the tool switch in `packages/coding-agent/src/tools/debug.ts`. |\n| `program` | `string` | No | Launch target path. Required for `launch`. Resolved relative to `cwd` if provided, otherwise session cwd. |\n| `args` | `string[]` | No | Program argv for `launch`. |\n| `adapter` | `string` | No | Explicit adapter name. Otherwise `selectLaunchAdapter()` / `selectAttachAdapter()` auto-pick from `packages/coding-agent/src/dap/config.ts`. |\n| `cwd` | `string` | No | Launch/attach working directory. Defaults to session cwd. |\n| `file` | `string` | No | Source file path for source breakpoints. |\n| `line` | `number` | No | Source line for source breakpoints. |\n| `function` | `string` | No | Function breakpoint name. Mutually exclusive with `file`+`line` in breakpoint actions. |\n| `name` | `string` | No | Data breakpoint info target name. Required for `data_breakpoint_info`. |\n| `condition` | `string` | No | Conditional expression for source/function/instruction/data breakpoints. |\n| `hit_condition` | `string` | No | Hit-count condition for instruction/data breakpoints. |\n| `expression` | `string` | No | Expression or raw debugger command. Required for `evaluate`. |\n| `context` | `string` | No | Evaluate context. Defaults to `\"repl\"`. Passed through as DAP evaluate context. |\n| `frame_id` | `number` | No | Frame selector for `evaluate`, `scopes`, `data_breakpoint_info`. `scopes` and `evaluate` default to the current stopped frame when omitted. |\n| `scope_id` | `number` | No | Variables reference from a scope. Accepted by `variables`; also used as a fallback variables reference for `data_breakpoint_info`. |\n| `variable_ref` | `number` | No | Variables reference for `variables`; preferred over `scope_id` when both are present. |\n| `pid` | `number` | No | Local process id for `attach`. `attach` requires `pid` or `port`. |\n| `port` | `number` | No | Remote attach port. If no adapter is forced, attach prefers `debugpy` when `port` is present. |\n| `host` | `string` | No | Remote attach host for `attach`. |\n| `levels` | `number` | No | Max stack frames for `stack_trace`. |\n| `memory_reference` | `string` | No | Memory reference/address for `disassemble`, `read_memory`, `write_memory`. `disassemble` also accepts it via `instruction_reference` fallback logic in `resolveDisassemblyReference()`. |\n| `instruction_reference` | `string` | No | Instruction breakpoint reference; required for instruction breakpoint actions. |\n| `instruction_count` | `number` | No | Required for `disassemble`. |\n| `instruction_offset` | `number` | No | Instruction offset for `disassemble`. |\n| `count` | `number` | No | Byte count for `read_memory`. Required there. |\n| `data` | `string` | No | Base64 payload for `write_memory`. Required there. |\n| `data_id` | `string` | No | Data breakpoint id. Required for `set_data_breakpoint` / `remove_data_breakpoint`. |\n| `access_type` | `\"read\" \\| \"write\" \\| \"readWrite\"` | No | Access filter for `set_data_breakpoint`. |\n| `command` | `string` | No | Custom DAP request command. Required for `custom_request`. |\n| `arguments` | `Record<string, unknown>` | No | Custom DAP request body for `custom_request`. |\n| `offset` | `number` | No | Offset for instruction breakpoints, disassembly, memory read, memory write. |\n| `resolve_symbols` | `boolean` | No | `disassemble` symbol-resolution flag. |\n| `allow_partial` | `boolean` | No | `write_memory` partial-write allowance. |\n| `start_module` | `number` | No | Modules pagination start index for `modules`. |\n| `module_count` | `number` | No | Modules pagination count for `modules`. |\n| `timeout` | `number` | No | Per-request timeout in seconds. Default `30`, clamped to `5..300`. |\n\n### Action-specific requirements\n- `launch`: `program`\n- `attach`: `pid` or `port`\n- `set_breakpoint` / `remove_breakpoint`: `function`, or `file` + `line`\n- `set_instruction_breakpoint` / `remove_instruction_breakpoint`: `instruction_reference`\n- `data_breakpoint_info`: `name`\n- `set_data_breakpoint` / `remove_data_breakpoint`: `data_id`\n- `evaluate`: `expression`\n- `variables`: `variable_ref` or `scope_id`\n- `disassemble`: capability `supportsDisassembleRequest`, plus `instruction_count`\n- `read_memory`: capability `supportsReadMemoryRequest`, plus `memory_reference` and `count`\n- `write_memory`: capability `supportsWriteMemoryRequest`, plus `memory_reference` and `data`\n- `modules`: capability `supportsModulesRequest`\n- `loaded_sources`: capability `supportsLoadedSourcesRequest`\n- `custom_request`: `command`\n\n### Interactive selector values\n`packages/coding-agent/src/debug/index.ts` also exposes a fixed UI-only selector with values `open-artifacts`, `performance`, `work`, `dump`, `memory`, `logs`, `system`, `raw-sse`, `transcript`, `clear-cache`. These are not model-callable through `debugSchema`; they are local TUI menu routes.\n\n## Outputs\nThe agent tool returns a standard `toolResult()` payload from `packages/coding-agent/src/tools/debug.ts`:\n- `content`: one text block. Every action renders human-readable text; there is no structured JSON block in `content`.\n- `details.action`: echoed action.\n- `details.success`: always initialized `true`; failures surface by throwing before a result is returned.\n- `details.snapshot`: present for actions that operate on or create a session, using `DapSessionSummary` from `packages/coding-agent/src/dap/types.ts`.\n- Action-specific `details` fields:\n - `launch` / `attach`: `adapter`\n - breakpoint actions: `breakpoints`, `functionBreakpoints`, `instructionBreakpoints`, `dataBreakpoints`\n - `data_breakpoint_info`: `dataBreakpointInfo`\n - `continue` / `step_*`: `state`, `timedOut`\n - `threads`: `threads`\n - `stack_trace`: `stackFrames`\n - `scopes`: `scopes`\n - `variables`: `variables`\n - `evaluate`: `evaluation`\n - `disassemble`: `disassembly`\n - `read_memory`: `memoryAddress`, `memoryData`, `unreadableBytes`\n - `write_memory`: `bytesWritten`\n - `modules`: `modules`\n - `loaded_sources`: `sources`\n - `custom_request`: `customBody`\n - `output`: `output`\n - `sessions`: `sessions`\n\nStreaming/UI behavior:\n- The tool renderer merges call and result (`mergeCallAndResult: true`) and renders inline.\n- `debug.ts` itself does not emit progress updates through `_onUpdate`; result delivery is single-shot.\n- The interactive selector is UI-driven instead of model-driven. It swaps TUI components, appends status lines to the chat pane, opens files in external viewers, or writes archives/temp files.\n\nSide-channel artifacts outside the model tool result:\n- `createReportBundle()` writes `omp-report-<timestamp>.tar.gz` under the reports dir and returns the filesystem path to the UI handler.\n- `#handleWorkReport()` writes `/tmp/work-profile-<Date.now()>.svg` before opening it.\n- `RawSseViewerComponent` and `DebugLogViewerComponent` can copy captured text to the clipboard.\n\n## Flow\n1. Tool registration is conditional: `DebugTool.createIf()` in `packages/coding-agent/src/tools/debug.ts` returns `null` unless `session.settings.get(\"debug.enabled\")` is true. `packages/coding-agent/src/tools/index.ts` wires the factory and rechecks the same setting in tool filtering.\n2. `DebugTool.execute()` clamps `params.timeout` through `clampTimeout(\"debug\", params.timeout)` and composes the caller `AbortSignal` with `AbortSignal.timeout(...)`.\n3. `launch` and `attach` resolve cwd/program paths, select an adapter in `packages/coding-agent/src/dap/config.ts`, then delegate to `dapSessionManager.launch()` / `.attach()`.\n4. `DapSessionManager.launch()` / `.attach()` enforce the single-session rule with `#ensureLaunchSlot()`, spawn the adapter through `DapClient.spawn()`, register listeners, send `initialize`, cache capabilities, start listening for an initial stop event before sending `launch`/`attach`, then complete the `initialized` → `configurationDone` handshake in `#completeConfigurationHandshake()`.\n5. `DapClient.spawn()` starts the adapter detached with `NON_INTERACTIVE_ENV`. Most adapters use stdio; socket-mode adapters (`dlv`) use `#spawnSocketUnix()` on Linux or `#spawnSocketClientAddr()` on macOS/other.\n6. `#registerSession()` in `packages/coding-agent/src/dap/session.ts` installs reverse-request handlers:\n - `runInTerminal`: spawns the requested debuggee command detached via `ptree.spawn()` and returns `{ processId }`\n - `startDebugging`: logs the child-session request and returns `{}`; it does not create nested sessions\n - events: `output`, `initialized`, `stopped`, `continued`, `exited`, `terminated` update cached session state\n7. Operational actions (`set_breakpoint`, `evaluate`, `threads`, `read_memory`, `custom_request`, and similar) call `dapSessionManager` methods. Most flow through `#sendRequestWithConfig()`, which first sends `configurationDone` when required, then sends the DAP request, then updates `lastUsedAt`.\n8. Breakpoint actions maintain local cached breakpoint sets in `DapSessionManager` and remap adapter responses back onto those cached records.\n9. `continue` and the three step actions clear cached stop state, subscribe for `stopped`/`terminated`/`exited` before sending the DAP request, then `#awaitStopOutcome()` either returns the new stopped location or reports that the program is still running after timeout.\n10. `pause` sends DAP `pause`, waits for a stopped event if needed, and reuses cached stop state if the program was already stopped.\n11. `stack_trace`, `scopes`, `variables`, and `evaluate` default to the current stopped thread/frame when the caller omits ids and cached state is available.\n12. `output` reads the in-memory output ring from `DapSessionManager.getOutput()`. `terminate` sends `terminate` when supported, always attempts `disconnect`, marks the session terminated, and disposes the client.\n13. `sessions` reads the manager’s current map and formats all summaries. Although the manager stores a map, only one active session can exist because new launch/attach calls are blocked until the active one is terminated or cleaned up.\n14. The interactive selector in `packages/coding-agent/src/debug/index.ts` builds a `SelectList` of fixed values and dispatches each to a handler:\n - `performance`: `startCpuProfile()`, wait for Enter/Escape, stop profiling, read a 30-second work profile with `getWorkProfile(30)`, then bundle via `createReportBundle()`\n - `work`: read `getWorkProfile(30)`, write a temp SVG, open it externally\n - `dump`: create a report bundle immediately\n - `memory`: force GC, call `Bun.generateHeapSnapshot(\"v8\")`, then bundle\n - `logs`: build a `DebugLogSource` and mount `DebugLogViewerComponent`\n - `raw-sse`: resolve a `RawSseDebugBuffer` from the session and mount `RawSseViewerComponent`\n - `system`: call `collectSystemInfo()` and render `formatSystemInfo()` into the chat pane\n - `open-artifacts`: open the current session artifact directory if it exists\n - `transcript`: delegates to `ctx.handleDebugTranscriptCommand()`\n - `clear-cache`: show confirmation, then remove artifact directories older than 30 days with `clearArtifactCache()`\n\n## Modes / Variants\n- **Availability gate**\n - Tool hidden when `debug.enabled` is false.\n- **Adapter selection**\n - `launch`: explicit `adapter` wins; otherwise `selectLaunchAdapter()` ranks available adapters by extension match, root-marker match, then native-debugger preference (`gdb`, `lldb-dap`) for extensionless binaries.\n - `attach`: explicit `adapter` wins; otherwise remote `port` prefers `debugpy`, then native debuggers, then first available adapter.\n- **Transport**\n - stdio adapters: direct `stdin`/`stdout` framing.\n - socket adapters: Unix domain socket on Linux; TCP callback on macOS/other.\n- **DAP agent-tool actions**\n - `launch` — spawn adapter, initialize session, maybe stop on entry; returns formatted session snapshot and `details.adapter`.\n - `attach` — connect to a live process or remote port; same output shape as `launch`.\n - `set_breakpoint` — source or function breakpoint add/update; returns the current breakpoint list for that target.\n - `remove_breakpoint` — source or function breakpoint removal; returns the remaining breakpoint list.\n - `set_instruction_breakpoint` / `remove_instruction_breakpoint` — require `supportsInstructionBreakpoints`; return current instruction breakpoint list.\n - `data_breakpoint_info` — require `supportsDataBreakpoints`; asks the adapter for a `dataId`, access types, and description for `name`.\n - `set_data_breakpoint` / `remove_data_breakpoint` — require `supportsDataBreakpoints`; return the cached data-breakpoint list.\n - `continue` / `step_over` / `step_in` / `step_out` — return text describing whether execution stopped, terminated, or kept running, plus `details.state` and `details.timedOut`.\n - `pause` — interrupts a running target and returns a stopped snapshot.\n - `evaluate` — adapter expression evaluation; defaults context to `repl`.\n - `stack_trace` — fetches frames for the resolved thread.\n - `threads` — fetches current threads.\n - `scopes` — frame scopes for an explicit `frame_id` or the current stopped frame.\n - `variables` — variables for `variable_ref` or `scope_id`.\n - `disassemble` — require `supportsDisassembleRequest`; disassembles around a memory reference.\n - `read_memory` — require `supportsReadMemoryRequest`; returns address, base64 data, unreadable-byte count.\n - `write_memory` — require `supportsWriteMemoryRequest`; writes base64 data and reports bytes written.\n - `modules` — require `supportsModulesRequest`; optional pagination via `start_module` / `module_count`.\n - `loaded_sources` — require `supportsLoadedSourcesRequest`; returns loaded source descriptors.\n - `custom_request` — sends any DAP request name with arbitrary arguments.\n - `output` — dumps captured stdout/stderr/console text from the session cache.\n - `terminate` — disconnects and disposes the active session; returns `No debug session to terminate.` when none exists.\n - `sessions` — lists all cached session summaries.\n- **Interactive selector routes (UI-only)**\n - `logs` — loads today’s log tail and optional older daily log files into `DebugLogViewerComponent`; supports copy, range selection, pid filtering, load-older.\n - `raw-sse` — live view over the session’s `RawSseDebugBuffer`; supports tail-follow, scrolling, copy-all.\n - `performance` — CPU profile + 30-second work profile + report bundle.\n - `memory` — heap snapshot + report bundle.\n - `dump` — report bundle without profiler artifacts.\n - `work` — standalone work-profile flamegraph export/open.\n - `system` — formatted OS/arch/CPU/memory/version/cwd/shell/terminal dump.\n - `open-artifacts` / `transcript` / `clear-cache` — artifact directory open, transcript export, artifact-cache pruning.\n\n## Side Effects\n- Filesystem\n - Resolves program/file/cwd paths against the session cwd.\n - Report creation writes `.tar.gz` bundles and may read the session JSONL, artifact files, subagent session JSONLs, and log files.\n - Work-profile export writes `/tmp/work-profile-<timestamp>.svg`.\n - Log source reads daily log files from the logs dir.\n - Artifact-cache cleanup removes session artifact directories older than the cutoff.\n - `resolveRawSseDebugBuffer()` may attach a non-enumerable `rawSseDebugBuffer` property to the owner object.\n- Network\n - Socket-mode adapters bind/connect local sockets.\n - Remote attach may connect through the adapter to a remote debug port.\n- Subprocesses / native bindings\n - Spawns debugger adapters (`gdb`, `lldb-dap`, `python -m debugpy.adapter`, `dlv`, and others from `defaults.json`) detached.\n - Reverse DAP `runInTerminal` requests spawn the debuggee detached via `ptree.spawn()`.\n - `getWorkProfile(30)` comes from `@oh-my-pi/pi-natives`.\n - CPU profiling uses `node:inspector/promises`; heap snapshots use `Bun.generateHeapSnapshot(\"v8\")`; raw/log viewers sanitize text via `@oh-my-pi/pi-natives`.\n - `openPath()` launches the OS default file/browser handler for artifact dirs and SVGs.\n - Log/raw-SSE viewers can call `copyToClipboard()`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - `DapSessionManager` keeps session summaries, breakpoints, threads, stack frames, stop location, output capture, capabilities, and last-used timestamps in memory.\n - Active-session id is global to the singleton `dapSessionManager`.\n - `RawSseDebugBuffer` stores recent SSE events per owner/session.\n - The tool is `exclusive`; concurrent debug tool calls are blocked by the scheduler.\n- User-visible prompts / interactive UI\n - Debug selector shows confirmation before cache deletion.\n - Performance profiling temporarily hijacks editor Enter/Escape handlers until profiling stops.\n - Log/raw-SSE viewers replace the editor pane with custom components.\n- Background work / cancellation\n - Every DAP request accepts an `AbortSignal`; timeouts and caller cancellation abort the active request, not the whole session lifetime.\n - `DapSessionManager` runs a background cleanup loop every 30 seconds.\n - Raw SSE viewers subscribe to buffer updates until closed.\n\n## Limits & Caps\n- Tool timeout clamp: `default=30`, `min=5`, `max=300` in `packages/coding-agent/src/tools/tool-timeouts.ts`.\n- Per-request DAP default timeout: `DEFAULT_REQUEST_TIMEOUT_MS = 30_000` in `packages/coding-agent/src/dap/client.ts`.\n- Single active session: enforced by `#ensureLaunchSlot()` in `packages/coding-agent/src/dap/session.ts`.\n- Idle session cleanup: `IDLE_TIMEOUT_MS = 10 * 60 * 1000`, checked every `CLEANUP_INTERVAL_MS = 30 * 1000`.\n- Adapter liveness heartbeat: `HEARTBEAT_INTERVAL_MS = 5 * 1000`.\n- Output capture cap: `MAX_OUTPUT_BYTES = 128 * 1024`; older text is trimmed in ~1 KiB slices and `outputTruncated` is recorded.\n- Initial stop capture timeout after launch/attach: `STOP_CAPTURE_TIMEOUT_MS = 5_000`.\n- Socket-mode adapter readiness timeout: `10_000` ms in `waitForCondition()` and TCP connect timeout logic in `packages/coding-agent/src/dap/client.ts`.\n- Raw SSE buffer caps in `packages/coding-agent/src/debug/raw-sse-buffer.ts`:\n - `MAX_RAW_SSE_EVENTS = 1_000`\n - `MAX_RAW_SSE_CHARS = 512_000`\n - `MAX_RAW_SSE_EVENT_CHARS = 64_000` per event, with `: omp-debug-truncated ...` marker appended on trim\n- Log viewer window in `packages/coding-agent/src/debug/log-viewer.ts`:\n - `INITIAL_LOG_CHUNK = 50`\n - `LOAD_OLDER_CHUNK = 50`\n- Report/log ingestion caps in `packages/coding-agent/src/debug/report-bundle.ts`:\n - `MAX_LOG_LINES = 5000` for interactive log reading\n - `MAX_LOG_BYTES = 2 * 1024 * 1024` tail-read ceiling\n - report bundles include only the last `1000` log lines\n - subagent session inclusion is capped at the most recent `10` JSONL files\n- Interactive profiling windows in `packages/coding-agent/src/debug/index.ts`: both performance and work reports request `getWorkProfile(30)`.\n- Artifact cache pruning default: `30` days in `clearArtifactCache()` and the selector confirmation text.\n\n## Errors\n- Parameter validation in `packages/coding-agent/src/tools/debug.ts` throws `ToolError` with explicit messages such as:\n - `program is required for launch`\n - `attach requires pid or port`\n - `set_breakpoint requires file+line or function`\n - `variables requires variable_ref or scope_id`\n - `memory_reference is required for read_memory`\n - `count is required for read_memory`\n - `data is required for write_memory`\n - `command is required for custom_request`\n- Adapter selection failure throws `No debugger adapter available. Installed adapters: ...`.\n- Capability-gated actions throw from `requireCapability(...)`, e.g. `Active adapter does not support memory reads.`\n- No-session and state errors come from `DapSessionManager`, e.g. `No active debug session. Launch or attach first.`, `No active stack frame. Run stack_trace first or supply frame_id.`, `Debugger reported no threads.`\n- Launching a second live session throws `Debug session <id> is still active. Terminate it before launching another.`\n- DAP transport/request failures surface as thrown errors from `DapClient`:\n - `DAP request <command> timed out after <ms>ms`\n - `DAP event <event> timed out after <ms>ms`\n - `DAP adapter <name> is not running`\n - `DAP adapter exited (code N): <stderr>` or `DAP adapter exited unexpectedly (code N)`\n - adapter response `message` when a DAP request fails\n- `continue` / `step_*` are intentionally non-fatal when the target stays running past the timeout: they return `details.timedOut = true` and `state: \"running\"` instead of throwing.\n- `terminate` suppresses adapter errors while sending `terminate`/`disconnect`; it still disposes the client and returns the last summary when possible.\n- Interactive selector handlers report UI errors instead of throwing:\n - profiler start/stop, report bundling, log reading, system-info collection, cache clearing, and artifact opening use `ctx.showError(...)` / `ctx.showWarning(...)`\n - empty logs and empty artifact caches are warnings/status messages, not failures\n - copy failures in log/raw-SSE viewers become status/error text in the UI\n- Report-bundle helpers are intentionally best-effort for many file reads: missing session files, missing artifact dirs, unreadable artifact files, missing log dirs, inaccessible cache dirs, and missing subagent files are skipped silently.\n- `collectSystemInfo()` is best-effort for CPU probing; failure there falls back to `Unknown CPU`.\n\n## Notes\n- `packages/coding-agent/src/prompts/tools/debug.md` tells the model only one active session is supported; that is not advisory, it is enforced in code.\n- `configurationDone` is sent automatically both during launch/attach handshake and lazily before later requests if the adapter required it and the initial handshake did not complete.\n- `startDebugging` reverse requests are acknowledged but not implemented; child debug sessions are not spawned.\n- `output` exposes the merged `output` event stream only; the tool does not distinguish stdout, stderr, and console categories.\n- Session summaries expose `needsConfigurationDone`; this is derived from adapter capabilities and whether `configurationDone` has been sent.\n- Source breakpoint file paths are normalized with `path.resolve()` before caching and sending to the adapter.\n- `evaluate` defaults to `repl`, so the tool can forward raw debugger commands when the adapter supports them.\n- `disassemble` resolves its target from `memory_reference` first, then `instruction_reference`; it throws if neither is present.\n- `RawSseDebugBuffer.recordEvent()` increments `totalEvents` before bounded retention. A snapshot can therefore show fewer retained records than total observed events.\n- Raw SSE buffer listener failures are swallowed so viewer bugs do not break capture.\n- `createDebugLogSource()` walks daily log files newest-first, but `loadOlderLogs()` reverses each requested slice before concatenation so older chunks prepend in chronological order.\n- `clearArtifactCache()` deletes directories by directory mtime, not per-file age.\n- `addDirectoryToArchive()` reads artifact files as text with `Bun.file(...).text()`. Binary artifact contents are not preserved byte-for-byte in the report bundle.\n- The tool renderer truncates displayed output for the TUI preview, but the underlying text result still contains the full returned string.\n",
|
|
69
69
|
"tools/edit.md": "# edit\n\n> Applies source edits; default mode is the hashline patch language consumed from a single `input` string.\n\n## Source\n- Entry: `packages/coding-agent/src/edit/index.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/hashline.md`\n- Key collaborators:\n - `packages/coding-agent/src/utils/edit-mode.ts` — selects active edit mode\n - `packages/coding-agent/src/hashline/grammar.lark` — custom-tool grammar for hashline mode\n - `packages/coding-agent/src/hashline/input.ts` — splits `@@ PATH` sections (legacy single-`@` headers are still accepted)\n - `packages/coding-agent/src/hashline/parser.ts` — parses ops and payload lines\n - `packages/coding-agent/src/hashline/apply.ts` — validates anchors and applies edits\n - `packages/coding-agent/src/hashline/anchors.ts` — stale-anchor mismatch formatting\n - `packages/coding-agent/src/hashline/recovery.ts` — cache-based stale-anchor recovery\n - `packages/coding-agent/src/hashline/hash.ts` — computes `LINEhh|` anchors shared with `read`/`search`\n - `packages/coding-agent/src/edit/file-read-cache.ts` — per-session read snapshot cache\n - `packages/coding-agent/src/tools/read.ts` — emits anchored lines and records read snapshots\n - `packages/coding-agent/src/tools/search.ts` — records sparse snapshots from matches/context\n - `packages/coding-agent/src/tools/fs-cache-invalidation.ts` — invalidates FS scan caches after writes\n - `packages/coding-agent/src/edit/streaming.ts` — computes in-flight diff previews for the TUI\n\n## Inputs\n\n### Hashline mode (default)\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `input` | `string` | Yes | One or more edit sections. First non-blank line must be `@@ PATH` (legacy single-`@` is still accepted) unless the caller supplies the legacy fallback `path` outside the model schema and the body already looks like hashline ops (`packages/coding-agent/src/hashline/input.ts`). Optional `*** Begin Patch` / `*** End Patch` envelope is ignored if present. |\n\nPatch language inside `input`:\n\n- Section header: `@@ PATH`\n- Insert after: `+ ANCHOR`\n- Insert before: `< ANCHOR`\n- Delete range: `- A..B`\n- Replace range: `= A..B`\n- Payload line: `~TEXT` by default; separator is `HL_EDIT_SEP` and can be overridden once at process start by `PI_HL_SEP` (`packages/coding-agent/src/hashline/hash.ts`)\n- Special anchors: `BOF`, `EOF`\n- Anchor token: `<line><2-char-hash>`, for example `41th`\n\nAnchors come from `read`/`search` output. `read` formats lines as `LINEhh|TEXT` via `formatHashLine` / `formatHashLines` in `packages/coding-agent/src/hashline/hash.ts`; copy only the token left of `|` into op lines.\n\nOther edit modes exist (`replace`, `patch`, `vim`, `apply_patch`) and are selected outside the tool payload by `resolveEditMode()` in `packages/coding-agent/src/utils/edit-mode.ts`. Their schemas are different; this document covers the default hashline mode.\n\n## Outputs\n- Single-shot tool result; hashline mode does not use a `resolve` preview/apply handshake.\n- `content` contains one text block per call. For a successful single-file edit it is either:\n - `<path>:` plus a compact diff preview from `packages/coding-agent/src/hashline/diff-preview.ts`, or\n - `Updated <path>` / `Created <path>` when no compact preview text is emitted.\n- Parse or recovery warnings are appended as:\n\n```text\nWarnings:\n...\n```\n\n- `details` is `EditToolDetails` from `packages/coding-agent/src/edit/renderer.ts`:\n - `diff`: unified diff string\n - `firstChangedLine`: first changed post-edit line\n - `diagnostics`: LSP/format result if available\n - `op`: `\"create\"` or `\"update\"` for hashline mode\n - `meta`: output metadata\n - `perFileResults`: present for multi-section input\n- Multi-section input returns one aggregated result with combined text and per-file details.\n- While the model is still typing arguments, the TUI can compute a diff preview with `packages/coding-agent/src/edit/streaming.ts`; that preview is not a deferred action and does not block execution.\n\n## Flow\n1. `EditTool.execute()` in `packages/coding-agent/src/edit/index.ts` resolves the active mode. Default is `hashline`; `customFormat` exposes `packages/coding-agent/src/hashline/grammar.lark` with `$HFMT$` / `$HSEP$` placeholders filled from `packages/coding-agent/src/hashline/hash.ts`.\n2. `executeHashlineSingle()` in `packages/coding-agent/src/hashline/execute.ts` splits the raw `input` into `@PATH` sections with `splitHashlineInputs()`.\n3. If multiple sections target the same path, `mergeSamePathSections()` concatenates them before execution so every op still refers to the original file snapshot.\n4. Multi-section calls run a preflight pass (`preflightHashlineSection()`): parse ops, enforce plan-mode write rules, load the current file, reject anchor-scoped edits against missing files, reject auto-generated files, apply edits in memory, and fail if the result is a no-op. This prevents partial batches.\n5. `parseHashlineWithWarnings()` in `packages/coding-agent/src/hashline/parser.ts` tokenizes the diff body:\n - ignores blank lines and optional `*** Begin Patch`\n - stops at `*** End Patch`\n - stops at `*** Abort` and emits `ABORT_WARNING`\n - turns `+` / `<` payload runs into one `insert` edit per payload line\n - turns `- A..B` into one `delete` edit per line in the range\n - turns `= A..B` into inserts before `A`, then deletes for `A..B`; no payload means replace with a single empty line\n6. `applyHashlineEdits()` in `packages/coding-agent/src/hashline/apply.ts` validates every referenced anchor before mutating anything. Each anchor hash is recomputed from current file content with `computeLineHash()`.\n7. If any anchor hash differs, `applyHashlineEdits()` throws `HashlineMismatchError`. `execute.ts` catches only that class and calls `tryRecoverHashlineWithCache()`.\n8. Recovery replays the edits against the most recent cached read/search snapshot for that path (`packages/coding-agent/src/edit/file-read-cache.ts`), then 3-way merges the result onto current disk content using `Diff.applyPatch(..., { fuzzFactor: 3 })` in `packages/coding-agent/src/hashline/recovery.ts`. On success the edit proceeds with a warning; on failure the original mismatch error is re-thrown.\n9. Before splicing lines, `absorbReplacementBoundaryDuplicates()` normalizes some malformed-but-recoverable ranges:\n - duplicate prefix/suffix lines adjacent to a replacement can be absorbed by widening the delete range\n - pure inserts can auto-drop duplicated leading/trailing payload lines when `edit.hashlineAutoDropPureInsertDuplicates` is enabled\n - all such fixes append warnings\n10. `after_anchor` inserts are normalized to `before_anchor` of the next line, or `EOF` if the anchor was the last line.\n11. Anchor-targeted edits are bucketed by target line and applied bottom-up so earlier splices do not invalidate later original line numbers. `BOF` and `EOF` inserts are applied after that.\n12. The edited text is restored to the original BOM and line ending style with helpers from `packages/coding-agent/src/edit/normalize.ts` and persisted via `serializeEditFileText()` in `packages/coding-agent/src/edit/read-file.ts`.\n13. The writethrough callback from `createLspWritethrough()` may format the file and fetch diagnostics. Late diagnostics are queued back into session state as a hidden deferred message by `EditTool.#injectLateDiagnostics()` in `packages/coding-agent/src/edit/index.ts`.\n14. `invalidateFsScanAfterWrite()` calls `invalidateFsScanCache(path)` so filesystem-backed tools do not serve stale scan results.\n15. The session file-read cache is refreshed with the post-edit file text via `recordContiguous()`, making the just-written content the new recovery base for subsequent stale-anchor merges.\n16. The final response is built from a unified diff (`generateDiffString()`), a compact preview, and any accumulated warnings.\n\n## Modes / Variants\n- `hashline` — default mode; line-anchored patch language described here (`packages/coding-agent/src/utils/edit-mode.ts`).\n- `replace` — exact/fuzzy old/new text replacement (`packages/coding-agent/src/edit/modes/replace.ts`).\n- `patch` — structured JSON diff-hunk mode (`packages/coding-agent/src/edit/modes/patch.ts`).\n- `apply_patch` — freeform Codex-style `*** Begin Patch` envelope, internally expanded into patch-mode entries (`packages/coding-agent/src/edit/modes/apply-patch.ts`).\n- `vim` — persistent modal editing buffer (`packages/coding-agent/src/tools/vim.ts`).\n\nHashline op examples:\n\n```text\n@@ src/a.ts\n+ 4fb\n~const added = true;\n```\n\n```text\n@@ src/a.ts\n< 4fb\n~const addedBefore = true;\n```\n\n```text\n@@ src/a.ts\n- 4fb..6qx\n```\n\n```text\n@@ src/a.ts\n= 4fb..5dm\n~const clean = (name || DEF).trim();\n~return clean.length === 0 ? DEF : clean.toUpperCase();\n```\n\nBOF/EOF examples:\n\n```text\n@@ src/a.ts\n+ BOF\n~const HEADER = true;\n```\n\n```text\n@@ src/a.ts\n+ EOF\n~export const done = true;\n```\n\n## Side Effects\n- Filesystem\n - Reads target files with `readEditFileText()`.\n - Writes full updated file contents with `serializeEditFileText()`.\n - Preserves BOM and original line-ending style.\n- Subprocesses / native bindings\n - `createLspWritethrough()` may trigger formatter / diagnostics work through the LSP subsystem.\n - `invalidateFsScanAfterWrite()` calls native `invalidateFsScanCache()` from `@oh-my-pi/pi-natives`.\n- Session state\n - Reads and updates the per-session `FileReadCache` used for stale-anchor recovery.\n - Stores pending deferred-diagnostics abort controllers per path inside `EditTool`.\n - Queues late diagnostics back into the session transcript as a hidden custom message.\n- Background work / cancellation\n - A new edit to the same path aborts the prior deferred diagnostics fetch for that path (`packages/coding-agent/src/edit/index.ts`).\n - The tool itself is marked `nonAbortable = true` and `concurrency = \"exclusive\"` in `packages/coding-agent/src/edit/index.ts`.\n\n## Limits & Caps\n- Default mode is `hashline` (`DEFAULT_EDIT_MODE`) in `packages/coding-agent/src/utils/edit-mode.ts`.\n- Anchor hashes are always 2 lowercase letters from a stable 647-entry bigram table (`HL_BIGRAMS_COUNT`) in `packages/coding-agent/src/hashline/hash.ts`.\n- The visible mismatch report shows 2 lines of context on each side (`MISMATCH_CONTEXT`) in `packages/coding-agent/src/hashline/constants.ts`.\n- Stale-anchor recovery uses `fuzzFactor: 3` (`HASHLINE_RECOVERY_FUZZ_FACTOR`) in `packages/coding-agent/src/hashline/recovery.ts`.\n- The per-session read cache keeps at most 30 paths (`MAX_PATHS_PER_SESSION`) in `packages/coding-agent/src/edit/file-read-cache.ts`.\n- Hashline streaming chunk defaults are 200 lines or 64 KiB per chunk (`packages/coding-agent/src/hashline/types.ts`, consumed by `packages/coding-agent/src/hashline/stream.ts`).\n- `HL_EDIT_SEP` defaults to `~`; `HL_BODY_SEP` is always `|` (`packages/coding-agent/src/hashline/hash.ts`).\n\n## Errors\n- Missing section header:\n - `input must begin with \"@@ PATH\" on the first non-blank line; got: ... Example: \"@@ src/foo.ts\" then edit ops.`\n- Empty header:\n - `Input header \"@\" is empty; provide a file path.`\n- Bad anchor token:\n - `line N: expected a full anchor such as \"119sr\"; got \"...\".`\n- Bad range syntax:\n - `line N: explicit ranges are required for delete/replace...`\n - `line N: range must include exactly two full anchors separated by \"..\".`\n - `line N: range A..B ends before it starts.`\n - `line N: range A..B uses two different hashes for the same line.`\n- Missing payload for `+` / `<`:\n - `line N: + and < operations require at least one ~TEXT payload line.`\n- Stray payload line:\n - `line N: payload line has no preceding +, <, or = operation.`\n- Unknown op:\n - `line N: unrecognized op. Use < ANCHOR..., + ANCHOR..., - A..B..., = A..B...`\n- Missing file for anchor-scoped edits:\n - `File not found: <path>`\n- Out-of-range anchor:\n - `Line N does not exist (file has M lines)`\n- Stale anchors throw `HashlineMismatchError`. The error message contains re-read guidance and reprints nearby current file lines as `LINEhh|TEXT`; mismatched lines are marked `*`. `displayMessage` renders the same information in a code-frame style.\n- No-op edit:\n - `Edits to <path> resulted in no changes being made.`\n- Recovery failure is silent internally: if cache-based merge cannot prove a valid result, the original mismatch error is surfaced unchanged.\n\n## Notes\n- `read` and `search` are the authoritative source of anchors. The edit parser does not want the trailing `|TEXT`; copy only the `LINEhh` token.\n- Multi-op patches are parsed against the original file snapshot. Do not renumber later anchors after earlier ops; `applyHashlineEdits()` buckets and applies them bottom-up.\n- `= A..B` is not a primitive replace in the parser. It expands to inserts before `A` plus deletes for `A..B`, which is why stale-anchor checking still happens on the original range lines.\n- Interior lines of a multi-line range use hash `**` (`RANGE_INTERIOR_HASH`) and are not individually verified; only the first and last anchor hashes are checked.\n- `computeLineHash()` trims trailing whitespace before hashing. Anchors survive line-ending changes and trailing-space-only changes, but not substantive line edits.\n- For punctuation-only lines, the hash mixes in the line number; identical `}` lines on different lines intentionally get different anchors.\n- `splitHashlineInputs()` normalizes absolute `@@ PATH` headers back to a cwd-relative path when the file is inside the current working tree. Headers with any run of leading `@` chars (e.g. `@ foo.ts`, `@@ foo.ts`, `@@@foo.ts`) are accepted to absorb unified-diff-style drift; the canonical form is `@@ PATH`.\n- Optional `*** Begin Patch` / `*** End Patch` markers are accepted in hashline mode, but the file sections are still `@@ PATH`-based, not Codex `*** Update File:` hunks.\n- `*** Abort` terminates parsing early and returns `ABORT_WARNING`; ops parsed before the marker still apply.\n- File-read cache invalidation is conflict-based, not write-through invalidation. If `read` later records content for a line that disagrees with the cached snapshot, the entire snapshot for that path is replaced with the newly observed lines (`packages/coding-agent/src/edit/file-read-cache.ts`).\n- There is no resolve-style apply/discard phase for hashline edits. The only preview path is the transient TUI diff preview in `packages/coding-agent/src/edit/streaming.ts`.\n",
|
|
70
|
-
"tools/eval.md": "# eval\n\n> Execute Python or JavaScript code in persistent cell-based runtimes.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/eval.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/eval.md`\n- Key collaborators:\n - `packages/coding-agent/src/eval/parse.ts` — lenient cell parser\n - `packages/coding-agent/src/eval/sniff.ts` — language sniffing heuristics\n - `packages/coding-agent/src/eval/backend.ts` — backend execution contract\n - `packages/coding-agent/src/eval/js/index.ts` — JS backend adapter\n - `packages/coding-agent/src/eval/js/executor.ts` — JS execution + output sink\n - `packages/coding-agent/src/eval/js/context-manager.ts` — persistent VM contexts, prelude, tool bridge\n - `packages/coding-agent/src/eval/js/prelude.txt` — JS global helpers\n - `packages/coding-agent/src/eval/py/index.ts` — Python backend adapter\n - `packages/coding-agent/src/eval/py/executor.ts` — kernel session retention, reset, cleanup\n - `packages/coding-agent/src/eval/py/kernel.ts` — Jupyter gateway/kernel protocol, display capture\n - `packages/coding-agent/src/eval/py/prelude.py` — Python helper functions and status events\n - `packages/coding-agent/src/session/streaming-output.ts` — truncation, artifacts, streamed chunks\n - `docs/python-repl.md` — Python kernel/gateway internals\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `input` | `string` | Yes | Cell program text. Parsed by `parseEvalInput()` in `packages/coding-agent/src/eval/parse.ts`, not by JSON subfields. |\n\n`input` syntax accepted at runtime:\n\n- Cell header: `*** Cell <attrs...>`. Attributes are space-separated tokens with quoted titles (`\"...\"` or `'...'`).\n- Canonical tokens (advertised in the prompt):\n - `<lang>:\"<title>\"` — language + title shorthand. `lang` is `py` or `js` (lenient: also `ts`, plus the long-form aliases `python`, `javascript`, `typescript`, `ipy`, `ipython`).\n - `t:<n>[ms|s|m]` — per-cell timeout (default 30s).\n - `rst` — wipe this cell's language kernel before running.\n- Lenient additional tokens (accepted by the parser, not advertised):\n - bare language token (`py`, `js`)\n - `id:\"...\"` / `title:\"...\"` / `name:\"...\"` / `cell:\"...\"` / `file:\"...\"` / `label:\"...\"` — title aliases\n - `timeout:` / `duration:` / `time:` — `t:` aliases\n - `reset` — `rst` alias\n - `rst:true|false|1|0|yes|no|on|off` — explicit boolean form\n - a bare positional duration token (`30s`, `2m`, `500ms`)\n - any unclassified bare token folds into a positional title fragment\n- Cell body: every following line until the next `*** Cell ...`, the optional `*** End`, or `*** Abort`. `*** End` is a quirk fix for GPT-trained models that emit terminators and is not documented in the prompt.\n\nLeniencies in `packages/coding-agent/src/eval/parse.ts`:\n\n- Markers accept two or more leading `*` and flexible whitespace.\n- `*** End` is optional everywhere; the parser silently consumes trailing tokens (e.g. `*** End py`).\n- Missing terminators between adjacent cells are tolerated; the next `*** Cell` closes the prior cell, and stray non-marker lines between cells fold into the prior cell's body without crashing.\n- Bare code or a single markdown fence such as ```` ```py ```` is treated as one implicit cell.\n- If `*** Abort` appears, the in-progress cell is dropped and the result carries an abort warning. To preserve a completed cell before `*** Abort`, emit `*** End` first.\n\nThe tool also exposes a custom Lark grammar from `packages/coding-agent/src/eval/eval.lark` for constrained sampling. That grammar is stricter than the runtime parser: it requires the canonical `*** Cell <lang>:\"title\"` header form with a fixed attribute order, advertises only `py` / `js`, and pins the trailing `*** End` so GPT-trained models' natural terminator habit aligns with the constrained output.\n\n## Outputs\n\nFinal result from `EvalTool.execute()` is single-shot, but `onUpdate` streams partial text and `details` while cells run.\n\nReturned shape:\n\n- `content`: one text block containing combined cell output, or `(no text output)` / `(no output)` when only rich outputs exist.\n- `details` (`EvalToolDetails` from `packages/coding-agent/src/eval/types.ts`):\n - `cells`: per-cell code, status (`pending`/`running`/`complete`/`error`), output, duration, exit code, status events, markdown flag\n - `language`: first backend used\n - `languages`: distinct backends used, in first-use order\n - `jsonOutputs`: structured values emitted via `display(...)`\n - `images`: image payloads emitted by Python rich display or JS `display({ type: \"image\", ... })`\n - `statusEvents`: aggregated helper/tool status events\n - `notice`: backend fallback notice\n - `meta`: truncation metadata\n - `isError`: set on cell failure or cancellation\n\nRenderer behavior in `packages/coding-agent/src/tools/eval.ts`:\n\n- call preview renders parsed code cells with syntax highlighting\n- result view renders each cell separately, including status, duration, and output\n- markdown outputs are rendered with the Markdown component instead of plain text\n- `jsonOutputs` render as a tree, collapsed or expanded depending on UI state\n- timeout / fallback / truncation notices render as dim metadata lines\n- images are carried in `details.images`; generic tool UI image handling renders them outside the text block\n\nSide-channel artifacts:\n\n- `session.allocateOutputArtifact?.(\"eval\")` may allocate an `artifact://...` backing store for spilled output.\n- Truncated output metadata points at that artifact when available.\n\n## Flow\n\n1. `EvalTool.execute()` in `packages/coding-agent/src/tools/eval.ts` parses `params.input` with `parseEvalInput()`.\n2. `parseEvalInput()` normalizes newlines, collects cells, parses attributes, and assigns each cell a language from the header, language sniffing, or the default `python`.\n3. Back in `execute()`, each parsed cell is resolved to a backend with `resolveBackend()`:\n - explicit `python`/`js` requests are validated against session settings and backend availability\n - otherwise `sniffEvalLanguage()` in `packages/coding-agent/src/eval/sniff.ts` tries shebangs and language markers\n - if no explicit language was present, later cells prefer the previous runtime language before re-sniffing\n - Python is preferred when available; JS is the fallback when Python is unavailable or disabled\n4. The tool allocates an `OutputSink`, a `TailBuffer`, per-cell result objects, and a `sessionAbortController`. `session.trackEvalExecution?.(...)` can wrap the whole run for external cancellation tracking.\n5. Cells execute sequentially. For each cell, `execute()`:\n - clamps the cell timeout through `clampTimeout(\"eval\", ...)`\n - builds a combined abort signal from the tool signal, the timeout, and the session abort controller\n - marks the cell `running` and emits an update\n - calls the backend’s `execute()` with `cwd`, `sessionId`, `sessionFile`, `kernelOwnerId`, `deadlineMs`, `reset`, artifact info, and chunk callback\n6. JS cells dispatch through `packages/coding-agent/src/eval/js/index.ts` into `executeJs()`; Python cells dispatch through `packages/coding-agent/src/eval/py/index.ts` into `executePython()`.\n7. Backend text chunks stream into the shared `OutputSink`; rich outputs are accumulated separately as JSON, images, markdown markers, and status events.\n8. After each cell:\n - text output is trimmed and stored on that cell result\n - multi-cell runs prefix text with `[i/n]` and the optional title\n - cancellations return early with `isError: true` and a cell-specific abort message\n - non-zero exit codes return early with `isError: true` and a message naming the failed cell\n - later cells are skipped after the first error, but earlier cell state persists in the underlying runtime\n9. On success, the tool joins all cell outputs, synthesizes `(no text output)` or `(no output)` when needed, and attaches truncation metadata from `summarizeFinal()`.\n10. The renderer uses `details.cells`, `details.jsonOutputs`, and `details.statusEvents` to build notebook-style output. `mergeCallAndResult = true` and `inline = true`, so call and result render together in the transcript.\n\n## Modes / Variants\n\n### Parsing modes\n\n- Explicit multi-cell format with `*** Cell ...` headers\n- Implicit single-cell fallback for bare code or a single fenced block\n- Abort-recovery parse path when `*** Abort` is present\n\n### Backend selection\n\n- Explicit Python backend\n- Explicit JavaScript backend\n- Auto-detected backend via `sniffEvalLanguage()`\n- Fallback from requested/inferred Python to JS when Python is unavailable\n- Fallback notice when JS markers are seen but `eval.js` is disabled and Python is used instead\n\n### JavaScript runtime\n\nImplemented in `packages/coding-agent/src/eval/js/context-manager.ts` and `packages/coding-agent/src/eval/js/prelude.txt`.\n\n- Persistent `vm.Context` instances keyed by `js:${sessionId}` in `vmContexts`\n- `rst` calls `resetVmContext(sessionKey)` before the cell executes\n- Top-level `await` and bare `return` are supported by wrapping code in an async IIFE when `wrapCode()` sees `await` or `return`\n- Top-level static `import ... from ...` and dynamic `import(...)` calls are routed through `rewriteImports()`, which sends them via `__omp_import__` so the specifier resolves against the session cwd\n- The prelude installs globals:\n - `display`, `print`\n - `read`, `write`, `append`, `sort`, `uniq`, `counter`, `diff`, `tree`, `env`, `output`\n - `tool.<name>(args)` proxy for arbitrary session tool calls\n- JS helpers are async because they cross the VM/tool boundary\n- `display(value)` behavior:\n - plain objects/arrays become JSON outputs\n - `{ type: \"image\", data, mimeType }` becomes an image output\n - scalars become text\n- The VM exposes a restricted `process` subset plus `Buffer`, `fetch`, `Blob`, `File`, `Headers`, `Request`, `Response`, `fs`, `require`, and browser-style globals\n- Per-session VM runs are serialized with `runQueued()`\n\n### Python runtime\n\nImplemented in `packages/coding-agent/src/eval/py/executor.ts`, `packages/coding-agent/src/eval/py/kernel.ts`, and `packages/coding-agent/src/eval/py/prelude.py`. See `docs/python-repl.md` for gateway and kernel details.\n\n- Default mode is retained `session` kernels keyed by `python:${sessionId}`\n- Optional `python.kernelMode = \"per-call\"` creates a fresh kernel for each cell and shuts it down afterward\n- `rst` disposes the retained kernel for that session before the cell runs; later Python cells in the same tool call reuse the fresh kernel\n- Startup path:\n - availability check\n - create/connect kernel\n - initialize cwd / env / `sys.path`\n - execute `PYTHON_PRELUDE`\n- Python cells run inside IPython/Jupyter, so top-level `await` works; the prompt warns not to use `asyncio.run(...)`\n- The Python prelude defines synchronous helpers with the same surface as JS (except `tool.<name>` exists only in JS)\n- `display(value)` wraps dict/list/tuple values in `IPython.display.JSON`; rich display MIME bundles are preserved\n- Kernel `display_data` / `execute_result` messages map to:\n - `application/x-omp-status` → status event\n - `image/png` → image output\n - `application/json` → JSON output\n - `text/markdown` → markdown output\n - `text/plain` → text output\n - `text/html` → HTML converted to markdown with `htmlToBasicMarkdown()`\n- Interactive stdin is rejected: `input_request` sends an empty reply, marks `stdinRequested`, and the executor returns exit code `1`\n\n### Multi-language call behavior\n\nA single tool call can mix Python and JS cells. Persistence is per language runtime:\n\n- resetting Python does not touch JS state\n- resetting JS does not touch Python state\n- each backend keeps its own retained session keyed from the same session-derived ID\n\n## Side Effects\n\n- Filesystem\n - JS/Python prelude helpers can read, write, append, diff, and traverse files under the session cwd or absolute paths.\n - Output may spill to an artifact file via `OutputSink`.\n- Network\n - Python backend speaks NDJSON to a local `python3` subprocess over stdin/stdout (no network).\n - JS runtime exposes `fetch` and `tool.<name>()`; those tools may perform additional network I/O.\n- Subprocesses / native bindings\n - Python availability check runs `<python> -c ...`.\n - Python backend spawns one `python -u runner.py` subprocess per kernel; cancellation sends `SIGINT`. Details in `docs/python-repl.md`.\n- Session state\n - `session.assertEvalExecutionAllowed?.()` can block execution.\n - `session.trackEvalExecution?.(...)` can register cancellable eval work.\n - `session.getSessionFile?.()` and `session.getEvalKernelOwnerId?.()` influence kernel reuse and artifact lookup.\n - JS VM contexts persist in `vmContexts` across eval calls until reset/disposal.\n - Python retained kernels persist in `kernelSessions` until reset, eviction, idle cleanup, or owner cleanup.\n- User-visible prompts / interactive UI\n - none; stdin requests are rejected programmatically\n- Background work / cancellation\n - Python retained kernels have heartbeat and idle cleanup timers.\n - Cancellation interrupts a running Python kernel and aborts JS promise waits.\n\n## Limits & Caps\n\n- Per-cell timeout default: 30s (`DEFAULT_TIMEOUT_MS` in `packages/coding-agent/src/eval/parse.ts`; `TOOL_TIMEOUTS.eval.default` in `packages/coding-agent/src/tools/tool-timeouts.ts`)\n- Timeout clamp: 1s minimum, 600s maximum (`TOOL_TIMEOUTS.eval` in `packages/coding-agent/src/tools/tool-timeouts.ts`)\n- Transcript code/output preview: 10 lines by default (`EVAL_DEFAULT_PREVIEW_LINES` in `packages/coding-agent/src/tools/eval.ts`)\n- Output truncation window: 50KB default (`DEFAULT_MAX_BYTES` in `packages/coding-agent/src/session/streaming-output.ts`)\n- Output line cap inside truncation helpers: 3000 lines (`DEFAULT_MAX_LINES` in `packages/coding-agent/src/session/streaming-output.ts`)\n- Streaming tail buffer for live updates: `DEFAULT_MAX_BYTES * 2` = 100KB (`packages/coding-agent/src/tools/eval.ts`)\n- Python retained kernel idle timeout: 5 minutes (`IDLE_TIMEOUT_MS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python retained kernel cap: 4 sessions (`MAX_KERNEL_SESSIONS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python retained kernel cleanup sweep: every 30s (`CLEANUP_INTERVAL_MS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python owner-cleanup shutdown wait: 2000ms (`OWNER_CLEANUP_KERNEL_SHUTDOWN_TIMEOUT_MS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python heartbeat interval: 5s (`ensureKernelHeartbeat()` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python external gateway availability check timeout: 5s (`AbortSignal.timeout(5000)` in `packages/coding-agent/src/eval/py/kernel.ts`)\n- Python auto-restart budget: one restart per retained session before hard failure (`restartCount > 1` in `packages/coding-agent/src/eval/py/executor.ts`)\n\n## Errors\n\n- Parse errors from `parseEvalInput()` throw immediately, for example invalid timeout strings.\n- Missing session without proxy executor throws `ToolError(\"Eval tool requires a session when not using proxy executor\")`.\n- Disabled/unavailable backends throw `ToolError` from `resolveBackend()`:\n - `eval.py = false`\n - `eval.js = false`\n - Python kernel unavailable\n - no backend available\n- JS runtime exceptions are converted into text output plus `exitCode: 1`; cancellations return `cancelled: true` and may append `Command timed out`.\n- Python execution errors from the kernel become text output and `exitCode: 1`; later cells are skipped.\n- Python stdin requests are treated as errors with the message `Kernel requested stdin; interactive input is not supported.`\n- Cancellation is returned, not thrown, once backend execution has started. The tool formats it as a cell failure and sets `details.isError = true`.\n- If parsing encountered `*** Abort`, the final text appends `ABORT_WARNING`, explicitly telling the model that earlier cells ran and state persists.\n- If output truncates, the tool still succeeds; truncation is surfaced through `details.meta` and artifact-backed full output when available.\n\n## Notes\n\n- The runtime parser is intentionally more permissive than `packages/coding-agent/src/eval/eval.lark`; maintain both when changing syntax.\n- Cell language in `ParsedEvalCell` is not the last word: `EvalTool.execute()` may override backend selection for cells without an explicit header by inheriting the previous runtime language.\n- `tool.<name>()` exists only in JS. Python prelude helpers do not call back into the full tool registry.\n- JS helper paths reject protocol URIs (`://`) in `resolvePath()`; the JS prelude is filesystem-only unless the code calls `tool.read(...)` or another tool explicitly.\n- Python helper `output(...)` depends on `PI_SESSION_FILE`; it fails outside a session-backed run.\n- `display()` can produce text and structured outputs from the same value; the renderer prefers markdown over `text/plain` when both exist.\n- JS static imports are rewritten only at top level. Nested imports stay invalid and surface normal JS syntax/runtime errors.\n- `EvalTool` is `concurrency = \"exclusive\"`, so eval calls do not overlap within a session.\n- The tool description shown to the model is templated by backend availability (`getEvalToolDescription()`); if Python is unavailable, the prompt omits Python-specific instructions.\n",
|
|
70
|
+
"tools/eval.md": "# eval\n\n> Execute Python or JavaScript code in persistent cell-based runtimes.\n\n> **Notice:** Do not shell out to `python -c`/`python -e`, `bun -e`, or `node -e` via the `bash` tool for ad-hoc code execution. Use this tool instead — it gives you persistent state across cells, structured `display()` output, image/JSON capture, and proper cancellation/timeout handling that one-shot `-e`/`-c` invocations cannot provide.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/eval.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/eval.md`\n- Key collaborators:\n - `packages/coding-agent/src/eval/parse.ts` — lenient cell parser\n - `packages/coding-agent/src/eval/sniff.ts` — language sniffing heuristics\n - `packages/coding-agent/src/eval/backend.ts` — backend execution contract\n - `packages/coding-agent/src/eval/js/index.ts` — JS backend adapter\n - `packages/coding-agent/src/eval/js/executor.ts` — JS execution + output sink\n - `packages/coding-agent/src/eval/js/context-manager.ts` — persistent VM contexts, prelude, tool bridge\n - `packages/coding-agent/src/eval/js/prelude.txt` — JS global helpers\n - `packages/coding-agent/src/eval/py/index.ts` — Python backend adapter\n - `packages/coding-agent/src/eval/py/executor.ts` — kernel session retention, reset, cleanup\n - `packages/coding-agent/src/eval/py/kernel.ts` — Jupyter gateway/kernel protocol, display capture\n - `packages/coding-agent/src/eval/py/prelude.py` — Python helper functions and status events\n - `packages/coding-agent/src/session/streaming-output.ts` — truncation, artifacts, streamed chunks\n - `docs/python-repl.md` — Python kernel/gateway internals\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `input` | `string` | Yes | Cell program text. Parsed by `parseEvalInput()` in `packages/coding-agent/src/eval/parse.ts`, not by JSON subfields. |\n\n`input` syntax accepted at runtime:\n\n- Cell header: `*** Cell <attrs...>`. Attributes are space-separated tokens with quoted titles (`\"...\"` or `'...'`).\n- Canonical tokens (advertised in the prompt):\n - `<lang>:\"<title>\"` — language + title shorthand. `lang` is `py` or `js` (lenient: also `ts`, plus the long-form aliases `python`, `javascript`, `typescript`, `ipy`, `ipython`).\n - `t:<n>[ms|s|m]` — per-cell timeout (default 30s).\n - `rst` — wipe this cell's language kernel before running.\n- Lenient additional tokens (accepted by the parser, not advertised):\n - bare language token (`py`, `js`)\n - `id:\"...\"` / `title:\"...\"` / `name:\"...\"` / `cell:\"...\"` / `file:\"...\"` / `label:\"...\"` — title aliases\n - `timeout:` / `duration:` / `time:` — `t:` aliases\n - `reset` — `rst` alias\n - `rst:true|false|1|0|yes|no|on|off` — explicit boolean form\n - a bare positional duration token (`30s`, `2m`, `500ms`)\n - any unclassified bare token folds into a positional title fragment\n- Cell body: every following line until the next `*** Cell ...`, the optional `*** End`, or `*** Abort`. `*** End` is a quirk fix for GPT-trained models that emit terminators and is not documented in the prompt.\n\nLeniencies in `packages/coding-agent/src/eval/parse.ts`:\n\n- Markers accept two or more leading `*` and flexible whitespace.\n- `*** End` is optional everywhere; the parser silently consumes trailing tokens (e.g. `*** End py`).\n- Missing terminators between adjacent cells are tolerated; the next `*** Cell` closes the prior cell, and stray non-marker lines between cells fold into the prior cell's body without crashing.\n- Bare code or a single markdown fence such as ```` ```py ```` is treated as one implicit cell.\n- If `*** Abort` appears, the in-progress cell is dropped and the result carries an abort warning. To preserve a completed cell before `*** Abort`, emit `*** End` first.\n\nThe tool also exposes a custom Lark grammar from `packages/coding-agent/src/eval/eval.lark` for constrained sampling. That grammar is stricter than the runtime parser: it requires the canonical `*** Cell <lang>:\"title\"` header form with a fixed attribute order, advertises only `py` / `js`, and pins the trailing `*** End` so GPT-trained models' natural terminator habit aligns with the constrained output.\n\n## Outputs\n\nFinal result from `EvalTool.execute()` is single-shot, but `onUpdate` streams partial text and `details` while cells run.\n\nReturned shape:\n\n- `content`: one text block containing combined cell output, or `(no text output)` / `(no output)` when only rich outputs exist.\n- `details` (`EvalToolDetails` from `packages/coding-agent/src/eval/types.ts`):\n - `cells`: per-cell code, status (`pending`/`running`/`complete`/`error`), output, duration, exit code, status events, markdown flag\n - `language`: first backend used\n - `languages`: distinct backends used, in first-use order\n - `jsonOutputs`: structured values emitted via `display(...)`\n - `images`: image payloads emitted by Python rich display or JS `display({ type: \"image\", ... })`\n - `statusEvents`: aggregated helper/tool status events\n - `notice`: backend fallback notice\n - `meta`: truncation metadata\n - `isError`: set on cell failure or cancellation\n\nRenderer behavior in `packages/coding-agent/src/tools/eval.ts`:\n\n- call preview renders parsed code cells with syntax highlighting\n- result view renders each cell separately, including status, duration, and output\n- markdown outputs are rendered with the Markdown component instead of plain text\n- `jsonOutputs` render as a tree, collapsed or expanded depending on UI state\n- timeout / fallback / truncation notices render as dim metadata lines\n- images are carried in `details.images`; generic tool UI image handling renders them outside the text block\n\nSide-channel artifacts:\n\n- `session.allocateOutputArtifact?.(\"eval\")` may allocate an `artifact://...` backing store for spilled output.\n- Truncated output metadata points at that artifact when available.\n\n## Flow\n\n1. `EvalTool.execute()` in `packages/coding-agent/src/tools/eval.ts` parses `params.input` with `parseEvalInput()`.\n2. `parseEvalInput()` normalizes newlines, collects cells, parses attributes, and assigns each cell a language from the header, language sniffing, or the default `python`.\n3. Back in `execute()`, each parsed cell is resolved to a backend with `resolveBackend()`:\n - explicit `python`/`js` requests are validated against session settings and backend availability\n - otherwise `sniffEvalLanguage()` in `packages/coding-agent/src/eval/sniff.ts` tries shebangs and language markers\n - if no explicit language was present, later cells prefer the previous runtime language before re-sniffing\n - Python is preferred when available; JS is the fallback when Python is unavailable or disabled\n4. The tool allocates an `OutputSink`, a `TailBuffer`, per-cell result objects, and a `sessionAbortController`. `session.trackEvalExecution?.(...)` can wrap the whole run for external cancellation tracking.\n5. Cells execute sequentially. For each cell, `execute()`:\n - clamps the cell timeout through `clampTimeout(\"eval\", ...)`\n - builds a combined abort signal from the tool signal, the timeout, and the session abort controller\n - marks the cell `running` and emits an update\n - calls the backend’s `execute()` with `cwd`, `sessionId`, `sessionFile`, `kernelOwnerId`, `deadlineMs`, `reset`, artifact info, and chunk callback\n6. JS cells dispatch through `packages/coding-agent/src/eval/js/index.ts` into `executeJs()`; Python cells dispatch through `packages/coding-agent/src/eval/py/index.ts` into `executePython()`.\n7. Backend text chunks stream into the shared `OutputSink`; rich outputs are accumulated separately as JSON, images, markdown markers, and status events.\n8. After each cell:\n - text output is trimmed and stored on that cell result\n - multi-cell runs prefix text with `[i/n]` and the optional title\n - cancellations return early with `isError: true` and a cell-specific abort message\n - non-zero exit codes return early with `isError: true` and a message naming the failed cell\n - later cells are skipped after the first error, but earlier cell state persists in the underlying runtime\n9. On success, the tool joins all cell outputs, synthesizes `(no text output)` or `(no output)` when needed, and attaches truncation metadata from `summarizeFinal()`.\n10. The renderer uses `details.cells`, `details.jsonOutputs`, and `details.statusEvents` to build notebook-style output. `mergeCallAndResult = true` and `inline = true`, so call and result render together in the transcript.\n\n## Modes / Variants\n\n### Parsing modes\n\n- Explicit multi-cell format with `*** Cell ...` headers\n- Implicit single-cell fallback for bare code or a single fenced block\n- Abort-recovery parse path when `*** Abort` is present\n\n### Backend selection\n\n- Explicit Python backend\n- Explicit JavaScript backend\n- Auto-detected backend via `sniffEvalLanguage()`\n- Fallback from requested/inferred Python to JS when Python is unavailable\n- Fallback notice when JS markers are seen but `eval.js` is disabled and Python is used instead\n\n### JavaScript runtime\n\nImplemented in `packages/coding-agent/src/eval/js/context-manager.ts` and `packages/coding-agent/src/eval/js/prelude.txt`.\n\n- Persistent `vm.Context` instances keyed by `js:${sessionId}` in `vmContexts`\n- `rst` calls `resetVmContext(sessionKey)` before the cell executes\n- Top-level `await` and bare `return` are supported by wrapping code in an async IIFE when `wrapCode()` sees `await` or `return`\n- Top-level static `import ... from ...` and dynamic `import(...)` calls are routed through `rewriteImports()`, which sends them via `__omp_import__` so the specifier resolves against the session cwd\n- The prelude installs globals:\n - `display`, `print`\n - `read`, `write`, `append`, `sort`, `uniq`, `counter`, `diff`, `tree`, `env`, `output`\n - `tool.<name>(args)` proxy for arbitrary session tool calls\n- JS helpers are async because they cross the VM/tool boundary\n- `display(value)` behavior:\n - plain objects/arrays become JSON outputs\n - `{ type: \"image\", data, mimeType }` becomes an image output\n - scalars become text\n- The VM exposes a restricted `process` subset plus `Buffer`, `fetch`, `Blob`, `File`, `Headers`, `Request`, `Response`, `fs`, `require`, and browser-style globals\n- Per-session VM runs are serialized with `runQueued()`\n\n### Python runtime\n\nImplemented in `packages/coding-agent/src/eval/py/executor.ts`, `packages/coding-agent/src/eval/py/kernel.ts`, and `packages/coding-agent/src/eval/py/prelude.py`. See `docs/python-repl.md` for gateway and kernel details.\n\n- Default mode is retained `session` kernels keyed by `python:${sessionId}`\n- Optional `python.kernelMode = \"per-call\"` creates a fresh kernel for each cell and shuts it down afterward\n- `rst` disposes the retained kernel for that session before the cell runs; later Python cells in the same tool call reuse the fresh kernel\n- Startup path:\n - availability check\n - create/connect kernel\n - initialize cwd / env / `sys.path`\n - execute `PYTHON_PRELUDE`\n- Python cells run inside IPython/Jupyter, so top-level `await` works; the prompt warns not to use `asyncio.run(...)`\n- The Python prelude defines synchronous helpers with the same surface as JS (except `tool.<name>` exists only in JS)\n- `display(value)` wraps dict/list/tuple values in `IPython.display.JSON`; rich display MIME bundles are preserved\n- Kernel `display_data` / `execute_result` messages map to:\n - `application/x-omp-status` → status event\n - `image/png` → image output\n - `application/json` → JSON output\n - `text/markdown` → markdown output\n - `text/plain` → text output\n - `text/html` → HTML converted to markdown with `htmlToBasicMarkdown()`\n- Interactive stdin is rejected: `input_request` sends an empty reply, marks `stdinRequested`, and the executor returns exit code `1`\n\n### Multi-language call behavior\n\nA single tool call can mix Python and JS cells. Persistence is per language runtime:\n\n- resetting Python does not touch JS state\n- resetting JS does not touch Python state\n- each backend keeps its own retained session keyed from the same session-derived ID\n\n## Side Effects\n\n- Filesystem\n - JS/Python prelude helpers can read, write, append, diff, and traverse files under the session cwd or absolute paths.\n - Output may spill to an artifact file via `OutputSink`.\n- Network\n - Python backend speaks NDJSON to a local `python3` subprocess over stdin/stdout (no network).\n - JS runtime exposes `fetch` and `tool.<name>()`; those tools may perform additional network I/O.\n- Subprocesses / native bindings\n - Python availability check runs `<python> -c ...`.\n - Python backend spawns one `python -u runner.py` subprocess per kernel; cancellation sends `SIGINT`. Details in `docs/python-repl.md`.\n- Session state\n - `session.assertEvalExecutionAllowed?.()` can block execution.\n - `session.trackEvalExecution?.(...)` can register cancellable eval work.\n - `session.getSessionFile?.()` and `session.getEvalKernelOwnerId?.()` influence kernel reuse and artifact lookup.\n - JS VM contexts persist in `vmContexts` across eval calls until reset/disposal.\n - Python retained kernels persist in `kernelSessions` until reset, eviction, idle cleanup, or owner cleanup.\n- User-visible prompts / interactive UI\n - none; stdin requests are rejected programmatically\n- Background work / cancellation\n - Python retained kernels have heartbeat and idle cleanup timers.\n - Cancellation interrupts a running Python kernel and aborts JS promise waits.\n\n## Limits & Caps\n\n- Per-cell timeout default: 30s (`DEFAULT_TIMEOUT_MS` in `packages/coding-agent/src/eval/parse.ts`; `TOOL_TIMEOUTS.eval.default` in `packages/coding-agent/src/tools/tool-timeouts.ts`)\n- Timeout clamp: 1s minimum, 600s maximum (`TOOL_TIMEOUTS.eval` in `packages/coding-agent/src/tools/tool-timeouts.ts`)\n- Transcript code/output preview: 10 lines by default (`EVAL_DEFAULT_PREVIEW_LINES` in `packages/coding-agent/src/tools/eval.ts`)\n- Output truncation window: 50KB default (`DEFAULT_MAX_BYTES` in `packages/coding-agent/src/session/streaming-output.ts`)\n- Output line cap inside truncation helpers: 3000 lines (`DEFAULT_MAX_LINES` in `packages/coding-agent/src/session/streaming-output.ts`)\n- Streaming tail buffer for live updates: `DEFAULT_MAX_BYTES * 2` = 100KB (`packages/coding-agent/src/tools/eval.ts`)\n- Python retained kernel idle timeout: 5 minutes (`IDLE_TIMEOUT_MS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python retained kernel cap: 4 sessions (`MAX_KERNEL_SESSIONS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python retained kernel cleanup sweep: every 30s (`CLEANUP_INTERVAL_MS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python owner-cleanup shutdown wait: 2000ms (`OWNER_CLEANUP_KERNEL_SHUTDOWN_TIMEOUT_MS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python heartbeat interval: 5s (`ensureKernelHeartbeat()` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python external gateway availability check timeout: 5s (`AbortSignal.timeout(5000)` in `packages/coding-agent/src/eval/py/kernel.ts`)\n- Python auto-restart budget: one restart per retained session before hard failure (`restartCount > 1` in `packages/coding-agent/src/eval/py/executor.ts`)\n\n## Errors\n\n- Parse errors from `parseEvalInput()` throw immediately, for example invalid timeout strings.\n- Missing session without proxy executor throws `ToolError(\"Eval tool requires a session when not using proxy executor\")`.\n- Disabled/unavailable backends throw `ToolError` from `resolveBackend()`:\n - `eval.py = false`\n - `eval.js = false`\n - Python kernel unavailable\n - no backend available\n- JS runtime exceptions are converted into text output plus `exitCode: 1`; cancellations return `cancelled: true` and may append `Command timed out`.\n- Python execution errors from the kernel become text output and `exitCode: 1`; later cells are skipped.\n- Python stdin requests are treated as errors with the message `Kernel requested stdin; interactive input is not supported.`\n- Cancellation is returned, not thrown, once backend execution has started. The tool formats it as a cell failure and sets `details.isError = true`.\n- If parsing encountered `*** Abort`, the final text appends `ABORT_WARNING`, explicitly telling the model that earlier cells ran and state persists.\n- If output truncates, the tool still succeeds; truncation is surfaced through `details.meta` and artifact-backed full output when available.\n\n## Notes\n\n- The runtime parser is intentionally more permissive than `packages/coding-agent/src/eval/eval.lark`; maintain both when changing syntax.\n- Cell language in `ParsedEvalCell` is not the last word: `EvalTool.execute()` may override backend selection for cells without an explicit header by inheriting the previous runtime language.\n- `tool.<name>()` exists only in JS. Python prelude helpers do not call back into the full tool registry.\n- JS helper paths reject protocol URIs (`://`) in `resolvePath()`; the JS prelude is filesystem-only unless the code calls `tool.read(...)` or another tool explicitly.\n- Python helper `output(...)` depends on `PI_SESSION_FILE`; it fails outside a session-backed run.\n- `display()` can produce text and structured outputs from the same value; the renderer prefers markdown over `text/plain` when both exist.\n- JS static imports are rewritten only at top level. Nested imports stay invalid and surface normal JS syntax/runtime errors.\n- `EvalTool` is `concurrency = \"exclusive\"`, so eval calls do not overlap within a session.\n- The tool description shown to the model is templated by backend availability (`getEvalToolDescription()`); if Python is unavailable, the prompt omits Python-specific instructions.\n",
|
|
71
71
|
"tools/find.md": "# find\n\n> Find filesystem paths by glob; use `search` when you need content matches instead of path matches.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/find.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/find.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/path-utils.ts` — normalize inputs; split base path vs glob.\n - `packages/coding-agent/src/tools/list-limit.ts` — apply result-count caps.\n - `packages/coding-agent/src/session/streaming-output.ts` — truncate text output at byte cap.\n - `packages/coding-agent/src/tools/tool-result.ts` — build `content` and `details.meta`.\n - `packages/coding-agent/src/tools/output-meta.ts` — encode limit / truncation metadata.\n - `packages/coding-agent/src/tools/tool-errors.ts` — map user-facing tool errors.\n - `packages/coding-agent/src/tools/index.ts` — register the built-in local implementation.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `paths` | `string[]` | Yes | One or more globs, files, or directories. Empty strings are rejected. Multiple entries may be merged into one brace-union search when their base paths can be resolved together. |\n| `hidden` | `boolean` | No | Whether hidden files are included. Defaults to `true` (`hidden ?? true`). |\n| `limit` | `number` | No | Max returned paths. Defaults to `1000`. Must be a finite positive number; non-integers are floored. |\n\n## Outputs\nThe tool returns a single text block plus structured `details`.\n\n- Success text: newline-delimited paths, one per line, relative to the session cwd when possible; absolute when outside cwd. Exact file inputs return that file path as one line.\n- Empty result text: `No files found matching pattern`.\n- Multi-path partial miss: appends `Skipped missing paths: ...` after the result block, or after the empty-result line.\n- `details` may include:\n - `scopePath`: display form of the searched root or merged roots.\n - `fileCount`: number of paths returned after result limiting.\n - `files`: returned paths as an array.\n - `truncated`: whether result count or byte truncation occurred.\n - `resultLimitReached`: reached result limit.\n - `missingPaths`: skipped missing inputs in multi-path calls.\n - `truncation` / `meta.limits`: structured truncation and limit metadata for renderers.\n- Streaming: when the runtime supplies `onUpdate`, the local implementation emits incremental newline-delimited text snapshots during globbing, throttled to 200 ms.\n\n## Flow\n1. `FindTool.execute()` normalizes each `paths` entry with `normalizePathLikeInput()` and `/\\\\/g -> \"/\"` (`packages/coding-agent/src/tools/find.ts`). Empty normalized entries fail with `` `paths` must contain non-empty globs or paths ``.\n2. For multi-path local calls, `partitionExistingPaths(..., parseFindPattern)` (`packages/coding-agent/src/tools/path-utils.ts`) stats each base path. Missing entries are skipped; if all are missing, the tool throws `Path not found: ...`. Single missing paths still hard-fail.\n3. The tool tries `resolveExplicitFindPatterns()` to merge multiple inputs into one search rooted at a common base path. If that does not apply, it parses one input with `parseFindPattern()`.\n4. `parseFindPattern()` determines `(basePath, globPattern, hasGlob)`:\n - no glob chars (`*`, `?`, `[`, `{`) => search that path with implicit `**/*`.\n - glob in the first segment => search from `.` and, unless the pattern already starts with `**/`, prefix it with `**/`.\n - glob later in the path => split at the first glob-bearing segment.\n5. `resolveToCwd()` converts the base path to an absolute path under the session cwd. A resolved `/` is rejected with `Searching from root directory '/' is not allowed`.\n6. `limit` is defaulted to `DEFAULT_LIMIT` (`1000`) and validated as a positive finite integer. `hidden` defaults to `true`. The tool also creates a 5 s timeout via `AbortSignal.timeout(GLOB_TIMEOUT_MS)`.\n7. Execution then branches:\n - **Custom operations branch**: if `FindToolOptions.operations.glob` exists, the tool checks existence with `operations.exists()`, short-circuits exact-file inputs via `operations.stat()` when available, then calls `operations.glob(globPattern, searchPath, { ignore: [\"**/node_modules/**\", \"**/.git/**\"], limit })`.\n - **Built-in local branch**: the tool stats `searchPath`. Exact-file inputs return immediately. Directory inputs call `natives.glob()` with `fileType: File`, `hidden`, `maxResults: limit`, `sortByMtime: true`, `gitignore: true`, and the combined abort signal.\n8. In the local branch, optional `onMatch` callbacks convert each match to a cwd-relative display path and emit throttled progress updates.\n9. After native glob returns, JS sorts `result.matches` by `mtime` descending (`(b.mtime ?? 0) - (a.mtime ?? 0)`) before formatting paths.\n10. `buildResult()` applies `applyListLimit()` to cap the array again at `limit`, joins paths with `\\n`, then runs `truncateHead()` with `maxLines: Number.MAX_SAFE_INTEGER`. In practice this leaves the 50 KB byte cap in place while disabling the default 3000-line cap.\n11. `toolResult()` packages text plus `details`, and records result-limit / truncation metadata for renderers.\n\n## Modes / Variants\n- **Exact file path**: if the parsed input has no glob and the resolved path stats as a file, output is that one path.\n- **Directory path**: if the parsed input has no glob and stats as a directory, the tool searches it with implicit `**/*`.\n- **Single glob path**: one input parsed by `parseFindPattern()`.\n- **Merged multi-path search**: multiple inputs resolved by `resolveExplicitFindPatterns()` into one brace-union glob rooted at a common base path.\n- **Partial multi-path search with missing inputs**: local multi-path calls skip missing base paths and surface them as `missingPaths` / `Skipped missing paths: ...`.\n- **Custom delegated search**: uses injected `FindOperations` instead of local fs + native glob.\n\n## Side Effects\n- Filesystem\n - Stats the resolved base path, and in local multi-path mode stats every candidate base path up front.\n - Does not write files.\n- Subprocesses / native bindings\n - Built-in local mode calls the native `@oh-my-pi/pi-natives` glob implementation.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Emits structured progress updates when `onUpdate` is provided.\n - Adds truncation / limit metadata to the tool result.\n- Background work / cancellation\n - Local globbing is cancellable through the caller abort signal plus an internal 5 s timeout.\n\n## Limits & Caps\n- Default result limit: `1000` (`DEFAULT_LIMIT` in `packages/coding-agent/src/tools/find.ts`).\n- Local glob timeout: `5000` ms (`GLOB_TIMEOUT_MS` in `packages/coding-agent/src/tools/find.ts`).\n- Output byte cap: `50 * 1024` bytes (`DEFAULT_MAX_BYTES` in `packages/coding-agent/src/session/streaming-output.ts`).\n- Default generic line cap in `truncateHead()` is `3000`, but `find` overrides `maxLines` to `Number.MAX_SAFE_INTEGER`, so byte size — not line count — is the practical output truncation cap.\n- Streaming update throttle: `200` ms between `onUpdate` emissions.\n- Sort order: most recent `mtime` first in the built-in local branch and promised in the prompt. The tool re-sorts in JS even though native glob receives `sortByMtime: true` so native code can still stop early at `maxResults`.\n\n## Errors\n- User-facing `ToolError`s from `FindTool.execute()` include:\n - `` `paths` must contain non-empty globs or paths ``\n - `Path not found: ...`\n - `Searching from root directory '/' is not allowed`\n - `Limit must be a positive number`\n - `Path is not a directory: ...`\n - `find timed out after 5s`\n- If the caller aborts, the local branch converts `AbortError` into `ToolAbortError`.\n- Non-`ENOENT` stat failures and other unexpected errors are rethrown.\n- Empty matches are not errors; they return the no-files text result.\n\n## Notes\n- Reach for `find` for filename / path discovery. Reach for `search` when the selection criterion is file contents or regex matches; `search` takes a `pattern` and returns anchored content matches, while `find` only returns matching paths (`packages/coding-agent/src/prompts/tools/find.md`, `packages/coding-agent/src/prompts/tools/search.md`).\n- Bare top-level globs are made recursive. `*.ts` is parsed as base `.` plus glob `**/*.ts`; `src/*.ts` stays rooted at `src` with a non-recursive `*.ts` segment; `src/**/*.ts` preserves explicit recursion.\n- `.gitignore` is always enabled in the built-in local branch (`gitignore: true`). There is no model-facing flag to disable it.\n- `hidden` defaults to `true`; hidden-file exclusion is opt-out, not opt-in.\n- Multi-path missing-input tolerance only applies in the built-in local branch. The custom-operations branch hard-fails the first missing `searchPath` it checks.\n- The custom `FindOperations.glob()` hook receives `ignore` and `limit`, but not the `hidden` flag or an explicit `.gitignore` toggle. A remote delegate must account for that itself if it wants parity with the local branch.\n- Built-in local globbing asks the native layer for `fileType: File`, so recursive directory searches yield files, not directories. Directory outputs are only possible through exact-path passthrough or custom delegates that return them.\n",
|
|
72
|
-
"tools/github.md": "# github\n\n> Dispatch GitHub CLI operations for repositories, issues, pull requests, search, and Actions run watching.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/gh.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/github.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/gh-format.ts` — shorten commit SHAs for summaries.\n - `packages/coding-agent/src/tools/gh-renderer.ts` — TUI rendering, especially `run_watch` live/result views.\n - `packages/coding-agent/src/utils/git.ts` — `gh`/`git` process wrappers, repo locking, branch config writes.\n - `packages/utils/src/dirs.ts` — base directory for dedicated PR worktrees.\n - `packages/coding-agent/src/sdk.ts` — session artifact allocation hook.\n - `packages/coding-agent/src/session/artifacts.ts` — artifact filename format `<id>.<toolType>.log`.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `op` | `\"repo_view\" \\| \"pr_create\" \\| \"pr_checkout\" \\| \"pr_push\" \\| \"search_issues\" \\| \"search_prs\" \\| \"search_code\" \\| \"search_commits\" \\| \"search_repos\" \\| \"run_watch\"` | Yes | Dispatch selector. `GithubTool.execute()` switches only on this field. |\n| `repo` | `string` | No | `owner/repo` override. Ignored when the identifier argument is already a full GitHub URL. Required in practice when `gh` cannot infer repo context from the current checkout. |\n| `branch` | `string` | No | Used by `repo_view`, `pr_push`, and `run_watch`. `run_watch` falls back to current git branch when `run` is omitted; `pr_push` falls back to current branch. |\n| `pr` | `string \\| string[]` | No | Used by `pr_checkout`. Each item may be a PR number, branch name, or GitHub PR URL. Array form enables batching. Omitted means current branch PR. |\n| `force` | `boolean` | No | Used only by `pr_checkout`. Defaults to `false`; allows resetting an existing `pr-<number>` local branch to the PR head commit. |\n| `forceWithLease` | `boolean` | No | Used only by `pr_push`; passed through to git push. |\n| `title` | `string` | No | Used only by `pr_create`. Required unless `fill` is `true`. |\n| `body` | `string` | No | Used only by `pr_create`. Mutually exclusive with `fill`. Empty/omitted body becomes `--body \"\"` to suppress the interactive editor. Non-empty body is written to a temp file and passed as `--body-file`. |\n| `base` | `string` | No | Used only by `pr_create`; passed as `--base`. |\n| `head` | `string` | No | Used only by `pr_create`; passed as `--head`. |\n| `draft` | `boolean` | No | Used only by `pr_create`. Defaults to `false`. |\n| `fill` | `boolean` | No | Used only by `pr_create`. Defaults to `false`. Mutually exclusive with `title` and `body`. |\n| `reviewer` | `string[]` | No | Used only by `pr_create`; each entry becomes `--reviewer`. |\n| `assignee` | `string[]` | No | Used only by `pr_create`; each entry becomes `--assignee`. |\n| `label` | `string[]` | No | Used only by `pr_create`; each entry becomes `--label`. |\n| `query` | `string` | No | Used by all `search_*` ops. Required there. |\n| `limit` | `number` | No | Used by all `search_*` ops. Defaults to `10`, floored, clamped to `50`, and must be `> 0`. |\n| `run` | `string` | No | Used only by `run_watch`. Must be a numeric run ID or full GitHub Actions run URL. |\n| `tail` | `number` | No | Used only by `run_watch`. Defaults to `15`, floored, clamped to `200`, and must be `> 0`. |\n\n## Outputs\nThe tool returns a single text result built by `buildTextResult()` in `packages/coding-agent/src/tools/gh.ts`.\n\n- `content`: one text block. Multi-item ops join sections with blank lines and `---` separators.\n- `sourceUrl`: set for single repo/PR/run results when a canonical URL is known.\n- `details`: optional structured metadata used by the TUI renderer.\n - Common fields: `artifactId`, `repo`, `branch`, `worktreePath`, `remote`, `remoteBranch`, `headSha`, `runId`, `runIds`, `status`, `conclusion`, `failedJobs`.\n - `pr_checkout` adds `checkouts: GhPrCheckoutSummary[]`.\n - `run_watch` adds `watch: GhRunWatchViewDetails`, which drives the custom live/result renderer in `packages/coding-agent/src/tools/gh-renderer.ts`.\n- Artifact trailer: when `artifactId` is present, the text body gets an appended line like `Full failed-job logs: artifact://<id>`.\n - `run_watch` allocates artifacts with `session.allocateOutputArtifact(\"github\")`; persistent sessions therefore save failed-log bodies as `<artifact-dir>/<id>.github.log`.\n\n`run_watch` is the only streaming op. It emits `onUpdate` snapshots while polling, then returns one final text result.\n\n## Flow\n1. `GithubTool.createIf()` exposes the tool only when `git.github.available()` finds `gh` on `PATH`.\n2. `GithubTool.execute()` wraps dispatch in `untilAborted()` and switches on `params.op`.\n3. Each op normalizes optional strings, arrays, booleans, and numeric caps locally in `packages/coding-agent/src/tools/gh.ts`.\n4. CLI execution goes through `git.github.run/json/text()` in `packages/coding-agent/src/utils/git.ts`:\n - spawns `gh ...` with `Bun.spawn()`;\n - trims stdout/stderr unless `trimOutput: false`;\n - maps common auth/repo-context failures into tool-facing `ToolError` messages;\n - `json()` rejects empty or invalid JSON.\n5. Read-style ops (`repo_view`, `search_*`) fetch JSON and format Markdown-like text summaries. Single-issue and single-PR views were moved out of the tool and now resolve through the `issue://` / `pr://` internal URL schemes, which share the same SQLite cache.\n7. PR diffs moved out of the tool. `pr://<N>/diff` lists changed files, `pr://<N>/diff/<i>` slices a single file, and `pr://<N>/diff/all` returns the full unified diff — see `docs/tools/read.md`. All three variants share one `gh pr diff` invocation through the `pr-diff` cache row.\n8. `pr_checkout` resolves PR metadata first, then enters `git.withRepoLock()` before any git mutation so parallel checkout calls for the same primary repo do not race on shared `.git` state.\n9. `pr_push` reads PR head metadata back from git branch config, derives a refspec, then pushes with `git.push()`.\n10. `pr_create` shells out once, then best-effort re-reads the created PR for a richer summary.\n11. `run_watch` chooses either run mode (`run` supplied) or commit mode (`run` omitted), polls GitHub Actions APIs every 3 seconds, emits streaming updates, and may save a full failed-log artifact before returning.\n12. Final text goes through `toolResult().text(...)`; if `session.allocateOutputArtifact()` returns a slot, failed-log text is persisted with `Bun.write()`.\n\n## Modes / Variants\n\n### `repo_view`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `branch` |\n| `gh` command | `gh repo view [<repo>] [--branch <branch>] --json <GH_REPO_FIELDS>` |\n| Batching | None |\n| Output | `# <owner/repo>` header, description, URL, default branch, requested branch, visibility, permission, primary language, stars, forks, archive/fork flags, updated timestamp, homepage, topics. `sourceUrl = data.url`. |\n\nIf `repo` is omitted, `gh` repository resolution is used.\n\nSingle-issue and single-PR reads live in the `issue://<N>` / `pr://<N>` URL schemes (see `docs/tools/read.md`). They share `~/.omp/cache/github-cache.db` (override via `OMP_GITHUB_CACHE_DB`) and the `github.cache.softTtlSec` / `github.cache.hardTtlSec` / `github.cache.enabled` settings. The cache retains rendered Markdown plus the raw JSON payload returned by `gh`, including private bodies, comments, reviews, and review comments when comments are enabled; rows are scoped by the local GitHub credential fingerprint. Root and repo-scoped reads (`issue://`, `pr://owner/repo`) issue a live `gh issue list` / `gh pr list` for browsing; query params `state`, `limit`, `author`, `label` pass through to `gh` (`issue://` accepts `state=open|closed|all`; `pr://` also accepts `merged`). PR diffs ride the same cache under `pr://<N>/diff[/…]`: the listing, full diff, and per-file slices all share one `pr-diff` row keyed by repo and PR number.\n\n### `pr_create`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` plus either `fill=true` or `title` |\n| Optional fields | `repo`, `title`, `body`, `base`, `head`, `draft`, `fill`, `reviewer[]`, `assignee[]`, `label[]` |\n| `gh` command | `gh pr create ...` with flags assembled from provided fields |\n| Batching | None |\n| Output | `# Created Pull Request ...` summary with URL, state, draft flag, base/head, author, created time, labels, optional body. `sourceUrl` is the created PR URL. |\n\nBranches:\n- `fill && (title || body !== undefined)` throws.\n- Non-empty `body` is written under a temp dir `gh-pr-body-*` in `os.tmpdir()`, passed as `--body-file`, then removed in `finally`.\n- After creation, the tool parses the returned URL and best-effort runs `gh pr view <number> --repo <repo> --json <GH_PR_FIELDS_NO_COMMENTS>`; failures there are swallowed.\n\n### `pr_checkout`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `pr`, `force` |\n| `gh` command | For each requested PR: `gh pr view [<pr>] [--repo <repo>] --json <GH_PR_CHECKOUT_FIELDS>`; cross-repo PRs may also call `gh repo view <headRepository> --json <GH_REPO_CLONE_FIELDS>`. |\n| Batching | Yes. `pr` may be `string[]`; each PR is resolved in parallel, but git mutations are serialized per primary repo by `git.withRepoLock()`. |\n| Output | Single PR: checkout/worktree summary plus `details.repo`, `details.branch`, `details.worktreePath`, `details.remote`, `details.remoteBranch`, `details.checkouts`. Batched: `# <n> Pull Request Worktrees (...)` plus one section per PR and aggregated `details.checkouts`. |\n\nWorktree and metadata behavior:\n- Local branch name is always `pr-<number>`.\n- Worktree path is `path.join(getWorktreesDir(), encodeRepoPathForFilesystem(primaryRepoRoot), localBranch)`, where `getWorktreesDir()` is `~/.omp/wt`; effective path is `~/.omp/wt/<encoded-primary-repo-root>/pr-<number>`.\n- Existing worktree detection is by branch ref `refs/heads/pr-<number>` from `git.worktree.list()`.\n- New worktree creation calls `git.worktree.add(repoRoot, finalWorktreePath, localBranch, { signal })` after verifying the path is neither already registered nor already present on disk.\n- For same-repo PRs, remote is `origin`. For cross-repo PRs, the tool resolves a clone URL for the head repo, reuses an existing remote with the same URL when possible, or creates `fork-<owner>` / `fork-<owner>-<n>`.\n- The branch push metadata is persisted with `git config` under the repository's shared `.git/config` as:\n - `branch.pr-<number>.remote`\n - `branch.pr-<number>.merge`\n - `branch.pr-<number>.pushRemote`\n - `branch.pr-<number>.ompPrHeadRef`\n - `branch.pr-<number>.ompPrUrl`\n - `branch.pr-<number>.ompPrIsCrossRepository`\n - `branch.pr-<number>.ompPrMaintainerCanModify`\n- If `refs/heads/pr-<number>` already exists at a different commit, checkout fails unless `force=true`, in which case `git branch --force` resets it to the fetched PR head.\n- If a matching worktree already exists, the tool reuses it and reports `reused: true`.\n\n### `pr_push`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `branch`, `forceWithLease` |\n| `gh` command | None. This path uses git, not `gh`. |\n| Batching | None |\n| Output | `# Pushed Pull Request Branch` summary with local branch, remote, remote branch, remote URL, PR URL, and force-with-lease flag. `sourceUrl = prUrl` when known. |\n\nPush target resolution reads the `branch.<name>.ompPrHeadRef`, `pushRemote`/`remote`, `ompPrUrl`, `ompPrMaintainerCanModify`, and `ompPrIsCrossRepository` git-config keys written by `pr_checkout`. If the current checked-out branch matches the target branch, the source ref is `HEAD`; otherwise it pushes `refs/heads/<branch>`. The refspec is `HEAD:refs/heads/<headRef>` or `refs/heads/<branch>:refs/heads/<headRef>`.\n\n### `search_issues`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `repo`, `limit` |\n| `gh` command | `gh api -X GET /search/issues -f q=\"<query> [repo:<repo>] is:issue\" -F per_page=<limit>` |\n| Batching | None |\n| Output | `# GitHub issues search`, echoed query, optional repo, result count, then one bullet per issue with repo/state/author/labels/timestamps/URL. |\n\n### `search_prs`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `repo`, `limit` |\n| `gh` command | `gh api -X GET /search/issues -f q=\"<query> [repo:<repo>] is:pr\" -F per_page=<limit>` |\n| Batching | None |\n| Output | Same shape as `search_issues`, labeled as pull requests. |\n\n### `search_code`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `repo`, `limit` |\n| `gh` command | `gh api -X GET /search/code -f q=\"<query> [repo:<repo>]\" -F per_page=<limit> -H \"Accept: application/vnd.github.text-match+json\"` |\n| Batching | None |\n| Output | `# GitHub code search`, result count, then one bullet per match with path, repo, short commit SHA, URL, and first normalized text-match fragment line when present. |\n\n### `search_commits`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `repo`, `limit` |\n| `gh` command | `gh api -X GET /search/commits -f q=\"<query> [repo:<repo>]\" -F per_page=<limit>` |\n| Batching | None |\n| Output | `# GitHub commits search`, result count, then one bullet per commit: short SHA + first commit-message line, repo, author, date, URL. |\n\n### `search_repos`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `limit` |\n| `gh` command | `gh api -X GET /search/repositories -f q=\"<query>\" -F per_page=<limit>` |\n| Batching | None |\n| Output | `# GitHub repositories search`, result count, then one bullet per repo with first description line, language, stars, forks, open issues, visibility, archive/fork flags, updated time, URL. |\n\n`repo` is intentionally not used for this op.\n\n### `run_watch`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `branch`, `run`, `tail` |\n| `gh` command | Repo resolution: `gh repo view --json nameWithOwner -q .nameWithOwner` when `repo` and run URL repo are both absent. Single-run mode uses `gh api --method GET /repos/<repo>/actions/runs/<runId>` and `gh api --method GET /repos/<repo>/actions/runs/<runId>/jobs`. Commit mode uses `gh api --method GET /repos/<repo>/branches/<branch>`, `gh api --method GET /repos/<repo>/actions/runs`, `gh api --method GET /repos/<repo>/actions/runs/<runId>/jobs`, and `gh api /repos/<repo>/actions/jobs/<jobId>/logs` for failed jobs. |\n| Batching | Implicit batching only in commit mode: all workflow runs for one commit are tracked together. |\n| Output | Streaming watch snapshots via `onUpdate`, then a final text report. On failure, appends `Full failed-job logs: artifact://<id>` and sets `details.artifactId`. |\n\nWatch flow:\n- `run` parsing accepts either a decimal run ID or a full run URL. URL repo must match explicit `repo` when both are given.\n- Poll interval is fixed at 3 seconds (`RUN_WATCH_INTERVAL_DEFAULT`).\n- Failure grace period is fixed at 5 seconds (`RUN_WATCH_GRACE_DEFAULT`). When any failed job appears before completion, the tool emits a note, waits once, re-fetches state, then collects logs so concurrent failures are included.\n- Failed-job logs are fetched with `gh api /repos/<repo>/actions/jobs/<jobId>/logs` via `git.github.run()`, not `json()`. Non-zero exit leaves `available: false` instead of failing the whole watch.\n- Inline result includes only the last `tail` lines per failed job. The saved artifact contains full logs (`mode: \"full\"`).\n- In commit mode, success is intentionally double-checked: once all known runs are successful, the tool waits one more poll interval and succeeds only if the set of run IDs is unchanged. This avoids returning before late workflow runs appear for the same commit.\n- `details.watch` drives a specialized renderer in `packages/coding-agent/src/tools/gh-renderer.ts`; non-watch results fall back to generic text rendering.\n\n## Side Effects\n- Filesystem\n - `pr_create` may create a temp dir under `os.tmpdir()` named `gh-pr-body-*`, write `body.md`, then remove the dir in `finally`.\n - `pr_checkout` may create directories under `~/.omp/wt/<encoded-primary-repo-root>/` and add git worktrees there.\n - `run_watch` may write a session artifact with full failed-job logs.\n- Network\n - Every op shells out to `gh`, which then talks to GitHub APIs except `pr_push`.\n - `pr_push` uses git network transport to the configured remote.\n- Subprocesses / native bindings\n - All `gh` calls use `Bun.spawn([\"gh\", ...args])`.\n - `pr_checkout` and `pr_push` also invoke git helpers from `packages/coding-agent/src/utils/git.ts`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - `run_watch` consumes `session.allocateOutputArtifact()` when failed-job logs are persisted.\n - Returned `details` objects carry run/checkouts metadata for the renderer/UI.\n- User-visible prompts / interactive UI\n - `gh` interactive editor fallback is suppressed for `pr_create` by forcing either `--body-file` or `--body \"\"`.\n - `gh-renderer` provides compact headers for all ops and a custom live watch view for `run_watch`.\n- Background work / cancellation\n - `run_watch` loops until success/failure and uses `scheduler.wait()` between polls.\n - `GithubTool.execute()` is wrapped in `untilAborted()`; `git.github.run()` forwards the abort signal into `Bun.spawn()`.\n\n## Limits & Caps\n- Search result default: `10` (`SEARCH_LIMIT_DEFAULT` in `packages/coding-agent/src/tools/gh.ts`).\n- Search result max: `50` (`SEARCH_LIMIT_MAX`).\n- PR file preview inside the `pr://` view: first `50` files only (`FILE_PREVIEW_LIMIT` in `gh.ts`).\n- Run-watch poll interval: `3s` (`RUN_WATCH_INTERVAL_DEFAULT`).\n- Run-watch failure grace period: `5s` (`RUN_WATCH_GRACE_DEFAULT`).\n- Run-watch failed-log tail default: `15` lines (`RUN_WATCH_TAIL_DEFAULT`).\n- Run-watch failed-log tail max: `200` lines (`RUN_WATCH_TAIL_MAX`).\n- PR review comments page size: `100` (`REVIEW_COMMENTS_PAGE_SIZE`).\n- Actions jobs page size: `100` (`RUN_JOBS_PAGE_SIZE`).\n- Search and tail numeric inputs are floored with `Math.floor()`, clamped to the max, and rejected when non-finite or `<= 0`.\n- `pr_checkout` batch fan-out is unbounded in tool code; all requested PRs are launched with `Promise.all()`.\n\n## Errors\n- Tool creation is skipped entirely when `gh` is not installed.\n- `git.github.run()` throws `ToolError(\"GitHub CLI (gh) is not installed...\")` if `gh` is missing at execution time.\n- `git.github.text/json()` map common failures to model-facing messages:\n - not authenticated → `GitHub CLI is not authenticated. Run \\`gh auth login\\`.`\n - missing repo context without explicit `repo` → `GitHub repository context is unavailable. Pass \\`repo\\` explicitly or run the tool inside a GitHub checkout.`\n - otherwise stderr/stdout text, or fallback `GitHub CLI command failed: gh ...`\n- `json()` also throws on empty stdout or invalid JSON.\n- Local validation errors throw `ToolError`, including:\n - missing required per-op fields (`query`, `title unless fill=true`)\n - invalid numeric `limit` / `tail`\n - invalid `run` format\n - `fill` combined with `title` or `body`\n - missing git repo / branch / HEAD context for checkout, push, or watch\n - `pr_push` on a branch without `ompPrHeadRef` metadata\n - conflicting existing worktree path or branch without `force`\n- `run_watch` treats failed-job log fetches specially: missing log content does not fail the watch; it marks that log `available: false` and prints `Log tail unavailable.` / `Full log unavailable.`.\n- `pr_create` swallows only the post-create best-effort `gh pr view` refresh; the create step itself still fails normally.\n\n## Notes\n- `appendRepoFlag()` intentionally skips `--repo` when the identifier argument is already a full GitHub URL; that lets `gh` derive repo/number from the URL.\n- `normalizePrIdentifierList()` accepts `reviewer`, `assignee`, and `label` arrays too; the helper name is broader than its callers.\n- `pr_push` depends on `pr_checkout` having run first for that local branch; there is no alternate metadata source.\n- `pr_checkout` stores push metadata in branch config, not in the worktree directory. Reusing the same `pr-<number>` branch reuses those config keys.\n- Worktree write serialization is keyed by the primary repo root, not the current worktree path, because git worktrees share `.git/config`, `packed-refs`, commit-graph, and worktree metadata files.\n- `search_repos` is the only search op that never forwards `repo`; repository scoping must be expressed in the query itself.\n- `run_watch` success on commit mode means “all observed runs succeeded and no additional runs appeared one poll later”, not merely “latest poll looked green”.\n- The TUI renderer collapses failed log previews unless the result view is expanded; the underlying text result still contains the same tailed lines plus any artifact reference.\n",
|
|
72
|
+
"tools/github.md": "# github\n\n> Dispatch GitHub CLI operations for repositories, issues, pull requests, search, and Actions run watching.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/gh.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/github.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/gh-format.ts` — shorten commit SHAs for summaries.\n - `packages/coding-agent/src/tools/gh-renderer.ts` — TUI rendering, especially `run_watch` live/result views.\n - `packages/coding-agent/src/utils/git.ts` — `gh`/`git` process wrappers, repo locking, branch config writes.\n - `packages/utils/src/dirs.ts` — base directory for dedicated PR worktrees.\n - `packages/coding-agent/src/sdk.ts` — session artifact allocation hook.\n - `packages/coding-agent/src/session/artifacts.ts` — artifact filename format `<id>.<toolType>.log`.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `op` | `\"repo_view\" \\| \"pr_create\" \\| \"pr_checkout\" \\| \"pr_push\" \\| \"search_issues\" \\| \"search_prs\" \\| \"search_code\" \\| \"search_commits\" \\| \"search_repos\" \\| \"run_watch\"` | Yes | Dispatch selector. `GithubTool.execute()` switches only on this field. |\n| `repo` | `string` | No | `owner/repo` override. Ignored when the identifier argument is already a full GitHub URL. For `search_issues`/`search_prs`/`search_code`/`search_commits`, defaults to the current checkout's `owner/repo` when omitted (skipped when the query already contains a `repo:`/`org:`/`user:`/`owner:` qualifier or when current-repo resolution fails). Required in practice when `gh` cannot infer repo context from the current checkout. |\n| `branch` | `string` | No | Used by `repo_view`, `pr_push`, and `run_watch`. `run_watch` falls back to current git branch when `run` is omitted; `pr_push` falls back to current branch. |\n| `pr` | `string \\| string[]` | No | Used by `pr_checkout`. Each item may be a PR number, branch name, or GitHub PR URL. Array form enables batching. Omitted means current branch PR. |\n| `force` | `boolean` | No | Used only by `pr_checkout`. Defaults to `false`; allows resetting an existing `pr-<number>` local branch to the PR head commit. |\n| `forceWithLease` | `boolean` | No | Used only by `pr_push`; passed through to git push. |\n| `title` | `string` | No | Used only by `pr_create`. Required unless `fill` is `true`. |\n| `body` | `string` | No | Used only by `pr_create`. Mutually exclusive with `fill`. Empty/omitted body becomes `--body \"\"` to suppress the interactive editor. Non-empty body is written to a temp file and passed as `--body-file`. |\n| `base` | `string` | No | Used only by `pr_create`; passed as `--base`. |\n| `head` | `string` | No | Used only by `pr_create`; passed as `--head`. |\n| `draft` | `boolean` | No | Used only by `pr_create`. Defaults to `false`. |\n| `fill` | `boolean` | No | Used only by `pr_create`. Defaults to `false`. Mutually exclusive with `title` and `body`. |\n| `reviewer` | `string[]` | No | Used only by `pr_create`; each entry becomes `--reviewer`. |\n| `assignee` | `string[]` | No | Used only by `pr_create`; each entry becomes `--assignee`. |\n| `label` | `string[]` | No | Used only by `pr_create`; each entry becomes `--label`. |\n| `query` | `string` | No | Used by all `search_*` ops. Required there. |\n| `limit` | `number` | No | Used by all `search_*` ops. Defaults to `10`, floored, clamped to `50`, and must be `> 0`. |\n| `run` | `string` | No | Used only by `run_watch`. Must be a numeric run ID or full GitHub Actions run URL. |\n| `tail` | `number` | No | Used only by `run_watch`. Defaults to `15`, floored, clamped to `200`, and must be `> 0`. |\n\n## Outputs\nThe tool returns a single text result built by `buildTextResult()` in `packages/coding-agent/src/tools/gh.ts`.\n\n- `content`: one text block. Multi-item ops join sections with blank lines and `---` separators.\n- `sourceUrl`: set for single repo/PR/run results when a canonical URL is known.\n- `details`: optional structured metadata used by the TUI renderer.\n - Common fields: `artifactId`, `repo`, `branch`, `worktreePath`, `remote`, `remoteBranch`, `headSha`, `runId`, `runIds`, `status`, `conclusion`, `failedJobs`.\n - `pr_checkout` adds `checkouts: GhPrCheckoutSummary[]`.\n - `run_watch` adds `watch: GhRunWatchViewDetails`, which drives the custom live/result renderer in `packages/coding-agent/src/tools/gh-renderer.ts`.\n- Artifact trailer: when `artifactId` is present, the text body gets an appended line like `Full failed-job logs: artifact://<id>`.\n - `run_watch` allocates artifacts with `session.allocateOutputArtifact(\"github\")`; persistent sessions therefore save failed-log bodies as `<artifact-dir>/<id>.github.log`.\n\n`run_watch` is the only streaming op. It emits `onUpdate` snapshots while polling, then returns one final text result.\n\n## Flow\n1. `GithubTool.createIf()` exposes the tool only when `git.github.available()` finds `gh` on `PATH`.\n2. `GithubTool.execute()` wraps dispatch in `untilAborted()` and switches on `params.op`.\n3. Each op normalizes optional strings, arrays, booleans, and numeric caps locally in `packages/coding-agent/src/tools/gh.ts`.\n4. CLI execution goes through `git.github.run/json/text()` in `packages/coding-agent/src/utils/git.ts`:\n - spawns `gh ...` with `Bun.spawn()`;\n - trims stdout/stderr unless `trimOutput: false`;\n - maps common auth/repo-context failures into tool-facing `ToolError` messages;\n - `json()` rejects empty or invalid JSON.\n5. Read-style ops (`repo_view`, `search_*`) fetch JSON and format Markdown-like text summaries. Single-issue and single-PR views were moved out of the tool and now resolve through the `issue://` / `pr://` internal URL schemes, which share the same SQLite cache.\n7. PR diffs moved out of the tool. `pr://<N>/diff` lists changed files, `pr://<N>/diff/<i>` slices a single file, and `pr://<N>/diff/all` returns the full unified diff — see `docs/tools/read.md`. All three variants share one `gh pr diff` invocation through the `pr-diff` cache row.\n8. `pr_checkout` resolves PR metadata first, then enters `git.withRepoLock()` before any git mutation so parallel checkout calls for the same primary repo do not race on shared `.git` state.\n9. `pr_push` reads PR head metadata back from git branch config, derives a refspec, then pushes with `git.push()`.\n10. `pr_create` shells out once, then best-effort re-reads the created PR for a richer summary.\n11. `run_watch` chooses either run mode (`run` supplied) or commit mode (`run` omitted), polls GitHub Actions APIs every 3 seconds, emits streaming updates, and may save a full failed-log artifact before returning.\n12. Final text goes through `toolResult().text(...)`; if `session.allocateOutputArtifact()` returns a slot, failed-log text is persisted with `Bun.write()`.\n\n## Modes / Variants\n\n### `repo_view`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `branch` |\n| `gh` command | `gh repo view [<repo>] [--branch <branch>] --json <GH_REPO_FIELDS>` |\n| Batching | None |\n| Output | `# <owner/repo>` header, description, URL, default branch, requested branch, visibility, permission, primary language, stars, forks, archive/fork flags, updated timestamp, homepage, topics. `sourceUrl = data.url`. |\n\nIf `repo` is omitted, `gh` repository resolution is used.\n\nSingle-issue and single-PR reads live in the `issue://<N>` / `pr://<N>` URL schemes (see `docs/tools/read.md`). They share `~/.omp/cache/github-cache.db` (override via `OMP_GITHUB_CACHE_DB`) and the `github.cache.softTtlSec` / `github.cache.hardTtlSec` / `github.cache.enabled` settings. The cache retains rendered Markdown plus the raw JSON payload returned by `gh`, including private bodies, comments, reviews, and review comments when comments are enabled; rows are scoped by the local GitHub credential fingerprint. Root and repo-scoped reads (`issue://`, `pr://owner/repo`) issue a live `gh issue list` / `gh pr list` for browsing; query params `state`, `limit`, `author`, `label` pass through to `gh` (`issue://` accepts `state=open|closed|all`; `pr://` also accepts `merged`). PR diffs ride the same cache under `pr://<N>/diff[/…]`: the listing, full diff, and per-file slices all share one `pr-diff` row keyed by repo and PR number.\n\n### `pr_create`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` plus either `fill=true` or `title` |\n| Optional fields | `repo`, `title`, `body`, `base`, `head`, `draft`, `fill`, `reviewer[]`, `assignee[]`, `label[]` |\n| `gh` command | `gh pr create ...` with flags assembled from provided fields |\n| Batching | None |\n| Output | `# Created Pull Request ...` summary with URL, state, draft flag, base/head, author, created time, labels, optional body. `sourceUrl` is the created PR URL. |\n\nBranches:\n- `fill && (title || body !== undefined)` throws.\n- Non-empty `body` is written under a temp dir `gh-pr-body-*` in `os.tmpdir()`, passed as `--body-file`, then removed in `finally`.\n- After creation, the tool parses the returned URL and best-effort runs `gh pr view <number> --repo <repo> --json <GH_PR_FIELDS_NO_COMMENTS>`; failures there are swallowed.\n\n### `pr_checkout`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `pr`, `force` |\n| `gh` command | For each requested PR: `gh pr view [<pr>] [--repo <repo>] --json <GH_PR_CHECKOUT_FIELDS>`; cross-repo PRs may also call `gh repo view <headRepository> --json <GH_REPO_CLONE_FIELDS>`. |\n| Batching | Yes. `pr` may be `string[]`; each PR is resolved in parallel, but git mutations are serialized per primary repo by `git.withRepoLock()`. |\n| Output | Single PR: checkout/worktree summary plus `details.repo`, `details.branch`, `details.worktreePath`, `details.remote`, `details.remoteBranch`, `details.checkouts`. Batched: `# <n> Pull Request Worktrees (...)` plus one section per PR and aggregated `details.checkouts`. |\n\nWorktree and metadata behavior:\n- Local branch name is always `pr-<number>`.\n- Worktree path is `path.join(getWorktreesDir(), encodeRepoPathForFilesystem(primaryRepoRoot), localBranch)`, where `getWorktreesDir()` is `~/.omp/wt`; effective path is `~/.omp/wt/<encoded-primary-repo-root>/pr-<number>`.\n- Existing worktree detection is by branch ref `refs/heads/pr-<number>` from `git.worktree.list()`.\n- New worktree creation calls `git.worktree.add(repoRoot, finalWorktreePath, localBranch, { signal })` after verifying the path is neither already registered nor already present on disk.\n- For same-repo PRs, remote is `origin`. For cross-repo PRs, the tool resolves a clone URL for the head repo, reuses an existing remote with the same URL when possible, or creates `fork-<owner>` / `fork-<owner>-<n>`.\n- The branch push metadata is persisted with `git config` under the repository's shared `.git/config` as:\n - `branch.pr-<number>.remote`\n - `branch.pr-<number>.merge`\n - `branch.pr-<number>.pushRemote`\n - `branch.pr-<number>.ompPrHeadRef`\n - `branch.pr-<number>.ompPrUrl`\n - `branch.pr-<number>.ompPrIsCrossRepository`\n - `branch.pr-<number>.ompPrMaintainerCanModify`\n- If `refs/heads/pr-<number>` already exists at a different commit, checkout fails unless `force=true`, in which case `git branch --force` resets it to the fetched PR head.\n- If a matching worktree already exists, the tool reuses it and reports `reused: true`.\n\n### `pr_push`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `branch`, `forceWithLease` |\n| `gh` command | None. This path uses git, not `gh`. |\n| Batching | None |\n| Output | `# Pushed Pull Request Branch` summary with local branch, remote, remote branch, remote URL, PR URL, and force-with-lease flag. `sourceUrl = prUrl` when known. |\n\nPush target resolution reads the `branch.<name>.ompPrHeadRef`, `pushRemote`/`remote`, `ompPrUrl`, `ompPrMaintainerCanModify`, and `ompPrIsCrossRepository` git-config keys written by `pr_checkout`. If the current checked-out branch matches the target branch, the source ref is `HEAD`; otherwise it pushes `refs/heads/<branch>`. The refspec is `HEAD:refs/heads/<headRef>` or `refs/heads/<branch>:refs/heads/<headRef>`.\n\n### `search_issues`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `repo`, `limit` |\n| `gh` command | `gh api -X GET /search/issues -f q=\"<query> [repo:<repo>] is:issue\" -F per_page=<limit>` |\n| Batching | None |\n| Output | `# GitHub issues search`, echoed query, optional repo, result count, then one bullet per issue with repo/state/author/labels/timestamps/URL. |\n\n`repo` defaults to the current checkout's `owner/repo` via `resolveSearchRepoScope()` when omitted. The default is suppressed when the query already contains a leading `repo:`/`org:`/`user:`/`owner:` qualifier or when `gh repo view` fails to resolve the current checkout (e.g. outside a github remote).\n\n### `search_prs`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `repo`, `limit` |\n| `gh` command | `gh api -X GET /search/issues -f q=\"<query> [repo:<repo>] is:pr\" -F per_page=<limit>` |\n| Batching | None |\n| Output | Same shape as `search_issues`, labeled as pull requests. |\n\n`repo` defaults to the current checkout's `owner/repo` as in `search_issues`.\n\n### `search_code`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `repo`, `limit` |\n| `gh` command | `gh api -X GET /search/code -f q=\"<query> [repo:<repo>]\" -F per_page=<limit> -H \"Accept: application/vnd.github.text-match+json\"` |\n| Batching | None |\n| Output | `# GitHub code search`, result count, then one bullet per match with path, repo, short commit SHA, URL, and first normalized text-match fragment line when present. |\n\n`repo` defaults to the current checkout's `owner/repo` as in `search_issues`.\n\n### `search_commits`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `repo`, `limit` |\n| `gh` command | `gh api -X GET /search/commits -f q=\"<query> [repo:<repo>]\" -F per_page=<limit>` |\n| Batching | None |\n| Output | `# GitHub commits search`, result count, then one bullet per commit: short SHA + first commit-message line, repo, author, date, URL. |\n\n`repo` defaults to the current checkout's `owner/repo` as in `search_issues`.\n\n### `search_repos`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `limit` |\n| `gh` command | `gh api -X GET /search/repositories -f q=\"<query>\" -F per_page=<limit>` |\n| Batching | None |\n| Output | `# GitHub repositories search`, result count, then one bullet per repo with first description line, language, stars, forks, open issues, visibility, archive/fork flags, updated time, URL. |\n\n`repo` is intentionally not used for this op.\n\n### `run_watch`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `branch`, `run`, `tail` |\n| `gh` command | Repo resolution: `gh repo view --json nameWithOwner -q .nameWithOwner` when `repo` and run URL repo are both absent. Single-run mode uses `gh api --method GET /repos/<repo>/actions/runs/<runId>` and `gh api --method GET /repos/<repo>/actions/runs/<runId>/jobs`. Commit mode uses `gh api --method GET /repos/<repo>/branches/<branch>`, `gh api --method GET /repos/<repo>/actions/runs`, `gh api --method GET /repos/<repo>/actions/runs/<runId>/jobs`, and `gh api /repos/<repo>/actions/jobs/<jobId>/logs` for failed jobs. |\n| Batching | Implicit batching only in commit mode: all workflow runs for one commit are tracked together. |\n| Output | Streaming watch snapshots via `onUpdate`, then a final text report. On failure, appends `Full failed-job logs: artifact://<id>` and sets `details.artifactId`. |\n\nWatch flow:\n- `run` parsing accepts either a decimal run ID or a full run URL. URL repo must match explicit `repo` when both are given.\n- Poll interval is fixed at 3 seconds (`RUN_WATCH_INTERVAL_DEFAULT`).\n- Failure grace period is fixed at 5 seconds (`RUN_WATCH_GRACE_DEFAULT`). When any failed job appears before completion, the tool emits a note, waits once, re-fetches state, then collects logs so concurrent failures are included.\n- Failed-job logs are fetched with `gh api /repos/<repo>/actions/jobs/<jobId>/logs` via `git.github.run()`, not `json()`. Non-zero exit leaves `available: false` instead of failing the whole watch.\n- Inline result includes only the last `tail` lines per failed job. The saved artifact contains full logs (`mode: \"full\"`).\n- In commit mode, success is intentionally double-checked: once all known runs are successful, the tool waits one more poll interval and succeeds only if the set of run IDs is unchanged. This avoids returning before late workflow runs appear for the same commit.\n- `details.watch` drives a specialized renderer in `packages/coding-agent/src/tools/gh-renderer.ts`; non-watch results fall back to generic text rendering.\n\n## Side Effects\n- Filesystem\n - `pr_create` may create a temp dir under `os.tmpdir()` named `gh-pr-body-*`, write `body.md`, then remove the dir in `finally`.\n - `pr_checkout` may create directories under `~/.omp/wt/<encoded-primary-repo-root>/` and add git worktrees there.\n - `run_watch` may write a session artifact with full failed-job logs.\n- Network\n - Every op shells out to `gh`, which then talks to GitHub APIs except `pr_push`.\n - `pr_push` uses git network transport to the configured remote.\n- Subprocesses / native bindings\n - All `gh` calls use `Bun.spawn([\"gh\", ...args])`.\n - `pr_checkout` and `pr_push` also invoke git helpers from `packages/coding-agent/src/utils/git.ts`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - `run_watch` consumes `session.allocateOutputArtifact()` when failed-job logs are persisted.\n - Returned `details` objects carry run/checkouts metadata for the renderer/UI.\n- User-visible prompts / interactive UI\n - `gh` interactive editor fallback is suppressed for `pr_create` by forcing either `--body-file` or `--body \"\"`.\n - `gh-renderer` provides compact headers for all ops and a custom live watch view for `run_watch`.\n- Background work / cancellation\n - `run_watch` loops until success/failure and uses `scheduler.wait()` between polls.\n - `GithubTool.execute()` is wrapped in `untilAborted()`; `git.github.run()` forwards the abort signal into `Bun.spawn()`.\n\n## Limits & Caps\n- Search result default: `10` (`SEARCH_LIMIT_DEFAULT` in `packages/coding-agent/src/tools/gh.ts`).\n- Search result max: `50` (`SEARCH_LIMIT_MAX`).\n- PR file preview inside the `pr://` view: first `50` files only (`FILE_PREVIEW_LIMIT` in `gh.ts`).\n- Run-watch poll interval: `3s` (`RUN_WATCH_INTERVAL_DEFAULT`).\n- Run-watch failure grace period: `5s` (`RUN_WATCH_GRACE_DEFAULT`).\n- Run-watch failed-log tail default: `15` lines (`RUN_WATCH_TAIL_DEFAULT`).\n- Run-watch failed-log tail max: `200` lines (`RUN_WATCH_TAIL_MAX`).\n- PR review comments page size: `100` (`REVIEW_COMMENTS_PAGE_SIZE`).\n- Actions jobs page size: `100` (`RUN_JOBS_PAGE_SIZE`).\n- Search and tail numeric inputs are floored with `Math.floor()`, clamped to the max, and rejected when non-finite or `<= 0`.\n- `pr_checkout` batch fan-out is unbounded in tool code; all requested PRs are launched with `Promise.all()`.\n\n## Errors\n- Tool creation is skipped entirely when `gh` is not installed.\n- `git.github.run()` throws `ToolError(\"GitHub CLI (gh) is not installed...\")` if `gh` is missing at execution time.\n- `git.github.text/json()` map common failures to model-facing messages:\n - not authenticated → `GitHub CLI is not authenticated. Run \\`gh auth login\\`.`\n - missing repo context without explicit `repo` → `GitHub repository context is unavailable. Pass \\`repo\\` explicitly or run the tool inside a GitHub checkout.`\n - otherwise stderr/stdout text, or fallback `GitHub CLI command failed: gh ...`\n- `json()` also throws on empty stdout or invalid JSON.\n- Local validation errors throw `ToolError`, including:\n - missing required per-op fields (`query`, `title unless fill=true`)\n - invalid numeric `limit` / `tail`\n - invalid `run` format\n - `fill` combined with `title` or `body`\n - missing git repo / branch / HEAD context for checkout, push, or watch\n - `pr_push` on a branch without `ompPrHeadRef` metadata\n - conflicting existing worktree path or branch without `force`\n- `run_watch` treats failed-job log fetches specially: missing log content does not fail the watch; it marks that log `available: false` and prints `Log tail unavailable.` / `Full log unavailable.`.\n- `pr_create` swallows only the post-create best-effort `gh pr view` refresh; the create step itself still fails normally.\n\n## Notes\n- `appendRepoFlag()` intentionally skips `--repo` when the identifier argument is already a full GitHub URL; that lets `gh` derive repo/number from the URL.\n- `normalizePrIdentifierList()` accepts `reviewer`, `assignee`, and `label` arrays too; the helper name is broader than its callers.\n- `pr_push` depends on `pr_checkout` having run first for that local branch; there is no alternate metadata source.\n- `pr_checkout` stores push metadata in branch config, not in the worktree directory. Reusing the same `pr-<number>` branch reuses those config keys.\n- Worktree write serialization is keyed by the primary repo root, not the current worktree path, because git worktrees share `.git/config`, `packed-refs`, commit-graph, and worktree metadata files.\n- `search_repos` is the only search op that never forwards `repo`; repository scoping must be expressed in the query itself.\n- `run_watch` success on commit mode means “all observed runs succeeded and no additional runs appeared one poll later”, not merely “latest poll looked green”.\n- The TUI renderer collapses failed log previews unless the result view is expanded; the underlying text result still contains the same tailed lines plus any artifact reference.\n",
|
|
73
73
|
"tools/inspect_image.md": "# inspect_image\n\n> Send a local image file to a vision-capable model and return text analysis.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/inspect-image.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/inspect-image.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/inspect-image-renderer.ts` — TUI call/result rendering.\n - `packages/coding-agent/src/utils/image-loading.ts` — path resolution, type detection, size gate, optional resize.\n - `packages/coding-agent/src/utils/image-resize.ts` — downscale and recompress oversized images.\n - `packages/coding-agent/src/tools/path-utils.ts` — resolve input path relative to session cwd.\n - `packages/utils/src/mime.ts` — detect supported image formats from file bytes.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `path` | `string` | Yes | Image path passed to `loadImageInput`; resolved relative to `session.cwd` by `resolveReadPath(...)`. |\n| `question` | `string` | Yes | User prompt sent as a text content block alongside the image. |\n\n## Outputs\nThe tool returns a single `AgentToolResult`:\n\n- `content`: one text block, `[{ type: \"text\", text }]`, where `text` is the concatenated assistant text content from the model response.\n- `details`:\n - `model`: `<provider>/<id>` of the selected model.\n - `imagePath`: resolved filesystem path returned by `loadImageInput(...)`.\n - `mimeType`: MIME type actually sent to the model after optional resize/re-encode.\n\nModel-visible output is single-shot, not streamed by this tool.\n\nTUI rendering adds presentation-only truncation from `packages/coding-agent/src/tools/inspect-image-renderer.ts`:\n\n- call preview truncates `question` to 100 columns,\n- result view shows 4 lines collapsed or 16 lines expanded,\n- each rendered output line is truncated to 120 columns,\n- footer metadata shows `model · mimeType` when present.\n\n## Flow\n1. `InspectImageTool.execute(...)` rejects immediately if `images.blockImages` is enabled in session settings.\n2. It reads `session.modelRegistry`; missing registry, empty registry, missing API key, or unresolved model each raise `ToolError` from `packages/coding-agent/src/tools/inspect-image.ts`.\n3. Model selection tries, in order, `pi/vision`, `pi/default`, the active model string from the session, then `availableModels[0]`. `expandRoleAlias(...)` and `resolveModelFromString(...)` handle each lookup.\n4. The chosen model must advertise `input.includes(\"image\")`; otherwise execution fails before reading the file.\n5. `loadImageInput(...)` in `packages/coding-agent/src/utils/image-loading.ts` resolves the path with `resolveReadPath(...)`, detects MIME type with `readImageMetadata(...)`, and rejects files larger than `MAX_IMAGE_INPUT_BYTES` (`20 * 1024 * 1024`, 20 MiB) using `ImageInputTooLargeError`.\n6. `readImageMetadata(...)` in `packages/utils/src/mime.ts` inspects file headers only. Supported detected MIME types are `image/png`, `image/jpeg`, `image/gif`, and `image/webp`.\n7. If `images.autoResize` is true, `loadImageInput(...)` calls `resizeImage(...)`. Resize failures are swallowed there and the original bytes are kept.\n8. If MIME detection returned no supported image type, `execute(...)` throws `ToolError(\"inspect_image only supports PNG, JPEG, GIF, and WEBP files detected by file content.\")`.\n9. The tool calls `completeSimple(...)` with one user message containing two content parts in order:\n - `{ type: \"image\", data: imageInput.data, mimeType: imageInput.mimeType }`\n - `{ type: \"text\", text: params.question }`\n10. `systemPrompt` is a one-element array rendered from `packages/coding-agent/src/prompts/tools/inspect-image-system.md`.\n11. If the model response stop reason is `error` or `aborted`, the tool maps that to `ToolError`.\n12. `extractResponseText(...)` concatenates only `text` content blocks from the assistant message, trims the result, and fails if nothing remains.\n13. Success returns the text plus `details`; `inspectImageToolRenderer` formats the result for the TUI.\n\n## Modes / Variants\n- **Original image path**: `images.autoResize` disabled. The original file bytes are base64-encoded and sent with the detected MIME type.\n- **Auto-resized path**: `images.autoResize` enabled. `resizeImage(...)` may downscale and re-encode the image before upload.\n- **Unsupported image path**: file exists but header sniffing does not identify PNG/JPEG/GIF/WEBP. The tool returns a `ToolError` before any model call.\n- **Oversize image path**: file size exceeds 20 MiB before upload. The tool returns a `ToolError` before any model call.\n\n## Side Effects\n- Filesystem\n - Resolves and reads the target image from disk.\n - Stats the file once with `Bun.file(...).stat()` and reads it fully with `fs.readFile(...)`.\n- Network\n - Sends the final base64 image payload plus question text to the selected model through `completeSimple(...)`.\n- Session state\n - Reads session settings, active model preferences, cwd, and model registry.\n- Background work / cancellation\n - Passes the caller `AbortSignal` into `completeSimple(...)`.\n - Image preprocessing is local and not cancellation-aware in these helpers.\n\n## Limits & Caps\n- Supported detected input formats: `image/png`, `image/jpeg`, `image/gif`, `image/webp` (`SUPPORTED_IMAGE_MIME_TYPES` in `packages/utils/src/mime.ts`).\n- Metadata sniff cap: `DEFAULT_IMAGE_METADATA_HEADER_BYTES = 256 * 1024` bytes. Format detection only reads up to 256 KiB from the file header.\n- Upload input cap: `MAX_IMAGE_INPUT_BYTES = 20 * 1024 * 1024` bytes (20 MiB) in `packages/coding-agent/src/utils/image-loading.ts`.\n- Auto-resize defaults in `packages/coding-agent/src/utils/image-resize.ts`:\n - `maxWidth: 1568`\n - `maxHeight: 1568`\n - `maxBytes: 500 * 1024` bytes (500 KiB target)\n - `jpegQuality: 75`\n- Resize fast path: if the original image is already within `1568x1568` and within `maxBytes / 4` (125 KiB by default), `resizeImage(...)` returns the original bytes unchanged.\n- Resize quality ladder: after the first encode pass, lossy retries use qualities `[70, 60, 50, 40]`.\n- Resize dimension ladder: if quality reduction still misses the byte target, retries scale dimensions by `[1.0, 0.75, 0.5, 0.35, 0.25]` and stop if either dimension would fall below `100` pixels.\n- First resize pass encodes PNG, JPEG, and WebP, then keeps the smallest encoded buffer. Fallback passes encode JPEG and WebP only, again keeping the smaller output.\n- Renderer caps:\n - `INSPECT_QUESTION_PREVIEW_WIDTH = 100`\n - `INSPECT_OUTPUT_COLLAPSED_LINES = 4`\n - `INSPECT_OUTPUT_EXPANDED_LINES = 16`\n - `INSPECT_OUTPUT_LINE_WIDTH = 120`\n\n## Errors\n- Settings gate:\n - `Image submission is disabled by settings (images.blockImages=true). Disable it to use inspect_image.`\n- Model resolution / capability:\n - `Model registry is unavailable for inspect_image.`\n - `No models available for inspect_image.`\n - `Unable to resolve a model for inspect_image.`\n - `Resolved model <provider>/<id> does not support image input. Configure a vision-capable model for modelRoles.vision.`\n - `No API key available for <provider>/<id>. Configure credentials for this provider or choose another vision-capable model.`\n- Input file:\n - `Image file too large: <size> exceeds <limit> limit.` from `ImageInputTooLargeError`, remapped to `ToolError`.\n - `inspect_image only supports PNG, JPEG, GIF, and WEBP files detected by file content.` when header sniffing fails.\n- Model call:\n - `inspect_image request failed.` if the response stop reason is `error` without a provider message.\n - Provider `errorMessage` is passed through when present.\n - `inspect_image request aborted.` on aborted responses.\n - `inspect_image model returned no text output.` when the assistant message contains no text blocks after filtering.\n\nFailures surface as thrown `ToolError`s from `execute(...)`; the normal success return shape is not used for error reporting.\n\n## Notes\n- The model-facing prompt path on disk is `packages/coding-agent/src/prompts/tools/inspect-image.md`; the assignment's underscore form does not exist.\n- Format support is based on file content, not filename extension. Renaming a non-image file to `.png` does not make it valid.\n- `resolveReadPath(...)` tries macOS-specific path variants: shell-unescaped spaces, AM/PM narrow no-break-space filenames, NFD normalization, and curly-quote variants.\n- `loadImageInput(...)` also computes `textNote`, `dimensionNote`, and final `bytes`, but `inspect_image` does not include those in tool output.\n- Auto-resize can change the MIME type sent to the model. A JPEG or GIF input may be uploaded as PNG, JPEG, or WebP depending on which encoder output is smallest.\n- If `resizeImage(...)` throws or cannot decode the image, `loadImageInput(...)` silently keeps the original base64 payload instead of failing.\n",
|
|
74
74
|
"tools/irc.md": "# irc\n\n> Send short prose messages to other live agents in the current process.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/irc.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/irc.md`\n- Key collaborators:\n - `packages/coding-agent/src/registry/agent-registry.ts` — process-global live agent directory.\n - `packages/coding-agent/src/session/agent-session.ts` — side-channel reply generation and history injection.\n - `packages/coding-agent/src/prompts/system/irc-incoming.md` — no-tools auto-reply prompt.\n - `packages/coding-agent/src/tools/index.ts` — tool availability gating.\n - `packages/coding-agent/src/config/settings-schema.ts` — `irc.enabled` default.\n - `packages/coding-agent/src/modes/controllers/event-controller.ts` — renders IRC events into chat UI.\n - `packages/coding-agent/src/modes/utils/ui-helpers.ts` — formats `[IRC]` transcript lines.\n - `packages/coding-agent/src/task/executor.ts` — carries `irc.enabled` into subagents.\n\n## Inputs\n\n### `op: \"list\"`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `op` | `\"list\"` | Yes | Lists peers visible to the caller. |\n\n### `op: \"send\"`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `op` | `\"send\"` | Yes | Sends one message to one peer or to `\"all\"`. |\n| `to` | `string` | Yes | Peer id such as `0-Main`, or `\"all\"` for broadcast. Whitespace is trimmed. |\n| `message` | `string` | Yes | Message body. Whitespace is trimmed; empty-after-trim is rejected. |\n| `awaitReply` | `boolean` | No | Wait for prose replies. Defaults to `true` for direct messages and `false` for `to: \"all\"`. |\n\n## Outputs\n- Single-shot `AgentToolResult`; no streaming updates.\n- `content` is one text block.\n - `list` returns either `No other live agents.` or a bullet list headed by `<n> peer(s):`.\n - `send` returns delivery summary text, then optional `## Replies`, `## Failed`, and `Unknown / unavailable peers:` sections.\n- `details` is structured metadata:\n - `list`: `{ op, from, peers, channels }`\n - `send`: `{ op, from, to, delivered, replies?, failed?, notFound? }`\n- The tool does not return raw IRC frames, message ids, or a transcript object.\n\n## Flow\n1. `IrcTool.createIf` only constructs the tool when `irc.enabled` is on and the session has both an `AgentRegistry` and `getAgentId` (`packages/coding-agent/src/tools/irc.ts`).\n2. Tool discovery adds another gate in `packages/coding-agent/src/tools/index.ts`: if the caller is `0-Main` and `async.enabled` is off, `irc` is hidden because the main agent cannot talk to concurrent peers in sync mode.\n3. `execute` resolves the process-global registry and sender id. Missing either returns a text error result instead of throwing.\n4. `op: \"list\"` calls `registry.listVisibleTo(senderId)`, which exposes every other agent in flat namespace whose status is `running` or `idle` (`packages/coding-agent/src/registry/agent-registry.ts`).\n5. `list` formats human-readable lines and returns `channels` as `['all', ...peerIds]`. These are logical targets only; there is no channel join state.\n6. `op: \"send\"` trims `to` and `message`; missing values produce text errors.\n7. `send` resolves targets:\n - `to === \"all\"`: all visible peers.\n - otherwise: one exact registry id, excluding self and excluding peers not in `running`/`idle`.\n8. `send` chooses `awaitReply = params.awaitReply ?? !isBroadcast`.\n9. Each target is dispatched in parallel via `target.session.respondAsBackground(...)`. One slow or failing peer does not block dispatch to the others.\n10. `respondAsBackground` emits an `irc_message` session event, forwards a display-only relay to the main session UI, and either:\n - queues just the incoming message for later history injection when `awaitReply === false`, or\n - renders `packages/coding-agent/src/prompts/system/irc-incoming.md`, runs `runEphemeralTurn` with `toolChoice: \"none\"`, emits an auto-reply event, then queues both incoming and reply messages for history injection.\n11. Deferred injection waits until the recipient is no longer streaming; `#flushPendingBackgroundExchanges` appends the custom messages through normal `message_start`/`message_end` external events so persistence and listeners see them.\n12. `send` aggregates `delivered`, `replies`, `failed`, and `notFound`, then returns one text summary plus matching `details`.\n\n## Modes / Variants\n- `list`: enumerate visible peers and logical channels.\n- `send` direct message: one exact peer id, default synchronous auto-reply.\n- `send` broadcast: `to: \"all\"`, default fire-and-forget (`awaitReply: false`) to every visible peer.\n- `send` with `awaitReply: false`: recipient records the incoming message but does not generate a reply.\n- `send` with `awaitReply: true`: recipient performs a no-tools ephemeral LLM turn and returns prose.\n\n## Side Effects\n- Session state\n - Reads from the process-global `AgentRegistry`.\n - Emits `irc_message` session events on recipient sessions.\n - Queues IRC custom messages into recipient persisted history after the current stream finishes.\n - For non-main recipients, forwards display-only relay observations into the main session UI; these relays are not persisted to the main agent history.\n - Subagents inherit `irc.enabled` from task executor settings.\n- User-visible prompts / interactive UI\n - IRC events render as `[IRC]` transcript lines in the TUI.\n - Auto-replies are generated from `packages/coding-agent/src/prompts/system/irc-incoming.md` and explicitly forbid tool use.\n- Background work / cancellation\n - `send` starts one background `respondAsBackground` call per target.\n - The caller's `AbortSignal` is forwarded into each background reply turn.\n- Network\n - No IRC server connection.\n - When `awaitReply: true`, the recipient may make model-provider API calls through `runEphemeralTurn`.\n- Filesystem\n - No direct filesystem writes in the tool itself.\n\n## Limits & Caps\n- Availability gates:\n - `irc.enabled` defaults to `true` in `packages/coding-agent/src/config/settings-schema.ts`.\n - Main agent tool discovery suppresses `irc` when `async.enabled` is off (`packages/coding-agent/src/tools/index.ts`).\n- Visibility scope: only peers in status `running` or `idle` are addressable via `listVisibleTo`.\n- Reply execution:\n - No tools are available in auto-reply turns (`toolChoice: \"none\"` in `runEphemeralTurn`).\n - No internal timeout, retry, backoff, rate limit, or reply length cap is defined in `irc.ts`; behavior relies on the underlying model stream and any upstream API limits.\n- Flush scheduling: deferred history injection polls every `50` ms while the recipient is still streaming (`#scheduleBackgroundExchangeFlush` in `packages/coding-agent/src/session/agent-session.ts`).\n\n## Errors\n- The tool returns text errors, not thrown exceptions, for:\n - missing registry: `IRC is unavailable in this session.`\n - missing sender id: `IRC is unavailable: caller has no agent id.`\n - missing `to`: `` `to` is required for op=\"send\". ``\n - missing `message`: `` `message` is required for op=\"send\". ``\n - unknown op: `Unknown irc op.`\n- Unknown, self-addressed, non-running, and non-idle direct targets are reported under `details.notFound` and in the text footer `Unknown / unavailable peers:`.\n- If a target has no attached session, it is treated as not found.\n- Exceptions thrown by `respondAsBackground` or `runEphemeralTurn` are caught per-target and surfaced under `details.failed` as `{ id, error }`; other recipients still complete.\n- If no target succeeds, `send` still returns normally with `No recipients received the message.` and optional `failed`/`notFound` metadata.\n\n## Notes\n- This is IRC-like naming only. There are no servers, sockets, nick registration, auth handshakes, channels beyond `all`, or commands such as join/part/topic.\n- Addressing is by exact agent id from the registry; there is no fuzzy lookup or aliasing.\n- `channels` in `list` is synthetic output: `all` plus visible peer ids. Nothing is persisted across calls as channel membership.\n- Persistence is per recipient history, not per sender history. The sender gets the tool result; the recipient later sees injected custom messages on its next turn.\n- The main UI may show IRC relays for conversations it was not part of, but those relay records are explicitly display-only.\n- Because reply generation snapshots in-flight assistant text, a recipient can answer based on partially streamed context.\n- Direct self-messaging is rejected by resolving the target as unavailable.",
|
|
75
|
-
"tools/job.md": "# job\n\n> Wait for or cancel background jobs managed by the session async runtime.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/job.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/job.md`\n- Key collaborators:\n - `packages/coding-agent/src/async/job-manager.ts` — job registry, cancellation, delivery suppression.\n - `packages/coding-agent/src/async/support.ts` — feature gating for background jobs.\n - `packages/coding-agent/src/internal-urls/jobs-protocol.ts` — `jobs://` listing and per-job detail.\n - `packages/coding-agent/src/tools/bash.ts` — explicit async bash and auto-backgrounded bash jobs.\n - `packages/coding-agent/src/task/index.ts` — async task-job scheduling.\n - `packages/coding-agent/src/sdk.ts` — automatic follow-up delivery for unsuppressed completions.\n - `packages/coding-agent/src/config/settings-schema.ts` — `async.pollWaitDuration` options.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `poll` | `string[]` | No | Job ids to watch. If omitted and `cancel` is also omitted, the tool watches all running jobs. If provided, missing ids are silently filtered out before waiting. |\n| `cancel` | `string[]` | No | Job ids to cancel before any polling. Missing ids are reported as `not_found`; non-running ids as `already_completed`. |\n\n## Outputs\nThe tool returns one text block plus `details`.\n\n- `content[0].text`: markdown-like plain text sections assembled by `#buildResult(...)`:\n - `## Cancelled (N)` for cancel outcomes.\n - `## Completed (N)` for non-running jobs, including stored `resultText` and `errorText`.\n - `## Still Running (N)` for jobs still in `running`.\n- `details.jobs`: array of snapshots:\n - `id: string`\n - `type: \"bash\" | \"task\"`\n - `status: \"running\" | \"completed\" | \"failed\" | \"cancelled\"`\n - `label: string`\n - `durationMs: number`\n - optional `resultText`, `errorText`\n- `details.cancelled` appears only when `cancel` was passed; each item is `{ id, status }` where status is `\"cancelled\" | \"not_found\" | \"already_completed\"`.\n\nStreaming behavior:\n- During a polling wait, `execute(...)` emits `onUpdate(...)` every 500 ms with an empty text block and fresh `details.jobs` snapshots.\n- Final return is single-shot after a completion, timeout, abort, or immediate fast path.\n\nRelated read path:\n- Reading `jobs://` lists all current jobs.\n- Reading `jobs://<id>` renders one job with status, label, start time, duration, and stored result/error text.\n\n## Flow\n1. `JobTool.createIf(...)` in `packages/coding-agent/src/tools/job.ts` only exposes the tool when `isBackgroundJobSupportEnabled(...)` returns true for either `async.enabled` or `bash.autoBackground.enabled`.\n2. `execute(...)` fetches `session.asyncJobManager`. If absent, it returns `Async execution is disabled; no background jobs are available.`\n3. `cancel` ids are processed first:\n - `manager.getJob(id)` missing → `not_found`.\n - existing job with `status !== \"running\"` → `already_completed`.\n - running job → `manager.cancel(id)`, which sets `job.status = \"cancelled\"`, aborts the controller, and schedules eviction.\n4. Polling mode is chosen with `const shouldPoll = requestedPollIds !== undefined || cancelIds.length === 0`:\n - only `cancel` present → return immediately, no wait.\n - explicit `poll`, or no args at all → proceed to watch jobs.\n5. Watch set resolution:\n - explicit `poll` → map ids through `manager.getJob(...)` and drop missing ones.\n - no `poll` and no `cancel` → `manager.getRunningJobs()`.\n6. Empty watch set returns immediately:\n - if cancellations happened, return snapshots for the cancelled ids that still exist.\n - else return either `No matching jobs found for IDs: ...` or `No running background jobs to wait for.`\n7. If every watched job is already non-running, `#buildResult(...)` returns immediately without waiting.\n8. Otherwise the tool waits on `Promise.race(...)` across:\n - every watched running job's `job.promise`,\n - a timeout promise for `async.pollWaitDuration`,\n - the tool-call abort signal when present.\n9. Before waiting, it calls `manager.watchJobs(watchedJobIds)`. This suppresses automatic completion delivery for those ids while they are being watched.\n10. If `onUpdate` exists, a 500 ms interval sends progress snapshots from `#snapshotJobs(...)`; one snapshot is emitted immediately before entering the race.\n11. In `finally`, the tool always calls `manager.unwatchJobs(...)`, clears the timeout, and stops the progress interval.\n12. `#buildResult(...)` deduplicates jobs, snapshots current manager state, then calls `manager.acknowledgeDeliveries(...)` for every non-running job in the result. That suppresses later automatic follow-up delivery for the same completions and removes queued deliveries for those ids.\n13. The final text groups jobs by non-running vs still-running state. A timeout is not an error path; it simply returns the current snapshot.\n\n## Modes / Variants\n- Poll all running jobs: call with neither `poll` nor `cancel`.\n- Poll explicit ids: call with `poll` only.\n- Cancel only: call with `cancel` only; cancellations happen and the tool returns immediately.\n- Cancel then poll: call with both. Cancellations are applied first, then the tool watches the remaining resolved `poll` ids.\n- Read-only inspection outside the tool: `jobs://` and `jobs://<id>` expose the same manager state without waiting.\n\nSpawn paths that produce jobs:\n- `packages/coding-agent/src/tools/bash.ts`\n - `async: true` always registers a `type: \"bash\"` job with `AsyncJobManager.register(...)` and returns a start message.\n - auto-background mode (`bash.autoBackground.enabled`) starts the same managed job path for non-PTY commands, waits up to `min(bash.autoBackground.thresholdMs, timeoutMs - 1000)`, and if the command is still running returns a background-job start result instead of inline command output.\n- `packages/coding-agent/src/task/index.ts`\n - when `async.enabled` is on, the chosen agent is not blocking, and `tasks.length > 0`, each task item is registered as a `type: \"task\"` job.\n\nLifecycle and exact state names:\n- Conceptual scheduling path: `pending` (only task-progress bookkeeping before work starts) → `running` → `completed` / `failed`; cancellation changes a running async job to `cancelled`.\n- Exact `AsyncJob.status` values in `packages/coding-agent/src/async/job-manager.ts`: `\"running\" | \"completed\" | \"failed\" | \"cancelled\"`.\n- Exact per-task progress values in `packages/coding-agent/src/task/types.ts`: `\"pending\" | \"running\" | \"completed\" | \"failed\" | \"aborted\"`.\n\n## Side Effects\n- Filesystem\n - None in `job.ts` itself.\n - Jobs being observed may already have written artifacts/results through their own tool runtimes.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Reads and mutates `session.asyncJobManager` state.\n - `watchJobs(...)` / `unwatchJobs(...)` toggle delivery suppression for the watched ids.\n - `acknowledgeDeliveries(...)` marks completed ids as suppressed and removes queued deliveries for them.\n - `cancel(...)` aborts running jobs through each job's `AbortController`.\n- User-visible prompts / interactive UI\n - Polling emits periodic `onUpdate` snapshots every 500 ms.\n - Automatic job completion follow-ups are generated by `packages/coding-agent/src/sdk.ts` only for unsuppressed deliveries.\n- Background work / cancellation\n - Waiting uses a timeout plus optional tool-call abort signal.\n - Cancelling a job does not synchronously await teardown; it flips state, aborts, and returns control to the manager/job promise.\n\n## Limits & Caps\n- Poll wait duration comes from `async.pollWaitDuration` in `packages/coding-agent/src/config/settings-schema.ts`:\n - allowed values: `5s`, `10s`, `30s`, `1m`, `5m`\n - default: `30s`\n- Progress update cadence while polling: `PROGRESS_INTERVAL_MS = 500` in `packages/coding-agent/src/tools/job.ts`.\n- Async job retention default: `DEFAULT_RETENTION_MS = 5 * 60 * 1000` in `packages/coding-agent/src/async/job-manager.ts`.\n- Manager fallback max-running limit: `DEFAULT_MAX_RUNNING_JOBS = 15` in `packages/coding-agent/src/async/job-manager.ts`.\n- Session wiring clamps `async.maxJobs` to `1..100` before constructing the manager in `packages/coding-agent/src/sdk.ts`; settings default is `100` in `packages/coding-agent/src/config/settings-schema.ts`.\n- Async completion delivery retry backoff in `packages/coding-agent/src/async/job-manager.ts`:\n - base `500` ms\n - max `30_000` ms\n - jitter `< 200` ms\n - exponent capped at 8 doublings\n\n## Errors\n- Tool-disabled path is returned as normal text, not thrown: `Async execution is disabled; no background jobs are available.`\n- Polling a nonexistent id is not an exception:\n - with `poll` only, missing ids are dropped; if none remain the tool returns `No matching jobs found for IDs: ...`.\n - with `cancel`, each missing id is reported as `not_found` in `details.cancelled` and text.\n- Cancelling a non-running job is not an exception; it reports `already_completed` even if the actual status is `completed`, `failed`, or `cancelled`.\n- Tool-call abort during polling stops waiting and returns a final snapshot through `#buildResult(...)`; it does not cancel watched jobs.\n- Failures inside the underlying async work are stored on the job (`status: \"failed\"`, `errorText`) and reported in normal tool output, not rethrown by `job`.\n- Reading `jobs://<id>` for a missing job returns markdown content headed `# Job Not Found` rather than throwing.\n\n## Notes\n- `job` waits for the first watched running job to settle, not for all watched jobs. If others remain `running`, they are reported under `## Still Running`; the caller must invoke `job` again to continue waiting.\n- Delivery suppression is the key difference between snapshot and automatic delivery:\n - snapshots (`job`, reads of `jobs://`) read current manager state;\n - follow-up delivery comes from `AsyncJobManager.#enqueueDelivery(...)` and `sdk.ts` `onJobComplete`;\n - watched or acknowledged ids are suppressed via `isDeliverySuppressed(...)`.\n- `manager.cancel(id)` sets `status = \"cancelled\"` before the underlying promise settles. The job function may later populate `resultText` or `errorText`; `job-manager.ts` preserves that text but does not transition the status away from `cancelled`.\n- `jobs://` is implemented by `JobsProtocolHandler` with `immutable = true`, but each resolve call reads live manager state at access time.\n- `jobs://<id>` shows a cancellation section only when a cancelled job has `errorText`; cancelled jobs with `resultText` are not rendered with a result section there.\n- Retention eviction removes the job record, suppression flags, and watch flag together. After eviction, both `job` and reads of `jobs://<id>` behave as if the id never existed.\n",
|
|
75
|
+
"tools/job.md": "# job\n\n> Wait for or cancel background jobs managed by the session async runtime.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/job.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/job.md`\n- Key collaborators:\n - `packages/coding-agent/src/async/job-manager.ts` — job registry, cancellation, delivery suppression.\n - `packages/coding-agent/src/async/support.ts` — feature gating for background jobs.\n - `packages/coding-agent/src/tools/bash.ts` — explicit async bash and auto-backgrounded bash jobs.\n - `packages/coding-agent/src/task/index.ts` — async task-job scheduling.\n - `packages/coding-agent/src/sdk.ts` — automatic follow-up delivery for unsuppressed completions.\n - `packages/coding-agent/src/config/settings-schema.ts` — `async.pollWaitDuration` options.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `poll` | `string[]` | No | Job ids to watch. Cannot be combined with `list`. If omitted (and `cancel` is also omitted), the tool watches all running jobs. If provided, missing ids are silently filtered out before waiting. |\n| `cancel` | `string[]` | No | Job ids to cancel before any polling. Missing ids are reported as `not_found`; non-running ids as `already_completed`. |\n| `list` | `boolean` | No | Return an immediate snapshot of every job spawned by the calling agent (running + completed within retention) without waiting. Read-only — cannot be combined with `poll` or `cancel`. |\n\n## Outputs\nThe tool returns one text block plus `details`.\n\n- `content[0].text`: markdown-like plain text sections assembled by `#buildResult(...)`:\n - `## Cancelled (N)` for cancel outcomes.\n - `## Completed (N)` for non-running jobs, including stored `resultText` and `errorText`.\n - `## Still Running (N)` for jobs still in `running`.\n- `details.jobs`: array of snapshots:\n - `id: string`\n - `type: \"bash\" | \"task\"`\n - `status: \"running\" | \"completed\" | \"failed\" | \"cancelled\"`\n - `label: string`\n - `durationMs: number`\n - optional `resultText`, `errorText`\n- `details.cancelled` appears only when `cancel` was passed; each item is `{ id, status }` where status is `\"cancelled\" | \"not_found\" | \"already_completed\"`.\n\nStreaming behavior:\n- During a polling wait, `execute(...)` emits `onUpdate(...)` every 500 ms with an empty text block and fresh `details.jobs` snapshots.\n- Final return is single-shot after a completion, timeout, abort, or immediate fast path.\n\nRead-only snapshot path:\n- Calling `job` with `list: true` returns a markdown summary of every job spawned by the calling agent (running + completed within retention) without waiting.\n\n## Flow\n1. `JobTool.createIf(...)` in `packages/coding-agent/src/tools/job.ts` only exposes the tool when `isBackgroundJobSupportEnabled(...)` returns true for either `async.enabled` or `bash.autoBackground.enabled`.\n2. `execute(...)` fetches `session.asyncJobManager`. If absent, it returns `Async execution is disabled; no background jobs are available.`\n3. `cancel` ids are processed first:\n - `manager.getJob(id)` missing → `not_found`.\n - existing job with `status !== \"running\"` → `already_completed`.\n - running job → `manager.cancel(id)`, which sets `job.status = \"cancelled\"`, aborts the controller, and schedules eviction.\n4. Polling mode is chosen with `const shouldPoll = requestedPollIds !== undefined || cancelIds.length === 0`:\n - only `cancel` present → return immediately, no wait.\n - explicit `poll`, or no args at all → proceed to watch jobs.\n5. Watch set resolution:\n - explicit `poll` → map ids through `manager.getJob(...)` and drop missing ones.\n - no `poll` and no `cancel` → `manager.getRunningJobs()`.\n6. Empty watch set returns immediately:\n - if cancellations happened, return snapshots for the cancelled ids that still exist.\n - else return either `No matching jobs found for IDs: ...` or `No running background jobs to wait for.`\n7. If every watched job is already non-running, `#buildResult(...)` returns immediately without waiting.\n8. Otherwise the tool waits on `Promise.race(...)` across:\n - every watched running job's `job.promise`,\n - a timeout promise for `async.pollWaitDuration`,\n - the tool-call abort signal when present.\n9. Before waiting, it calls `manager.watchJobs(watchedJobIds)`. This suppresses automatic completion delivery for those ids while they are being watched.\n10. If `onUpdate` exists, a 500 ms interval sends progress snapshots from `#snapshotJobs(...)`; one snapshot is emitted immediately before entering the race.\n11. In `finally`, the tool always calls `manager.unwatchJobs(...)`, clears the timeout, and stops the progress interval.\n12. `#buildResult(...)` deduplicates jobs, snapshots current manager state, then calls `manager.acknowledgeDeliveries(...)` for every non-running job in the result. That suppresses later automatic follow-up delivery for the same completions and removes queued deliveries for those ids.\n13. The final text groups jobs by non-running vs still-running state. A timeout is not an error path; it simply returns the current snapshot.\n\n## Modes / Variants\n- Poll all running jobs: call with neither `poll` nor `cancel`.\n- Poll explicit ids: call with `poll` only.\n- Cancel only: call with `cancel` only; cancellations happen and the tool returns immediately.\n- Cancel then poll: call with both. Cancellations are applied first, then the tool watches the remaining resolved `poll` ids.\n- Read-only inspection: call with `list: true` for the same snapshot data without waiting on completion.\n\nSpawn paths that produce jobs:\n- `packages/coding-agent/src/tools/bash.ts`\n - `async: true` always registers a `type: \"bash\"` job with `AsyncJobManager.register(...)` and returns a start message.\n - auto-background mode (`bash.autoBackground.enabled`) starts the same managed job path for non-PTY commands, waits up to `min(bash.autoBackground.thresholdMs, timeoutMs - 1000)`, and if the command is still running returns a background-job start result instead of inline command output.\n- `packages/coding-agent/src/task/index.ts`\n - when `async.enabled` is on, the chosen agent is not blocking, and `tasks.length > 0`, each task item is registered as a `type: \"task\"` job.\n\nLifecycle and exact state names:\n- Conceptual scheduling path: `pending` (only task-progress bookkeeping before work starts) → `running` → `completed` / `failed`; cancellation changes a running async job to `cancelled`.\n- Exact `AsyncJob.status` values in `packages/coding-agent/src/async/job-manager.ts`: `\"running\" | \"completed\" | \"failed\" | \"cancelled\"`.\n- Exact per-task progress values in `packages/coding-agent/src/task/types.ts`: `\"pending\" | \"running\" | \"completed\" | \"failed\" | \"aborted\"`.\n\n## Side Effects\n- Filesystem\n - None in `job.ts` itself.\n - Jobs being observed may already have written artifacts/results through their own tool runtimes.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Reads and mutates `session.asyncJobManager` state.\n - `watchJobs(...)` / `unwatchJobs(...)` toggle delivery suppression for the watched ids.\n - `acknowledgeDeliveries(...)` marks completed ids as suppressed and removes queued deliveries for them.\n - `cancel(...)` aborts running jobs through each job's `AbortController`.\n- User-visible prompts / interactive UI\n - Polling emits periodic `onUpdate` snapshots every 500 ms.\n - Automatic job completion follow-ups are generated by `packages/coding-agent/src/sdk.ts` only for unsuppressed deliveries.\n- Background work / cancellation\n - Waiting uses a timeout plus optional tool-call abort signal.\n - Cancelling a job does not synchronously await teardown; it flips state, aborts, and returns control to the manager/job promise.\n\n## Limits & Caps\n- Poll wait duration comes from `async.pollWaitDuration` in `packages/coding-agent/src/config/settings-schema.ts`:\n - allowed values: `5s`, `10s`, `30s`, `1m`, `5m`\n - default: `30s`\n- Progress update cadence while polling: `PROGRESS_INTERVAL_MS = 500` in `packages/coding-agent/src/tools/job.ts`.\n- Async job retention default: `DEFAULT_RETENTION_MS = 5 * 60 * 1000` in `packages/coding-agent/src/async/job-manager.ts`.\n- Manager fallback max-running limit: `DEFAULT_MAX_RUNNING_JOBS = 15` in `packages/coding-agent/src/async/job-manager.ts`.\n- Session wiring clamps `async.maxJobs` to `1..100` before constructing the manager in `packages/coding-agent/src/sdk.ts`; settings default is `100` in `packages/coding-agent/src/config/settings-schema.ts`.\n- Async completion delivery retry backoff in `packages/coding-agent/src/async/job-manager.ts`:\n - base `500` ms\n - max `30_000` ms\n - jitter `< 200` ms\n - exponent capped at 8 doublings\n\n## Errors\n- Tool-disabled path is returned as normal text, not thrown: `Async execution is disabled; no background jobs are available.`\n- Polling a nonexistent id is not an exception:\n - with `poll` only, missing ids are dropped; if none remain the tool returns `No matching jobs found for IDs: ...`.\n - with `cancel`, each missing id is reported as `not_found` in `details.cancelled` and text.\n- Cancelling a non-running job is not an exception; it reports `already_completed` even if the actual status is `completed`, `failed`, or `cancelled`.\n- Tool-call abort during polling stops waiting and returns a final snapshot through `#buildResult(...)`; it does not cancel watched jobs.\n- Failures inside the underlying async work are stored on the job (`status: \"failed\"`, `errorText`) and reported in normal tool output, not rethrown by `job`.\n- Calling `list: true` against an empty manager returns a normal empty-list result rather than throwing; missing ids passed to `poll` are silently filtered.\n\n## Notes\n- `job` waits for the first watched running job to settle, not for all watched jobs. If others remain `running`, they are reported under `## Still Running`; the caller must invoke `job` again to continue waiting.\n- Delivery suppression is the key difference between snapshot and automatic delivery:\n - snapshots (`job` calls with `poll` or `list: true`) read current manager state;\n - follow-up delivery comes from `AsyncJobManager.#enqueueDelivery(...)` and `sdk.ts` `onJobComplete`;\n - watched or acknowledged ids are suppressed via `isDeliverySuppressed(...)`.\n- `manager.cancel(id)` sets `status = \"cancelled\"` before the underlying promise settles. The job function may later populate `resultText` or `errorText`; `job-manager.ts` preserves that text but does not transition the status away from `cancelled`.\n- Retention eviction removes the job record, suppression flags, and watch flag together. After eviction, both `job` calls and `list: true` snapshots behave as if the id never existed.\n",
|
|
76
76
|
"tools/lsp.md": "# lsp\n\n> Query language servers for diagnostics, navigation, symbols, renames, code actions, capabilities, and raw requests.\n\n## Source\n- Entry: `packages/coding-agent/src/lsp/index.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/lsp.md`\n- Key collaborators:\n - `packages/coding-agent/src/lsp/client.ts` — client process lifecycle and JSON-RPC\n - `packages/coding-agent/src/lsp/config.ts` — config loading, auto-detect, server selection\n - `packages/coding-agent/src/lsp/lspmux.ts` — optional `lspmux` command wrapping\n - `packages/coding-agent/src/lsp/edits.ts` — apply `WorkspaceEdit` and text edits\n - `packages/coding-agent/src/lsp/utils.ts` — URI conversion, symbol resolution, formatting, glob expansion\n - `packages/coding-agent/src/lsp/types.ts` — tool schema and protocol types\n - `packages/coding-agent/src/lsp/clients/index.ts` — custom linter client cache/factory\n - `packages/coding-agent/src/lsp/clients/lsp-linter-client.ts` — LSP-backed linter adapter\n - `packages/coding-agent/src/lsp/clients/biome-client.ts` — Biome CLI diagnostics/formatting adapter\n - `packages/coding-agent/src/lsp/clients/swiftlint-client.ts` — SwiftLint CLI diagnostics adapter\n - `packages/coding-agent/src/tools/index.ts` — tool registration and `lsp.enabled` gating\n - `packages/coding-agent/src/tools/tool-timeouts.ts` — timeout defaults and clamping\n - `packages/coding-agent/src/lsp/defaults.json` — built-in server definitions for auto-detect\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `action` | string enum | Yes | One of `diagnostics`, `definition`, `references`, `hover`, `symbols`, `rename`, `rename_file`, `code_actions`, `type_definition`, `implementation`, `status`, `reload`, `capabilities`, `request`. |\n| `file` | string | No | File path; for `diagnostics` also a glob; for workspace forms use `\"*\"`; for `rename_file` this is the source path. |\n| `line` | number | No | 1-indexed line number for position-based actions. Defaults to `1` on the single-file action path. |\n| `symbol` | string | No | Substring used to resolve the column on `line`. Supports `name#N` occurrence selectors; `N` is 1-indexed and defaults to `1`. |\n| `query` | string | No | Workspace symbol query, code-action selector/filter, or LSP method name for `action=request`. |\n| `new_name` | string | No | Required for `rename` and `rename_file`. |\n| `apply` | boolean | No | For `rename`/`rename_file`, apply unless explicitly `false`. For `code_actions`, list unless explicitly `true`. |\n| `timeout` | number | No | Seconds, clamped by `clampTimeout(\"lsp\", ...)` to `5..60`, default `20`. |\n| `payload` | string | No | JSON string for `action=request`; overrides auto-built params. |\n\n## Outputs\n- Single-shot `AgentToolResult`.\n- `content` is always one text block: `[{ type: \"text\", text: string }]`.\n- `details` is `LspToolDetails`: `action`, `success`, optional `serverName`, optional original `request`.\n- No streaming updates.\n- No artifact URIs or background jobs.\n- Many validation failures are returned as ordinary text results with `details.success: false`; aborts throw `ToolAbortError` instead.\n\n## Flow\n1. `packages/coding-agent/src/tools/index.ts` registers `lsp: LspTool.createIf`; session creation also gates it behind `session.enableLsp !== false` and `settings.get(\"lsp.enabled\")`.\n2. `LspTool.execute()` in `packages/coding-agent/src/lsp/index.ts` clamps `timeout` with `clampTimeout(\"lsp\", ...)`, builds an `AbortSignal.timeout(...)`, and combines it with the caller signal.\n3. `getConfig()` loads and caches `LspConfig` per cwd, applies idle-timeout config via `setIdleTimeout()`, and reuses the cached config on later calls.\n4. Config loading in `packages/coding-agent/src/lsp/config.ts` merges `defaults.json` with JSON/YAML overrides from project, project config dirs, user config dirs, plugin roots, and home; if there are no overrides it auto-detects servers from root markers plus executable discovery.\n5. Server routing uses `getServersForFile()` / `getServerForFile()` from `config.ts`: extension or basename match, then sort primary servers before linters. `index.ts` further filters custom linter clients out of navigation/refactor paths with `getLspServersForFile()` / `getLspServerForFile()`.\n6. `getOrCreateClient()` in `client.ts` creates one process per `command:cwd`, optionally wraps supported commands with `lspmux`, spawns the server, starts the background message reader, sends `initialize`, stores server capabilities, then sends `initialized`.\n7. The message reader in `client.ts` parses LSP frames, resolves pending requests, caches `publishDiagnostics`, tracks `$/progress` tokens for project-load completion, answers `workspace/configuration`, and applies `workspace/applyEdit` requests through `applyWorkspaceEdit()`.\n8. File-scoped actions call `ensureFileOpen()` before requests. Column resolution uses `resolveSymbolColumn()` from `utils.ts`: read the target file, pick first non-whitespace when `symbol` is omitted, otherwise find the exact or case-insensitive match on the target line and honor `#N` occurrence selectors.\n9. Actions dispatch in `LspTool.execute()` through dedicated branches: workspace-only branches (`status`, some `diagnostics`, workspace `symbols`, workspace `reload`, `capabilities`, `request`) run before the single-file switch; all other single-file actions share one client lookup and `switch(action)`.\n10. Requests go through `sendRequest()` in `client.ts`, which allocates an incrementing JSON-RPC id, installs abort and timeout handling, sends `$/cancelRequest` on abort, and rejects on timeout or process exit.\n11. Actions that return edits either preview with `formatWorkspaceEdit()` or apply with `applyWorkspaceEdit()` from `edits.ts`; `rename_file` also performs the filesystem rename and then sends `workspace/didRenameFiles`.\n12. Non-abort failures inside the single-file action block are converted to `LSP error: ...`; many precondition failures return explicit text without throwing.\n\n## Modes / Variants\n### Routing and workspace scope\n- `file: \"*\"` is only special for `diagnostics`, `symbols`, and `reload`.\n- `status` ignores `file`.\n- `capabilities` with omitted `file` or `\"*\"` inspects all non-custom LSP servers; with a concrete file it scopes to matching non-custom servers.\n- `request` with omitted `file` or `\"*\"` chooses the first available non-custom LSP server; with a concrete file it chooses that file's primary non-linter server.\n- `rename_file` sends `workspace/willRenameFiles` and `workspace/didRenameFiles` to every non-custom LSP server from `getLspServers(config)`, not just one file-scoped server.\n- Diagnostics are the only tool action that queries both normal LSP servers and custom linter clients (`BiomeClient`, `SwiftLintClient`, or `LspLinterClient`).\n\n### `diagnostics`\n**Inputs**\n- Required: `file`, unless using workspace mode with `file: \"*\"`.\n- Optional: `timeout`.\n\n**Execution**\n- `file: \"*\"`: `runWorkspaceDiagnostics()` detects project type from root markers and runs one subprocess command: Rust `cargo check --message-format=short`, TypeScript `npx tsc --noEmit`, Go `go build ./...`, Python `pyright`.\n- Concrete file or glob: `resolveDiagnosticTargets()` treats non-globs as one target, otherwise expands a `Bun.Glob` up to `MAX_GLOB_DIAGNOSTIC_TARGETS`.\n- Per file, every matching server runs: custom clients call `lint(file)`; real LSP servers optionally wait for project load, capture `diagnosticsVersion`, `refreshFile()`, then `waitForDiagnostics()` for fresh `publishDiagnostics`.\n- Results are deduplicated by range+message and severity-sorted.\n\n**Output text**\n- Single target with no issues: `OK`.\n- Single target with issues: `<summary>:\\n<grouped diagnostics>`.\n- Batch/glob target: one section per file, plus an initial truncation warning when the glob exceeds the file cap.\n- Workspace mode: `Workspace diagnostics (<detected description>):\\n<command output>`.\n\n### `definition`\n**Inputs**\n- Required: `file`.\n- Optional: `line`, `symbol`, `timeout`.\n\n**Execution**\n- Sends `textDocument/definition` with `{ textDocument, position }`.\n- Accepts `Location`, `Location[]`, `LocationLink`, or `LocationLink[]`; `normalizeLocationResult()` converts `LocationLink` to `targetSelectionRange ?? targetRange`.\n- Waits for project load before the request.\n\n**Output text**\n- `No definition found` or `Found N definition(s):` followed by `file:line:col` and one context line above/below each location.\n\n### `type_definition`\nSame as `definition`, but sends `textDocument/typeDefinition` and reports `type definition(s)`.\n\n### `implementation`\nSame as `definition`, but sends `textDocument/implementation` and reports `implementation(s)`.\n\n### `references`\n**Inputs**\n- Required: `file`.\n- Optional: `line`, `symbol`, `timeout`.\n\n**Execution**\n- Sends `textDocument/references` with `includeDeclaration: true`.\n- For project-aware servers, retries up to `REFERENCES_RETRY_COUNT` times when the only hit is the queried declaration; between retries it waits for project load and sleeps `REFERENCES_RETRY_DELAY_MS`.\n- First `REFERENCE_CONTEXT_LIMIT` references include surrounding context; the rest are location-only.\n\n**Output text**\n- `No references found` or `Found N reference(s):` with contextual entries first, then `... M additional reference(s) shown without context` when truncated.\n\n### `hover`\n**Inputs**\n- Required: `file`.\n- Optional: `line`, `symbol`, `timeout`.\n\n**Execution**\n- Sends `textDocument/hover`.\n- `extractHoverText()` flattens strings, markup content, marked-string objects, or arrays into plain text.\n\n**Output text**\n- `No hover information` or the extracted hover text.\n\n### `symbols`\n**Inputs**\n- Workspace mode: `file: \"*\"` or omitted file on the early workspace branch, plus required `query`.\n- Document mode: required `file`.\n- Optional: `timeout`.\n\n**Execution**\n- Workspace mode sends `workspace/symbol` to every non-custom LSP server, post-filters matches with `filterWorkspaceSymbols()`, deduplicates with `dedupeWorkspaceSymbols()`, then truncates to `WORKSPACE_SYMBOL_LIMIT`.\n- Document mode sends `textDocument/documentSymbol` to the primary server. If the first item has `selectionRange`, it formats hierarchical `DocumentSymbol`s; otherwise it formats flat `SymbolInformation`s.\n\n**Output text**\n- Workspace mode: `Found N symbol(s) matching \"query\":` plus formatted `name @ file:line:col`, with an omission line when over the limit.\n- Document mode: `Symbols in <file>:` plus hierarchical or flat symbol lines.\n\n### `rename`\n**Inputs**\n- Required: `file`, `new_name`.\n- Optional: `line`, `symbol`, `apply`, `timeout`.\n\n**Execution**\n- Waits for project load, sends `textDocument/rename`, receives a `WorkspaceEdit`.\n- `apply !== false` applies edits immediately with `applyWorkspaceEdit()`.\n- `apply === false` renders a preview with `formatWorkspaceEdit()`.\n\n**Output text**\n- `Rename returned no edits`, `Applied rename:` plus applied change lines, or `Rename preview:` plus summarized edits.\n\n### `rename_file`\n**Inputs**\n- Required: `file` source path, `new_name` destination path.\n- Optional: `apply`, `timeout`.\n\n**Execution**\n- Resolves absolute source and destination, rejects identical paths, missing source, existing destination, empty rename set, or directories with more than `MAX_RENAME_PAIRS` files.\n- `enumerateRenamePairs()` returns one `{oldUri,newUri}` pair for a file or walks every regular file in a directory tree.\n- Sends `workspace/willRenameFiles` with `{ files: pairs }` to every non-custom LSP server; collects returned `WorkspaceEdit`s and server notes.\n- Preview mode (`apply === false`) only formats those edits.\n- Apply mode runs each returned `WorkspaceEdit`, renames the source path on disk, sends `textDocument/didClose` for every renamed open file, deletes those `openFiles` entries, then sends `workspace/didRenameFiles`.\n\n**Output text**\n- Preview: `Rename preview: <file-count label> → <dest>` plus per-server edit summaries and optional server notes.\n- Apply: `Renamed <file-count label> → <dest>` plus applied edit summaries, filesystem rename line, and optional server notes.\n\n### `code_actions`\n**Inputs**\n- Required: `file`.\n- Optional: `line`, `symbol`, `query`, `apply`, `timeout`.\n\n**Execution**\n- Reads cached diagnostics for the open URI from `client.diagnostics` and sends `textDocument/codeAction` for a zero-width range at the resolved position.\n- When `apply !== true`, `query` is passed as `context.only: [query]`; this is a server-side kind filter.\n- When `apply === true`, `query` becomes a required client-side selector: either a zero-based numeric index or a case-insensitive substring of the action title.\n- Applying a `CodeAction` uses `applyCodeAction()`: optionally `codeAction/resolve`, then `applyWorkspaceEdit(edit)`, then optional `workspace/executeCommand`.\n- Applying a bare `Command` only runs `workspace/executeCommand`.\n\n**Output text**\n- List mode: `N code action(s):` plus `index: [kind] title` lines.\n- Apply mode success: `Applied \"title\":` plus `Workspace edit:` and/or `Executed command(s):` sections.\n- Apply mode miss: `No code action matches \"query\". Available actions:`.\n- Apply mode with no edit/command: `Action \"title\" has no workspace edit or command to apply`.\n\n### `status`\n**Inputs**\n- None.\n\n**Execution**\n- Reads configured servers from cached `LspConfig`, not `getActiveClients()`.\n- Calls `detectLspmux()` and appends status text when `lspmux` is installed.\n\n**Output text**\n- `Active language servers: ...` or `No language servers configured for this project`, optionally followed by `lspmux: active (multiplexing enabled)` or `lspmux: installed but server not running`.\n\n### `reload`\n**Inputs**\n- Workspace mode: `file: \"*\"` or omitted `file`.\n- Single-file mode: required `file`.\n- Optional: `timeout`.\n\n**Execution**\n- Workspace mode reloads every non-custom LSP server.\n- Single-file mode reloads the primary server for that file.\n- `reloadServer()` tries `rust-analyzer/reloadWorkspace`, then `workspace/didChangeConfiguration` with `{ settings: {} }`; if neither works it kills the process so the next request cold-starts a new client.\n\n**Output text**\n- One line per server: `Reloaded <server>`, `Restarted <server>`, or `Failed to reload <server>: ...`.\n\n### `capabilities`\n**Inputs**\n- Optional: `file`, `timeout`.\n\n**Execution**\n- With a concrete `file`, inspects matching non-custom servers for that file.\n- With omitted `file` or `\"*\"`, inspects every non-custom configured server.\n- Starts servers as needed and dumps `client.serverCapabilities ?? {}` as pretty JSON.\n\n**Output text**\n- Per server: `<server>:` followed by indented `capabilities: { ... }`, or `<server>: failed to start (...)`.\n\n### `request`\n**Inputs**\n- Required: `query` method name.\n- Optional: `file`, `line`, `symbol`, `payload`, `timeout`.\n\n**Execution**\n- Chooses one non-custom server: file-scoped primary server, otherwise the first configured non-custom server.\n- Param building precedence:\n 1. If `payload` is present, parse JSON and use it verbatim.\n 2. Else if `file` is concrete and `line` is present, build `{ textDocument: { uri }, position: { line: line - 1, character } }` using `resolveSymbolColumn()`.\n 3. Else if `file` is concrete, build `{ textDocument: { uri } }`.\n 4. Else use `{}`.\n- Opens the file first when `file` is concrete.\n\n**Output text**\n- Success: `<server> ← <method>:\\n<formatted result>`, where non-string results are `JSON.stringify(..., null, 2)` and nullish values become `null`.\n- Failure: `LSP error from <server> on <method>: ...`.\n\n## Side Effects\n- Filesystem\n - Reads config files, target files, and root markers.\n - `rename` and `code_actions` may edit/create/delete/rename files via `applyWorkspaceEdit()`.\n - `rename_file` always renames the source path on disk in apply mode.\n - Server-initiated `workspace/applyEdit` requests also mutate files through `applyWorkspaceEdit()`.\n- Network\n - None directly; communication is local stdio JSON-RPC to subprocesses.\n- Subprocesses / native bindings\n - Spawns language servers with `ptree.spawn()`.\n - Workspace diagnostics spawns `cargo`, `npx`, `go`, or `pyright`.\n - `BiomeClient` and `SwiftLintClient` spawn CLI tools.\n - Optional `lspmux` detection spawns `lspmux status`; supported servers may be wrapped through `lspmux client`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Caches config per cwd in `configCache`.\n - Caches LSP clients per `command:cwd`, with `pendingRequests`, `diagnostics`, `openFiles`, `serverCapabilities`, and project-load state.\n - Caches custom linter clients by `serverName:cwd`.\n - Updates client `lastActivity`; optional idle-timeout cleanup is driven by `setIdleTimeout()`.\n- Background work / cancellation\n - Every request has an abortable timeout signal.\n - Aborting an in-flight LSP request sends `$/cancelRequest`.\n - Background message readers persist for each live client until process exit/shutdown.\n\n## Limits & Caps\n- Tool timeout clamp: default `20`, min `5`, max `60` seconds — `TOOL_TIMEOUTS.lsp` in `packages/coding-agent/src/tools/tool-timeouts.ts`.\n- LSP request default timeout inside `sendRequest()`: `30_000ms` — `DEFAULT_REQUEST_TIMEOUT_MS` in `packages/coding-agent/src/lsp/client.ts`.\n- Warmup initialize timeout default: `5_000ms` — `WARMUP_TIMEOUT_MS` in `packages/coding-agent/src/lsp/client.ts`.\n- Project-load wait fallback: `15_000ms` — `PROJECT_LOAD_TIMEOUT_MS` in `packages/coding-agent/src/lsp/client.ts`.\n- Idle-client sweep interval when enabled: `60_000ms` — `IDLE_CHECK_INTERVAL_MS` in `packages/coding-agent/src/lsp/client.ts`.\n- Diagnostic message output cap: first `50` messages — `DIAGNOSTIC_MESSAGE_LIMIT` in `packages/coding-agent/src/lsp/index.ts`.\n- Single-file diagnostics wait: `3_000ms` — `SINGLE_DIAGNOSTICS_WAIT_TIMEOUT_MS`.\n- Batch/glob diagnostics wait per file: `400ms` — `BATCH_DIAGNOSTICS_WAIT_TIMEOUT_MS`.\n- Glob diagnostic target cap: first `20` matches — `MAX_GLOB_DIAGNOSTIC_TARGETS`.\n- Workspace symbol cap: first `200` entries — `WORKSPACE_SYMBOL_LIMIT`.\n- Reference context cap: first `50` references include source context — `REFERENCE_CONTEXT_LIMIT`.\n- References retry count: `2` retries, `250ms` backoff — `REFERENCES_RETRY_COUNT`, `REFERENCES_RETRY_DELAY_MS`.\n- Directory rename cap: `1_000` file pairs — `MAX_RENAME_PAIRS`.\n- `detectLspmux()` state cache TTL: `5 * 60 * 1000ms`; liveness check timeout: `1_000ms` — `STATE_CACHE_TTL_MS`, `LIVENESS_TIMEOUT_MS` in `packages/coding-agent/src/lsp/lspmux.ts`.\n- Workspace diagnostics output cap: first `50` lines from the subprocess.\n\n## Errors\n- Missing or invalid inputs are usually returned as text with `details.success: false`, not thrown:\n - missing `file`/`query`/`new_name`\n - invalid JSON in `payload`\n - no matching server\n - invalid `rename_file` source/destination conditions\n- `resolveSymbolColumn()` throws explicit errors for missing files, missing symbols, and out-of-bounds `#N` selectors; these surface as `LSP error: ...` or request-specific error text.\n- `sendRequest()` rejects on timeout with `LSP request <method> timed out after <ms>ms`.\n- Client process exit rejects all pending requests with an exit-code/stderr error assembled in `getOrCreateClient()`.\n- Single-file action failures inside the main `try` become `LSP error: <message>`.\n- `request` has its own error envelope: `LSP error from <server> on <method>: <message>`.\n- Some server failures are intentionally softened:\n - diagnostics continue when one server fails\n - `rename_file` suppresses `workspace/willRenameFiles` “method not found” errors and records other server errors as notes\n - `code_actions` ignores `codeAction/resolve` failures and applies unresolved actions when possible\n- Aborts are not converted to text: `ToolAbortError` is rethrown.\n\n## Notes\n- `status` reports configured/available servers from `LspConfig`, not currently active client processes from `getActiveClients()`.\n- `getLspServerForFile()` excludes `createClient` adapters and linter-only servers; navigation/refactor actions never target Biome/SwiftLint custom clients.\n- `getServersForFile()` matches both file extensions and exact basenames from `fileTypes`; config can target names like `Dockerfile` if present.\n- `symbol` matching is exact first, then case-insensitive, and falls back to the Nth occurrence on the specified line only; it never scans other lines.\n- `code_actions` uses `query` in two different ways: server-side `context.only` filter in list mode, client-side title/index selector in apply mode.\n- `rename` and `rename_file` default to apply. Preview requires `apply: false`.\n- `request` with `file: \"*\"` is treated the same as omitted `file`: it does not build workspace-specific params.\n- `reload` does not recreate a client immediately after killing it; the next request triggers reinitialization.\n- `workspace/applyEdit` can apply edits initiated by the server outside the direct tool action result path.\n- `detectLspmux()` can be disabled with `PI_DISABLE_LSPMUX=1`; only `rust-analyzer` is in `DEFAULT_SUPPORTED_SERVERS`.\n- `configCache` is per-process and never auto-invalidated; config changes require a fresh process to be observed by `getConfig()` callers.",
|
|
77
|
-
"tools/read.md": "# read\n\n> Read files, directories, archives, SQLite databases, internal resources, images, documents, and URLs through one `path` string.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/read.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/read.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/path-utils.ts` — split `path` from trailing selectors; normalize local paths.\n - `packages/coding-agent/src/tools/archive-reader.ts` — detect `archive.ext:inner/path`, index archives, list/read entries.\n - `packages/coding-agent/src/tools/sqlite-reader.ts` — detect SQLite targets, parse selectors, render tables.\n - `packages/coding-agent/src/tools/fetch.ts` — URL parsing, fetch/render pipeline, URL cache/artifacts.\n - `packages/coding-agent/src/internal-urls/router.ts` — resolve `agent://`, `artifact://`, `jobs://`, `local://`, `mcp://`, `memory://`, `pi://`, `rule://`, `skill://`.\n - `packages/coding-agent/src/edit/notebook.ts` — convert `.ipynb` to editable `# %% [...] cell:N` text.\n - `packages/coding-agent/src/utils/file-display-mode.ts` — decide hashline vs line-number vs raw display.\n - `packages/coding-agent/src/workspace-tree.ts` — render directory trees.\n - `packages/coding-agent/src/edit/file-read-cache.ts` — cache read lines for later hashline edit recovery.\n - `packages/coding-agent/src/tools/index.ts` — registers `read: s => new ReadTool(s)`.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `path` | `string` | Yes | Filesystem path, internal URL, or web URL. May end with a trailing selector such as `:50-100` or `:raw`. |\n\n### Selector grammar\n\nFor normal file-like reads, `splitPathAndSel()` in `packages/coding-agent/src/tools/path-utils.ts` recognizes the final suffix only when it matches one of these forms:\n\n| Suffix | Meaning |\n| --- | --- |\n| `:raw` | Raw/verbatim mode. Disables structural summaries and line prefixes. |\n| `:N` / `:LN` | Start at 1-indexed line `N`, open-ended. |\n| `:A-B` / `:LA-LB` | Inclusive 1-indexed line range. |\n| `:A+C` / `:LA+LC` | `C` lines starting at `A`; tool converts this to end line `A + C - 1`. |\n| `:range:raw` or `:raw:range` | Same line selection, but raw output. |\n\nValidation in `parseLineRangeChunk()`:\n- line numbers are 1-indexed; `:0` throws.\n- `+` counts must be `>= 1`.\n- `-` end must be `>= start`.\n\nSelector parsing intentionally falls through for unrecognized trailing `:...`; archive and SQLite paths consume their own colon syntax.\n\nURL selectors are parsed separately in `packages/coding-agent/src/tools/fetch.ts` and support only `:raw`, `:N`, `:A-B`, and `:A+C` — no optional `L` prefix there.\n\n## Outputs\n- Single-shot `AgentToolResult` built through `toolResult()` in `packages/coding-agent/src/tools/tool-result.ts`.\n- `content` is usually one text block. Image reads may return `[text, image]`.\n- `details` is path-dependent. `ReadToolDetails` may include:\n - `kind: \"file\" | \"url\"` (URL path uses `kind: \"url\"`; file reads usually omit `kind`)\n - `isDirectory`\n - `resolvedPath`\n - `suffixResolution`\n - URL fields: `url`, `finalUrl`, `contentType`, `method`, `notes`\n - `truncation`\n - `displayContent` (unprefixed text + starting line for TUI rendering)\n - `summary` (`lines`, `elidedSpans`) for structural summaries\n - `meta` from `packages/coding-agent/src/tools/output-meta.ts`\n- `details.meta.source` is set to the backing path, URL, or internal URL.\n- `details.meta.truncation` carries shown range, total lines/bytes, next offset, and optional `artifactId` for cached URL output.\n- Directory/archive listings and SQLite table lists also set `details.meta.limits` when list limits trigger.\n\n## Flow\n1. `ReadTool.execute()` accepts `{ path }`. `file://...` inputs are expanded first with `expandPath()`.\n2. It tries URL handling first via `parseReadUrlTarget()` from `packages/coding-agent/src/tools/fetch.ts`.\n - Plain URL reads call `executeReadUrl()`.\n - URL reads with line selectors load or refresh the URL cache with `loadReadUrlCacheEntry()` and paginate the cached text locally with `#buildInMemoryTextResult()`.\n3. If not a web URL, it checks `session.internalRouter.canHandle(...)`.\n - Internal URLs are resolved with `internalRouter.resolve()`.\n - `agent://` query extraction (`/path` or `?q=`) bypasses pagination and returns the extracted content directly.\n - Other internal resources are paginated in-memory by `#buildInMemoryTextResult()`.\n4. It tries archive resolution next with `#resolveArchiveReadPath()`.\n - `parseArchivePathCandidates()` scans for `.tar`, `.tar.gz`, `.tgz`, or `.zip` anywhere before `:sub/path`.\n - On success, `#readArchive()` either lists a directory or decodes an entry as UTF-8 text.\n5. It tries SQLite resolution with `#resolveSqliteReadPath()`.\n - `parseSqlitePathCandidates()` scans for `.sqlite`, `.sqlite3`, `.db`, `.db3` before any `:table`, `:key`, or `?query` suffix.\n - `#readSqlite()` dispatches on `parseSqliteSelector()`.\n6. Otherwise it treats the input as a local filesystem path.\n - `resolveReadPath()` expands `~`, resolves relative to session cwd, treats bare `/` as session cwd, and retries macOS screenshot/NFD/curly-quote variants.\n - If the path does not exist, `findUniqueSuffixMatch()` does a workspace glob-based unique suffix lookup (skipped for remote mounts).\n7. Directories go through `#readDirectory()`.\n8. Non-directories branch by content type:\n - image metadata / inline image\n - editable notebook text\n - markit-converted document\n - structural summary for parseable code/prose\n - streamed text/line-range read\n9. Local text reads are streamed by `streamLinesFromFile()` rather than loading the whole file. The tool adds up to 3 lines of context before/after explicit bounded ranges.\n10. Non-empty contiguous local reads are recorded into `getFileReadCache(session)` for later hashline edit recovery.\n11. If suffix resolution happened, the first text block is prefixed with `[Path '...' not found; resolved to '...' via suffix match]`.\n\n## Modes / Variants\n\n### Local text files\n- No selector: if summarization is enabled and the file is small enough, `#trySummarize()` calls `summarizeCode()`.\n - Guards: file size `<= 2 MiB` (`MAX_SUMMARY_BYTES`), line count `<= 20_000` (`MAX_SUMMARY_LINES`).\n - Summary output keeps selected declarations and replaces elided spans with `...`.\n - When an elided block sits between matching brace lines, `#renderSummary()` may merge them into one anchored line rather than emitting separate opener/closer lines.\n- Explicit selector or summarization miss: streamed text read.\n - Default open-ended limit is `min(session setting read.defaultLimit, DEFAULT_MAX_LINES)`.\n - Explicit ranges expand by `RANGE_LEADING_CONTEXT_LINES = 1` / `RANGE_TRAILING_CONTEXT_LINES = 3` on the constrained sides only.\n - Non-raw output uses `resolveFileDisplayMode()`:\n - hashline anchors when edit mode is hashline, read is not raw, source is mutable, edit tool exists, and `readHashLines !== false`\n - otherwise optional line numbers when `readLineNumbers === true`\n - raw mode suppresses both\n- Prefix format in hashline mode is `lineNumber + 2-char line hash + \"|\"`, e.g. `41th|def alpha():`, from `formatHashLine()` in `packages/coding-agent/src/hashline/hash.ts`.\n- Those anchors are what the `edit`/hashline path consumes later; immutable sources and `:raw` intentionally suppress them.\n\n### Directory listings\n- `#readDirectory()` calls `buildDirectoryTree()` with:\n - `maxDepth = 2`\n - `perDirLimit = 12`\n - `rootLimit = null`\n - `lineCap = limit` when a line selector was present, else unlimited at this layer\n- `buildDirectoryTree()` sorts siblings by recency, shows file sizes and relative ages, and may mark `limits.resultLimit` when the tree truncates.\n- Empty directories render as `(empty directory)`.\n\n### Archives\n- Supported archive containers: `.tar`, `.tar.gz`, `.tgz`, `.zip`.\n- Syntax: `archive.ext`, `archive.ext:path/inside`, `archive.ext:path/inside:50-60`.\n- `openArchive()` reads the whole archive into memory, then:\n - tar/tgz uses `new Bun.Archive(bytes)`\n - zip uses `fflate.unzipSync()`\n- Archive paths normalize `/`, drop `.` segments, and reject `..`.\n- Directory reads list immediate children; files show `name` plus ` (size)` when size > 0.\n- Directory listing default limit is `500` entries in `#readArchiveDirectory()`.\n- File entries are UTF-8 decoded. Non-UTF-8 entries return `[Cannot read binary archive entry '...' (...)]` instead of bytes.\n- Text archive entries reuse the normal in-memory pagination/anchoring path.\n\n### SQLite databases\n- Database detection requires both a matching extension and a valid SQLite file header (`isSqliteFile()`).\n- Selector forms from `parseSqliteSelector()`:\n\n#### `db.sqlite`\n- `kind: \"list\"`\n- Lists non-`sqlite_%` tables with row counts.\n- `#readSqlite()` caps the rendered list to `500` tables via `applyListLimit()`.\n\n#### `db.sqlite:table`\n- `kind: \"schema\"`\n- Returns `sqlite_master.sql` plus sample rows.\n- Sample size is `DEFAULT_SCHEMA_SAMPLE_LIMIT = 5`.\n\n#### `db.sqlite:table:key`\n- `kind: \"row\"`\n- Resolves by primary key when the table has exactly one PK column; otherwise falls back to `rowid` lookup.\n- No query parameters allowed on row lookups.\n\n#### `db.sqlite:table?limit=...&offset=...&order=...&where=...`\n- `kind: \"query\"`\n- Defaults: `limit = 20`, `offset = 0`.\n- `limit` is capped at `500`.\n- `order` accepts `column` or `column:asc|desc` and must name an existing column.\n- `where` is accepted only after `validateWhereClause()` rejects comments, semicolons, and control keywords like `LIMIT`, `OFFSET`, `UNION`, `ATTACH`, `PRAGMA`.\n- Unknown query parameters throw.\n\n#### `db.sqlite?q=SELECT ...`\n- `kind: \"raw\"`\n- Cannot be combined with table selectors or any other query param.\n- Empty `q` throws.\n- `executeReadQuery()` runs `db.prepare(sql).all()` and rejects bound parameters; it does not verify that the SQL starts with `SELECT`.\n\n- Rendering caps in `packages/coding-agent/src/tools/sqlite-reader.ts`:\n - ASCII table width `120` (`MAX_RENDER_WIDTH`)\n - per-column width `40` (`MAX_COLUMN_WIDTH`)\n- `#readSqlite()` opens Bun SQLite in `{ readonly: true, strict: true }` and sets `PRAGMA busy_timeout = 3000`.\n\n### Documents\n- `CONVERTIBLE_EXTENSIONS` in `packages/coding-agent/src/tools/read.ts` covers `.pdf`, `.doc`, `.docx`, `.ppt`, `.pptx`, `.xls`, `.xlsx`, `.rtf`, `.epub`.\n- `convertFileWithMarkit()` converts the file to text/markdown.\n- Converted output is then head-truncated with normal shared limits; there is no line selector support inside the source document before conversion.\n- Conversion failures return a text block like `[Cannot read .pdf file: ...]`.\n\n### Jupyter notebooks\n- `.ipynb` goes through `readEditableNotebookText()` unless `:raw` was requested.\n- Output is editable plain text with markers like:\n\n```text\n# %% [code] cell:0\n...\n```\n\n- Raw mode bypasses that conversion and falls back to file-text reading.\n\n### Images\n- Image detection is metadata-based (`readImageMetadata()`).\n- Max accepted image size is `20 MiB` (`MAX_IMAGE_INPUT_BYTES`, re-exported as `MAX_IMAGE_SIZE`). Larger files throw.\n- If `inspect_image.enabled` is true, `read` returns metadata only (MIME, bytes, dimensions, channels, alpha) plus a suggestion to call `inspect_image`.\n- Otherwise it calls `loadImageInput()` and returns:\n - a text note from the image loader\n - an inline image block\n- Unsupported/undecodable image formats throw a `ToolError`.\n\n### Internal URLs\n- `read` does not resolve these itself; it delegates to `session.internalRouter.resolve()`.\n- Registered protocols are outside this file, but the router in `packages/coding-agent/src/internal-urls/router.ts` is built for `agent://`, `artifact://`, `issue://`, `jobs://`, `local://`, `mcp://`, `memory://`, `pi://`, `pr://`, `rule://`, and `skill://`.\n- `#handleInternalUrl()` behavior:\n - parses the URL with `parseInternalUrl()` so colons inside the host segment are legal\n - for `agent://`, treats non-root path extraction or `?q=` extraction as a special no-pagination mode\n - otherwise paginates the resolved text in memory\n - passes `immutable` through to `resolveFileDisplayMode()` so anchors are suppressed for immutable resources such as artifacts, skills, memory, and agent outputs\n - sets `ignoreResultLimits: true` for `skill://` so the full skill text is paginated only by explicit selectors, not by the normal default line limit\n- `issue://<N>` / `pr://<N>` (and the long form `issue://<owner>/<repo>/<N>` / `pr://<owner>/<repo>/<N>`) route through the same SQLite cache the `github` tool writes to; `?comments=0` selects the no-comments rendering. Bare `issue://` / `pr://` (and `issue://<owner>/<repo>` / `pr://<owner>/<repo>`) issue a live `gh issue list` / `gh pr list` for browsing, accepting `?state=`, `?limit=`, `?author=`, `?label=`. PR diffs share the same cache through `pr://<N>/diff` (numbered file listing with per-file hints), `pr://<N>/diff/<i>` (single file slice; 1-indexed), and `pr://<N>/diff/all` (verbatim unified diff); the listing and per-file slices are reconstructed from the cached unified-diff payload, so all three variants share one `gh pr diff` invocation per PR. Diff content is served as `text/plain`. Soft TTL `github.cache.softTtlSec` (default 5 minutes), hard TTL `github.cache.hardTtlSec` (default 7 days). Stale-hit returns the cached row and schedules a background refresh.\n\n### Web URLs\n- `parseReadUrlTarget()` accepts `http://`, `https://`, or `www.` targets.\n- Plain URL reads call `executeReadUrl()` in `packages/coding-agent/src/tools/fetch.ts`.\n- `:raw` means raw HTML/body fallback path; plain URL reads prefer rendered/reader-friendly output.\n- `:N`, `:A-B`, `:A+C` do not refetch. They page over cached output from the prior or current URL render.\n- URL render pipeline in `renderUrl()`:\n 1. normalize scheme (`https://` added for bare `www.`)\n 2. try special handlers for known sites unless raw\n 3. fetch with `loadPage()`\n 4. if content is image/PDF/DOCX/etc., try binary fetch + markit/image handling\n 5. handle JSON directly, feeds via feed parser, plain text directly\n 6. for HTML and non-raw mode, try markdown alternates, `URL.md`, content negotiation, feed alternates, HTML-to-text renderers, extracted linked documents, then `llms.txt`\n 7. fall back to raw body text/html\n- URL output is wrapped with a small header:\n\n```text\nURL: ...\nContent-Type: ...\nMethod: ...\nNotes: ...\n\n---\n```\n\n- `method` records the winning path (`json`, `feed`, `text`, `alternate-markdown`, `md-suffix`, `content-negotiation`, `image`, `markit`, `llms.txt`, `raw`, `raw-html`, etc.).\n- URL reads may return an inline image block when the fetched resource is a supported image and survives resizing.\n\n## Side Effects\n- Filesystem\n - Opens and streams local files.\n - Reads entire archives into memory before indexing.\n - May read URL-cache artifact files from the session artifacts directory.\n - Writes URL output artifacts when URL output is truncated or when line-range pagination needs a persisted cache body.\n- Network\n - URL mode performs HTTP fetches, binary refetches, and alternate-endpoint probes.\n- Subprocesses / native bindings\n - Uses Bun SQLite for `.db`/`.sqlite*`.\n - Uses `Bun.Archive` for tar/tgz and `fflate` for zip.\n - URL HTML rendering can delegate into site handlers and HTML-to-text backends from `packages/coding-agent/src/tools/fetch.ts`.\n- Session state\n - Records local text lines into `session.fileReadCache` for later stale-anchor recovery.\n - Uses `session.internalRouter` for internal URLs.\n - Uses `session.allocateOutputArtifact()` for cached/truncated URL output.\n- Background work / cancellation\n - Most branches honor `AbortSignal`; the tool itself is marked `nonAbortable = true`, but helper paths still call `throwIfAborted(signal)`.\n\n## Limits & Caps\n- Shared text truncation defaults from `packages/coding-agent/src/session/streaming-output.ts`:\n - `DEFAULT_MAX_LINES = 3000`\n - `DEFAULT_MAX_BYTES = 50 * 1024`\n- Local text open-ended default line limit: `read.defaultLimit`, clamped to `[1, DEFAULT_MAX_LINES]`.\n- Explicit line ranges add `1` leading and `3` trailing context lines on the constrained sides (`RANGE_LEADING_CONTEXT_LINES` / `RANGE_TRAILING_CONTEXT_LINES`).\n- File streaming chunk size: `8 * 1024` bytes (`READ_CHUNK_SIZE`).\n- Local streamed byte budget for line reads: `max(DEFAULT_MAX_BYTES, maxLinesToCollect * 512)`.\n- Structural summaries only run when file size `<= 2 MiB` and line count `<= 20_000`.\n- Image input max: `20 MiB`.\n- Directory tree caps for local directories: depth `2`, per-directory children `12`.\n- Archive directory default list cap: `500` entries.\n- SQLite:\n - default row query limit `20`\n - schema sample limit `5`\n - max query limit `500`\n - table list cap `500`\n - render width `120`, column width `40`\n - busy timeout `3000` ms\n- URL read result shown to the model is truncated to `300` lines and `50 KiB` in `executeReadUrl()`; full cached output can be attached as an artifact.\n- Inline fetched URL images:\n - source bytes cap `20 MiB`\n - post-resize inline output cap `300 KiB`\n- Unique suffix auto-resolution glob timeout: `5000` ms.\n- File-read cache holds `30` paths per session.\n\n## Errors\n- Validation and operational failures surface as `ToolError`.\n- Selector errors include:\n - `Line selector 0 is invalid; lines are 1-indexed. Use :1.`\n - invalid `A+B` / `A-B` shapes\n - `Cannot combine query extraction with offset/limit` for `agent://.../path:50`\n- Missing local/archive/sqlite paths first attempt unique suffix resolution; if no unique match exists they error.\n- Out-of-bounds line reads do not throw. They return explanatory text with a suggestion such as `Use :1 ...` or `Use :<last line> ...`.\n- Binary archive entries do not throw; they return a text notice.\n- Document conversion failure returns a text notice.\n- Image oversize/unsupported/invalid cases throw.\n- SQLite parser rejects unsupported parameter combinations early; DB/runtime errors are caught and rethrown as `ToolError(message)`.\n- URL fetch failure does not throw when HTTP fetch succeeds but `response.ok === false`; it returns a failed URL read with `method: \"failed\"` and explanatory notes.\n\n## Notes\n- `readSchema` examples include `https://example.com:L1-L40`, but URL selector parsing in `packages/coding-agent/src/tools/fetch.ts` does not accept `L` prefixes.\n- Hashline anchors are suppressed for raw reads and immutable internal resources because there is no editable backing target for later `edit` consumption.\n- `splitPathAndSel()` intentionally treats unknown trailing `:...` as part of the path so `archive.zip:inner/file` and `db.sqlite:table:key` still work.\n- `resolveReadPath()` contains macOS-specific filename fallbacks for screenshot timestamps, NFD Unicode normalization, and curly apostrophes.\n- A bare `/` resolves to the session cwd, not the filesystem root.\n- URL cache keys are session-scoped and normalized by requested URL + raw/rendered mode; both requested URL and final redirected URL are cached.\n- URL line-range reads request `ensureArtifact: true, preferCached: true` so a later paginated read can reopen the same rendered body from artifact storage.\n- Raw SQLite `q=` execution is not keyword-restricted beyond “no bound parameters”; the read tool relies on the surrounding contract to keep it read-only.\n- The file-read cache is not a read acceleration cache. It exists to recover hashline edits when the file changed after the read.",
|
|
77
|
+
"tools/read.md": "# read\n\n> Read files, directories, archives, SQLite databases, internal resources, images, documents, and URLs through one `path` string.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/read.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/read.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/path-utils.ts` — split `path` from trailing selectors; normalize local paths.\n - `packages/coding-agent/src/tools/archive-reader.ts` — detect `archive.ext:inner/path`, index archives, list/read entries.\n - `packages/coding-agent/src/tools/sqlite-reader.ts` — detect SQLite targets, parse selectors, render tables.\n - `packages/coding-agent/src/tools/fetch.ts` — URL parsing, fetch/render pipeline, URL cache/artifacts.\n - `packages/coding-agent/src/internal-urls/router.ts` — resolve `agent://`, `artifact://`, `local://`, `mcp://`, `memory://`, `pi://`, `rule://`, `skill://`.\n - `packages/coding-agent/src/edit/notebook.ts` — convert `.ipynb` to editable `# %% [...] cell:N` text.\n - `packages/coding-agent/src/utils/file-display-mode.ts` — decide hashline vs line-number vs raw display.\n - `packages/coding-agent/src/workspace-tree.ts` — render directory trees.\n - `packages/coding-agent/src/edit/file-read-cache.ts` — cache read lines for later hashline edit recovery.\n - `packages/coding-agent/src/tools/index.ts` — registers `read: s => new ReadTool(s)`.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `path` | `string` | Yes | Filesystem path, internal URL, or web URL. May end with a trailing selector such as `:50-100` or `:raw`. |\n\n### Selector grammar\n\nFor normal file-like reads, `splitPathAndSel()` in `packages/coding-agent/src/tools/path-utils.ts` recognizes the final suffix only when it matches one of these forms:\n\n| Suffix | Meaning |\n| --- | --- |\n| `:raw` | Raw/verbatim mode. Disables structural summaries and line prefixes. |\n| `:N` / `:LN` | Start at 1-indexed line `N`, open-ended. |\n| `:A-B` / `:LA-LB` | Inclusive 1-indexed line range. |\n| `:A+C` / `:LA+LC` | `C` lines starting at `A`; tool converts this to end line `A + C - 1`. |\n| `:range:raw` or `:raw:range` | Same line selection, but raw output. |\n\nValidation in `parseLineRangeChunk()`:\n- line numbers are 1-indexed; `:0` throws.\n- `+` counts must be `>= 1`.\n- `-` end must be `>= start`.\n\nSelector parsing intentionally falls through for unrecognized trailing `:...`; archive and SQLite paths consume their own colon syntax.\n\nURL selectors are parsed separately in `packages/coding-agent/src/tools/fetch.ts` and support only `:raw`, `:N`, `:A-B`, and `:A+C` — no optional `L` prefix there.\n\n## Outputs\n- Single-shot `AgentToolResult` built through `toolResult()` in `packages/coding-agent/src/tools/tool-result.ts`.\n- `content` is usually one text block. Image reads may return `[text, image]`.\n- `details` is path-dependent. `ReadToolDetails` may include:\n - `kind: \"file\" | \"url\"` (URL path uses `kind: \"url\"`; file reads usually omit `kind`)\n - `isDirectory`\n - `resolvedPath`\n - `suffixResolution`\n - URL fields: `url`, `finalUrl`, `contentType`, `method`, `notes`\n - `truncation`\n - `displayContent` (unprefixed text + starting line for TUI rendering)\n - `summary` (`lines`, `elidedSpans`, `elidedLines`) for structural summaries\n - `meta` from `packages/coding-agent/src/tools/output-meta.ts`\n- `details.meta.source` is set to the backing path, URL, or internal URL.\n- `details.meta.truncation` carries shown range, total lines/bytes, next offset, and optional `artifactId` for cached URL output.\n- Directory/archive listings and SQLite table lists also set `details.meta.limits` when list limits trigger.\n\n## Flow\n1. `ReadTool.execute()` accepts `{ path }`. `file://...` inputs are expanded first with `expandPath()`.\n2. It tries URL handling first via `parseReadUrlTarget()` from `packages/coding-agent/src/tools/fetch.ts`.\n - Plain URL reads call `executeReadUrl()`.\n - URL reads with line selectors load or refresh the URL cache with `loadReadUrlCacheEntry()` and paginate the cached text locally with `#buildInMemoryTextResult()`.\n3. If not a web URL, it checks `session.internalRouter.canHandle(...)`.\n - Internal URLs are resolved with `internalRouter.resolve()`.\n - `agent://` query extraction (`/path` or `?q=`) bypasses pagination and returns the extracted content directly.\n - Other internal resources are paginated in-memory by `#buildInMemoryTextResult()`.\n4. It tries archive resolution next with `#resolveArchiveReadPath()`.\n - `parseArchivePathCandidates()` scans for `.tar`, `.tar.gz`, `.tgz`, or `.zip` anywhere before `:sub/path`.\n - On success, `#readArchive()` either lists a directory or decodes an entry as UTF-8 text.\n5. It tries SQLite resolution with `#resolveSqliteReadPath()`.\n - `parseSqlitePathCandidates()` scans for `.sqlite`, `.sqlite3`, `.db`, `.db3` before any `:table`, `:key`, or `?query` suffix.\n - `#readSqlite()` dispatches on `parseSqliteSelector()`.\n6. Otherwise it treats the input as a local filesystem path.\n - `resolveReadPath()` expands `~`, resolves relative to session cwd, treats bare `/` as session cwd, and retries macOS screenshot/NFD/curly-quote variants.\n - If the path does not exist, `findUniqueSuffixMatch()` does a workspace glob-based unique suffix lookup (skipped for remote mounts).\n7. Directories go through `#readDirectory()`.\n8. Non-directories branch by content type:\n - image metadata / inline image\n - editable notebook text\n - markit-converted document\n - structural summary for parseable code/prose\n - streamed text/line-range read\n9. Local text reads are streamed by `streamLinesFromFile()` rather than loading the whole file. The tool adds up to 3 lines of context before/after explicit bounded ranges.\n10. Non-empty contiguous local reads are recorded into `getFileReadCache(session)` for later hashline edit recovery.\n11. If suffix resolution happened, the first text block is prefixed with `[Path '...' not found; resolved to '...' via suffix match]`.\n\n## Modes / Variants\n\n### Local text files\n- No selector: if summarization is enabled and the file is small enough, `#trySummarize()` calls `summarizeCode()`.\n - Guards: file size `<= 2 MiB` (`MAX_SUMMARY_BYTES`), line count `<= 20_000` (`MAX_SUMMARY_LINES`).\n - Summary output keeps selected declarations and replaces elided spans with `...`. When at least one span is elided, the text content ends with a footer like `[NN lines across MM elided regions; read <path>:raw or a line range like <path>:1-9999 for verbatim content]` so the agent has a concrete recovery selector instead of a bare marker.\n - When an elided block sits between matching brace lines, `#renderSummary()` may merge them into one anchored line rather than emitting separate opener/closer lines.\n- Explicit selector or summarization miss: streamed text read.\n - Default open-ended limit is `min(session setting read.defaultLimit, DEFAULT_MAX_LINES)`.\n - Explicit ranges expand by `RANGE_LEADING_CONTEXT_LINES = 1` / `RANGE_TRAILING_CONTEXT_LINES = 3` on the constrained sides only.\n - Non-raw output uses `resolveFileDisplayMode()`:\n - hashline anchors when edit mode is hashline, read is not raw, source is mutable, edit tool exists, and `readHashLines !== false`\n - otherwise optional line numbers when `readLineNumbers === true`\n - raw mode suppresses both\n- Prefix format in hashline mode is `lineNumber + 2-char line hash + \"|\"`, e.g. `41th|def alpha():`, from `formatHashLine()` in `packages/coding-agent/src/hashline/hash.ts`.\n- Those anchors are what the `edit`/hashline path consumes later; immutable sources and `:raw` intentionally suppress them.\n\n### Directory listings\n- `#readDirectory()` calls `buildDirectoryTree()` with:\n - `maxDepth = 2`\n - `perDirLimit = 12`\n - `rootLimit = null`\n - `lineCap = limit` when a line selector was present, else unlimited at this layer\n- `buildDirectoryTree()` sorts siblings by recency, shows file sizes and relative ages, and may mark `limits.resultLimit` when the tree truncates.\n- Empty directories render as `(empty directory)`.\n\n### Archives\n- Supported archive containers: `.tar`, `.tar.gz`, `.tgz`, `.zip`.\n- Syntax: `archive.ext`, `archive.ext:path/inside`, `archive.ext:path/inside:50-60`.\n- `openArchive()` reads the whole archive into memory, then:\n - tar/tgz uses `new Bun.Archive(bytes)`\n - zip uses `fflate.unzipSync()`\n- Archive paths normalize `/`, drop `.` segments, and reject `..`.\n- Directory reads list immediate children; files show `name` plus ` (size)` when size > 0.\n- Directory listing default limit is `500` entries in `#readArchiveDirectory()`.\n- File entries are UTF-8 decoded. Non-UTF-8 entries return `[Cannot read binary archive entry '...' (...)]` instead of bytes.\n- Text archive entries reuse the normal in-memory pagination/anchoring path.\n\n### SQLite databases\n- Database detection requires both a matching extension and a valid SQLite file header (`isSqliteFile()`).\n- Selector forms from `parseSqliteSelector()`:\n\n#### `db.sqlite`\n- `kind: \"list\"`\n- Lists non-`sqlite_%` tables with row counts.\n- `#readSqlite()` caps the rendered list to `500` tables via `applyListLimit()`.\n\n#### `db.sqlite:table`\n- `kind: \"schema\"`\n- Returns `sqlite_master.sql` plus sample rows.\n- Sample size is `DEFAULT_SCHEMA_SAMPLE_LIMIT = 5`.\n\n#### `db.sqlite:table:key`\n- `kind: \"row\"`\n- Resolves by primary key when the table has exactly one PK column; otherwise falls back to `rowid` lookup.\n- No query parameters allowed on row lookups.\n\n#### `db.sqlite:table?limit=...&offset=...&order=...&where=...`\n- `kind: \"query\"`\n- Defaults: `limit = 20`, `offset = 0`.\n- `limit` is capped at `500`.\n- `order` accepts `column` or `column:asc|desc` and must name an existing column.\n- `where` is accepted only after `validateWhereClause()` rejects comments, semicolons, and control keywords like `LIMIT`, `OFFSET`, `UNION`, `ATTACH`, `PRAGMA`.\n- Unknown query parameters throw.\n\n#### `db.sqlite?q=SELECT ...`\n- `kind: \"raw\"`\n- Cannot be combined with table selectors or any other query param.\n- Empty `q` throws.\n- `executeReadQuery()` runs `db.prepare(sql).all()` and rejects bound parameters; it does not verify that the SQL starts with `SELECT`.\n\n- Rendering caps in `packages/coding-agent/src/tools/sqlite-reader.ts`:\n - ASCII table width `120` (`MAX_RENDER_WIDTH`)\n - per-column width `40` (`MAX_COLUMN_WIDTH`)\n- `#readSqlite()` opens Bun SQLite in `{ readonly: true, strict: true }` and sets `PRAGMA busy_timeout = 3000`.\n\n### Documents\n- `CONVERTIBLE_EXTENSIONS` in `packages/coding-agent/src/tools/read.ts` covers `.pdf`, `.doc`, `.docx`, `.ppt`, `.pptx`, `.xls`, `.xlsx`, `.rtf`, `.epub`.\n- `convertFileWithMarkit()` converts the file to text/markdown.\n- Converted output is then head-truncated with normal shared limits; there is no line selector support inside the source document before conversion.\n- Conversion failures return a text block like `[Cannot read .pdf file: ...]`.\n\n### Jupyter notebooks\n- `.ipynb` goes through `readEditableNotebookText()` unless `:raw` was requested.\n- Output is editable plain text with markers like:\n\n```text\n# %% [code] cell:0\n...\n```\n\n- Raw mode bypasses that conversion and falls back to file-text reading.\n\n### Images\n- Image detection is metadata-based (`readImageMetadata()`).\n- Max accepted image size is `20 MiB` (`MAX_IMAGE_INPUT_BYTES`, re-exported as `MAX_IMAGE_SIZE`). Larger files throw.\n- If `inspect_image.enabled` is true, `read` returns metadata only (MIME, bytes, dimensions, channels, alpha) plus a suggestion to call `inspect_image`.\n- Otherwise it calls `loadImageInput()` and returns:\n - a text note from the image loader\n - an inline image block\n- Unsupported/undecodable image formats throw a `ToolError`.\n\n### Internal URLs\n- `read` does not resolve these itself; it delegates to `session.internalRouter.resolve()`.\n- Registered protocols are outside this file, but the router in `packages/coding-agent/src/internal-urls/router.ts` is built for `agent://`, `artifact://`, `issue://`, `local://`, `mcp://`, `memory://`, `pi://`, `pr://`, `rule://`, and `skill://`.\n- `#handleInternalUrl()` behavior:\n - parses the URL with `parseInternalUrl()` so colons inside the host segment are legal\n - for `agent://`, treats non-root path extraction or `?q=` extraction as a special no-pagination mode\n - otherwise paginates the resolved text in memory\n - passes `immutable` through to `resolveFileDisplayMode()` so anchors are suppressed for immutable resources such as artifacts, skills, memory, and agent outputs\n - sets `ignoreResultLimits: true` for `skill://` so the full skill text is paginated only by explicit selectors, not by the normal default line limit\n- `issue://<N>` / `pr://<N>` (and the long form `issue://<owner>/<repo>/<N>` / `pr://<owner>/<repo>/<N>`) route through the same SQLite cache the `github` tool writes to; `?comments=0` selects the no-comments rendering. Bare `issue://` / `pr://` (and `issue://<owner>/<repo>` / `pr://<owner>/<repo>`) issue a live `gh issue list` / `gh pr list` for browsing, accepting `?state=`, `?limit=`, `?author=`, `?label=`. PR diffs share the same cache through `pr://<N>/diff` (numbered file listing with per-file hints), `pr://<N>/diff/<i>` (single file slice; 1-indexed), and `pr://<N>/diff/all` (verbatim unified diff); the listing and per-file slices are reconstructed from the cached unified-diff payload, so all three variants share one `gh pr diff` invocation per PR. Diff content is served as `text/plain`. Soft TTL `github.cache.softTtlSec` (default 5 minutes), hard TTL `github.cache.hardTtlSec` (default 7 days). Stale-hit returns the cached row and schedules a background refresh.\n\n### Web URLs\n- `parseReadUrlTarget()` accepts `http://`, `https://`, or `www.` targets.\n- Plain URL reads call `executeReadUrl()` in `packages/coding-agent/src/tools/fetch.ts`.\n- `:raw` means raw HTML/body fallback path; plain URL reads prefer rendered/reader-friendly output.\n- `:N`, `:A-B`, `:A+C` do not refetch. They page over cached output from the prior or current URL render.\n- URL render pipeline in `renderUrl()`:\n 1. normalize scheme (`https://` added for bare `www.`)\n 2. try special handlers for known sites unless raw\n 3. fetch with `loadPage()`\n 4. if content is image/PDF/DOCX/etc., try binary fetch + markit/image handling\n 5. handle JSON directly, feeds via feed parser, plain text directly\n 6. for HTML and non-raw mode, try markdown alternates, `URL.md`, content negotiation, feed alternates, HTML-to-text renderers, extracted linked documents, then `llms.txt`\n 7. fall back to raw body text/html\n- URL output is wrapped with a small header:\n\n```text\nURL: ...\nContent-Type: ...\nMethod: ...\nNotes: ...\n\n---\n```\n\n- `method` records the winning path (`json`, `feed`, `text`, `alternate-markdown`, `md-suffix`, `content-negotiation`, `image`, `markit`, `llms.txt`, `raw`, `raw-html`, etc.).\n- URL reads may return an inline image block when the fetched resource is a supported image and survives resizing.\n\n## Side Effects\n- Filesystem\n - Opens and streams local files.\n - Reads entire archives into memory before indexing.\n - May read URL-cache artifact files from the session artifacts directory.\n - Writes URL output artifacts when URL output is truncated or when line-range pagination needs a persisted cache body.\n- Network\n - URL mode performs HTTP fetches, binary refetches, and alternate-endpoint probes.\n- Subprocesses / native bindings\n - Uses Bun SQLite for `.db`/`.sqlite*`.\n - Uses `Bun.Archive` for tar/tgz and `fflate` for zip.\n - URL HTML rendering can delegate into site handlers and HTML-to-text backends from `packages/coding-agent/src/tools/fetch.ts`.\n- Session state\n - Records local text lines into `session.fileReadCache` for later stale-anchor recovery.\n - Uses `session.internalRouter` for internal URLs.\n - Uses `session.allocateOutputArtifact()` for cached/truncated URL output.\n- Background work / cancellation\n - Most branches honor `AbortSignal`; the tool itself is marked `nonAbortable = true`, but helper paths still call `throwIfAborted(signal)`.\n\n## Limits & Caps\n- Shared text truncation defaults from `packages/coding-agent/src/session/streaming-output.ts`:\n - `DEFAULT_MAX_LINES = 3000`\n - `DEFAULT_MAX_BYTES = 50 * 1024`\n- Local text open-ended default line limit: `read.defaultLimit`, clamped to `[1, DEFAULT_MAX_LINES]`.\n- Explicit line ranges add `1` leading and `3` trailing context lines on the constrained sides (`RANGE_LEADING_CONTEXT_LINES` / `RANGE_TRAILING_CONTEXT_LINES`).\n- File streaming chunk size: `8 * 1024` bytes (`READ_CHUNK_SIZE`).\n- Local streamed byte budget for line reads: `max(DEFAULT_MAX_BYTES, maxLinesToCollect * 512)`.\n- Structural summaries only run when file size `<= 2 MiB` and line count `<= 20_000`.\n- Image input max: `20 MiB`.\n- Directory tree caps for local directories: depth `2`, per-directory children `12`.\n- Archive directory default list cap: `500` entries.\n- SQLite:\n - default row query limit `20`\n - schema sample limit `5`\n - max query limit `500`\n - table list cap `500`\n - render width `120`, column width `40`\n - busy timeout `3000` ms\n- URL read result shown to the model is truncated to `300` lines and `50 KiB` in `executeReadUrl()`; full cached output can be attached as an artifact.\n- Inline fetched URL images:\n - source bytes cap `20 MiB`\n - post-resize inline output cap `300 KiB`\n- Unique suffix auto-resolution glob timeout: `5000` ms.\n- File-read cache holds `30` paths per session.\n\n## Errors\n- Validation and operational failures surface as `ToolError`.\n- Selector errors include:\n - `Line selector 0 is invalid; lines are 1-indexed. Use :1.`\n - invalid `A+B` / `A-B` shapes\n - `Cannot combine query extraction with offset/limit` for `agent://.../path:50`\n- Missing local/archive/sqlite paths first attempt unique suffix resolution; if no unique match exists they error.\n- Out-of-bounds line reads do not throw. They return explanatory text with a suggestion such as `Use :1 ...` or `Use :<last line> ...`.\n- Binary archive entries do not throw; they return a text notice.\n- Document conversion failure returns a text notice.\n- Image oversize/unsupported/invalid cases throw.\n- SQLite parser rejects unsupported parameter combinations early; DB/runtime errors are caught and rethrown as `ToolError(message)`.\n- URL fetch failure does not throw when HTTP fetch succeeds but `response.ok === false`; it returns a failed URL read with `method: \"failed\"` and explanatory notes.\n\n## Notes\n- `readSchema` examples include `https://example.com:L1-L40`, but URL selector parsing in `packages/coding-agent/src/tools/fetch.ts` does not accept `L` prefixes.\n- Hashline anchors are suppressed for raw reads and immutable internal resources because there is no editable backing target for later `edit` consumption.\n- `splitPathAndSel()` intentionally treats unknown trailing `:...` as part of the path so `archive.zip:inner/file` and `db.sqlite:table:key` still work.\n- `resolveReadPath()` contains macOS-specific filename fallbacks for screenshot timestamps, NFD Unicode normalization, and curly apostrophes.\n- A bare `/` resolves to the session cwd, not the filesystem root.\n- URL cache keys are session-scoped and normalized by requested URL + raw/rendered mode; both requested URL and final redirected URL are cached.\n- URL line-range reads request `ensureArtifact: true, preferCached: true` so a later paginated read can reopen the same rendered body from artifact storage.\n- Raw SQLite `q=` execution is not keyword-restricted beyond “no bound parameters”; the read tool relies on the surrounding contract to keep it read-only.\n- The file-read cache is not a read acceleration cache. It exists to recover hashline edits when the file changed after the read.",
|
|
78
78
|
"tools/recall.md": "# recall\n\n> Search the active Hindsight bank and return raw matching memories.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/hindsight-recall.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/recall.md`\n- Key collaborators:\n - `packages/coding-agent/src/hindsight/state.ts` — session state, recall query defaults, prompt-side auto-recall.\n - `packages/coding-agent/src/hindsight/content.ts` — result formatting and UTC timestamp formatting.\n - `packages/coding-agent/src/hindsight/client.ts` — HTTP `recall` call and error mapping.\n - `packages/coding-agent/src/hindsight/bank.ts` — bank id and tag-filter scoping.\n - `docs/tools/retain.md` — shared backend, storage, seeding, and mental-model bootstrap.\n\n## Inputs\n\n| Field | Type | Required | Description |\n|---|---|---:|---|\n| `query` | `string` | Yes | Natural-language search query. The tool passes it through unchanged. |\n\n## Outputs\nReturns a single-shot tool result.\n\nWhen matches exist:\n- `content[0].type = \"text\"`\n- `content[0].text = \"Found <n> relevant memories (as of YYYY-MM-DD HH:MM UTC):\\n\\n<bullet list>\"`\n- each bullet is `- <text> [<type>] (<mentioned_at>)`; the type and timestamp suffixes appear only when those fields are present\n- `details = {}`\n\nWhen no matches exist:\n- `content[0].text = \"No relevant memories found.\"`\n- `details = {}`\n\n## Flow\n1. `HindsightRecallTool.createIf(...)` only exposes the tool when `memory.backend == \"hindsight\"`.\n2. `execute(...)` wraps the whole operation in `untilAborted(...)` from `@oh-my-pi/pi-utils`.\n3. It reads the active `HindsightSessionState`; missing state throws `Hindsight backend is not initialised for this session.`\n4. It calls `state.client.recall(...)` with:\n - `bankId` from session bootstrap,\n - the model-supplied `query`,\n - `budget`, `maxTokens`, and `types` from `HindsightConfig`,\n - tag filters from the bank scope (`recallTags`, `recallTagsMatch`).\n5. `HindsightApi.recall(...)` POSTs `/v1/default/banks/{bank_id}/memories/recall`.\n6. Results are formatted into a plain-text list with `formatMemories(...)`; empty results map to the fixed no-match string.\n7. Failures are logged with `logger.warn(\"recall failed\", ...)` and rethrown.\n\n## Modes / Variants\n- Tool path: explicit query-only recall. The tool does not compose context from recent turns; that richer path is reserved for backend auto-recall in `HindsightSessionState.beforeAgentStartPrompt(...)` / `maybeRecallOnAgentStart(...)`.\n- Bank scoping is inherited from the active `HindsightSessionState`:\n - `global` — no tag filter.\n - `per-project` — separate bank id per cwd basename.\n - `per-project-tagged` — shared bank id plus `project:<cwd basename>` filter with `tagsMatch = \"any\"`, so project-tagged and untagged global memories can both surface.\n- Session scope: reads cross-session server-side memories, but uses per-session cached config and scope.\n\n## Side Effects\n- Network\n - `POST /v1/default/banks/{bank_id}/memories/recall` via `packages/coding-agent/src/hindsight/client.ts`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - None on success. Unlike backend auto-recall, this tool does not update `lastRecallSnippet` or refresh the system prompt.\n- Background work / cancellation\n - Aborts through `untilAborted(...)` if the tool call signal is cancelled.\n\n## Limits & Caps\n- Client default budget for raw `HindsightApi.recall(...)` is `\"mid\"`; this tool overrides from config in `packages/coding-agent/src/hindsight/state.ts`.\n- Default recall settings from `packages/coding-agent/src/config/settings-schema.ts`:\n - `hindsight.recallBudget = \"mid\"`\n - `hindsight.recallMaxTokens = 1024`\n - `hindsight.recallTypes = [\"world\", \"experience\"]`\n- The explicit tool path does not apply `hindsight.recallContextTurns` or `hindsight.recallMaxQueryChars`; those caps only affect backend auto-recall query composition.\n\n## Errors\n- Throws `Hindsight backend is not initialised for this session.` when no state exists.\n- HTTP and fetch failures become `HindsightError` from `packages/coding-agent/src/hindsight/client.ts` with `statusCode` and parsed `details` when available.\n- Non-`Error` failures are normalized to `new Error(String(err))` before rethrow.\n\n## Notes\n- Shared backend details are in `docs/tools/retain.md`: server-side storage, subagent aliasing, bank scoping, mission setup, and mental-model bootstrap.\n- Mental models are not fetched by this tool. They may still already be present in the agent's developer instructions because the backend caches a `<mental_models>` block separately from recall results.\n- The tool returns raw memory hits; it does not synthesize across them. Use `reflect` for that path.\n",
|
|
79
79
|
"tools/recipe.md": "# recipe\n\n> Run a task exposed by a detected project task runner.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/recipe/index.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/recipe.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/recipe/runner.ts` — op parsing, task resolution, prompt model.\n - `packages/coding-agent/src/tools/recipe/render.ts` — shell-style call/result rendering.\n - `packages/coding-agent/src/tools/recipe/runners/index.ts` — runner registration order.\n - `packages/coding-agent/src/tools/recipe/runners/just.ts` — detect `just` recipes from justfiles.\n - `packages/coding-agent/src/tools/recipe/runners/pkg.ts` — detect `package.json` scripts and workspaces.\n - `packages/coding-agent/src/tools/recipe/runners/cargo.ts` — detect Cargo run/test targets.\n - `packages/coding-agent/src/tools/recipe/runners/make.ts` — parse make targets from makefiles.\n - `packages/coding-agent/src/tools/recipe/runners/task.ts` — detect Taskfile tasks via `task --list-all`.\n - `packages/coding-agent/src/tools/bash.ts` — actual command execution, truncation, cwd/env handling.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `op` | `string` | Yes | Single string containing the task selector plus trailing arguments. The first whitespace-delimited token selects the task; the remainder is appended verbatim to the resolved runner command. Examples from schema/prompt: `test`, `build --release`, `pkg-a/test`, `crate/bin/server`, `pkg:test --watch`. |\n\n### `op` grammar\n\n```text\nop := S* head (S+ tail)?\nhead := explicit-runner / implicit-task\nexplicit-runner := runner-id \":\" task-token\nimplicit-task := task-token\nrunner-id := detected runner id (`just` | `pkg` | `cargo` | `make` | `task`)\ntask-token := first non-whitespace token; may contain `/`\ntail := remaining characters after the first whitespace run\n```\n\nResolution rules from `resolveRunnerAndTask()`:\n- Leading whitespace is ignored; an empty `op` throws `ToolError` with the available task list.\n- Only the first token is parsed structurally. Everything after the first whitespace run becomes `tail` and is appended to the command unchanged.\n- If `head` contains `:` and the prefix matches a detected runner id, the suffix must exactly match a task in that runner.\n- Otherwise `head` is treated as a task name and matched across all detected runners.\n- If exactly one runner has that task, it is used.\n- If multiple runners have that task, the call is rejected and the error tells the model to use `<runner-id>:<task>`.\n- Namespaced task names generated by runners use `/`, not `:`. `/` is part of the task name, not a parser separator.\n\n## Outputs\n- Delegates directly to `BashTool.execute()` and returns the same `AgentToolResult<BashToolDetails>` shape.\n- Success path: one text content block containing merged command output (`result.output` from bash execution, or `(no output)`), plus any timeout clamp notice appended after a blank line.\n- Recipe does not return separate `stdout`, `stderr`, or `exitCode` fields. `stdout`/`stderr` are already merged into the text block by bash execution; `exitCode` is only observed indirectly (success requires `0`, non-zero becomes an error).\n- Error path: throws `ToolError`; for non-zero exits the message is the merged output followed by `Command exited with code <n>`.\n- `details` may include:\n - `timeoutSeconds`: effective timeout used by bash.\n - `requestedTimeoutSeconds`: only when bash clamped a requested timeout; recipe never sets one itself.\n - `meta`: output truncation metadata from bash execution.\n - `async`: defined by bash background execution paths, but recipe does not expose an `async` input.\n- When bash output is truncated, the full text is stored in an artifact and referenced via bash truncation metadata.\n- Call/result rendering in the TUI uses bash shell rendering with a resolved title, command preview, and optional task cwd.\n\n## Flow\n1. `RecipeTool.createIf()` in `packages/coding-agent/src/tools/recipe/index.ts` checks `session.settings.get(\"recipe.enabled\")`; disabled returns `null`.\n2. It probes every runner in `RUNNERS` from `packages/coding-agent/src/tools/recipe/runners/index.ts` with `Promise.all(...)` in this order: `just`, `pkg`, `cargo`, `make`, `task`.\n3. Each runner returns either `null` or a `DetectedRunner { id, label, commandPrefix, tasks }`; runners with zero tasks are discarded.\n4. If no runners remain, the tool is not registered.\n5. Constructor stores detected runners, instantiates `BashTool`, renders the model-facing description by passing `buildPromptModel(runners)` into `packages/coding-agent/src/prompts/tools/recipe.md`, and builds shell renderers from `createRecipeToolRenderer()`.\n6. On execution, `RecipeTool.execute()` calls `resolveCommand(op, this.#runners)`.\n7. `resolveCommand()` in `packages/coding-agent/src/tools/recipe/runner.ts`:\n 1. `parseOp()` trims only leading whitespace, extracts the first non-whitespace token as `head`, and keeps the remainder as `tail`.\n 2. `resolveRunnerAndTask()` resolves `head` either as `runnerId:taskName` or as an unqualified task name.\n 3. It throws `ToolError` for empty ops, missing explicit tasks, ambiguous task names, or unknown tasks; all error variants include the available task list.\n 4. It builds the final shell command with `buildCommand(commandPrefix, commandName, tail)`, joining non-empty parts with spaces.\n 5. If the task defines `cwd`, that relative path is returned alongside the command.\n8. `RecipeTool.execute()` forwards `{ command, cwd }` into `BashTool.execute()`; recipe does not pass timeout, env, async, or pty options.\n9. `BashTool.execute()` resolves internal URLs, validates/normalizes cwd against `session.cwd`, clamps timeout, applies bash interception rules, runs the command, and formats the final result.\n\n## Modes / Variants\n- Tool enablement:\n - Disabled by `recipe.enabled` setting: tool is absent.\n - Enabled but no detected tasks: tool is absent.\n- Task selection:\n - Unqualified task name: succeeds only when exactly one detected runner owns that task.\n - Explicit runner-qualified task: `<runner-id>:<task>`.\n- Runner detection paths:\n - `just`: requires `just` on `PATH`, a justfile, and successful `just --dump --dump-format=json`.\n - `pkg`: requires a readable root `package.json`; picks a package manager command from lockfiles or `bun` availability; discovers root scripts and workspace package scripts.\n - `cargo`: requires `cargo` on `PATH`, `Cargo.toml`, and successful `cargo metadata --no-deps --format-version=1`.\n - `make`: requires `make` on `PATH` and a makefile; parses targets statically.\n - `task`: requires `task` on `PATH`, a Taskfile, and successful `task --list-all --json`.\n- Execution path:\n - Always the synchronous `bash` call surface from recipe inputs.\n - Bash may still auto-background long-running work if `bash.autoBackground.enabled` and session async job support are enabled.\n\n## Side Effects\n- Filesystem\n - Reads manifests from the session cwd during detection: justfiles, `package.json`, workspace `package.json` files, `Cargo.toml`, makefiles, `Taskfile.yml` / `Taskfile.yaml`.\n - Command execution runs in `session.cwd` or a task-specific relative cwd resolved under it.\n - Bash may allocate output artifacts for truncated command output.\n- Subprocesses / native bindings\n - Detection may spawn `just --dump --dump-format=json`, `cargo metadata --no-deps --format-version=1`, and `task --list-all --json`.\n - Execution spawns the resolved shell command through `BashTool` / `executeBash()`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Tool availability depends on session settings.\n - Constructor prompt text is specialized to detected runners/tasks.\n - Bash execution may create async job records and output artifacts if bash auto-background triggers.\n- User-visible prompts / interactive UI\n - The model-facing tool description lists detected runners and up to 20 tasks per runner.\n - TUI rendering shows a shell-style preview using the resolved title/command/cwd.\n- Background work / cancellation\n - Detection is parallelized across runners.\n - Runtime command execution honors the passed abort signal through `BashTool`.\n\n## Limits & Caps\n- Prompt task listing is capped at `PROMPT_TASK_LIMIT = 20` per runner in `packages/coding-agent/src/tools/recipe/runner.ts`; this affects the rendered tool description, not execution.\n- Recipe itself defines no timeout input; delegated bash execution therefore uses bash's default `timeout = 300` seconds from `packages/coding-agent/src/tools/bash.ts`.\n- Bash clamps timeouts to the configured bash range (`clampTimeout(\"bash\", ...)` in `packages/coding-agent/src/tools/bash.ts`), but recipe cannot request a custom value.\n- `pkg` workspace discovery normalizes workspace globs to `.../package.json` and sorts matched package files lexicographically before task generation.\n- `cargo` deduplicates generated task names with a `Set`, so duplicate targets collapse to one recipe task.\n\n## Errors\n- Detection failures in runner modules are mostly soft-failed:\n - Missing binaries, missing manifests, parse failures, or non-zero probe exits usually return `null` and log with `logger.debug(...)`.\n - Result: the affected runner disappears instead of surfacing an error to the model.\n- Invocation failures are hard errors from `resolveRunnerAndTask()`:\n - Empty `op`.\n - Explicit runner prefix with missing/empty task.\n - Ambiguous unqualified task name across runners.\n - Unknown task name.\n- Execution failures come from `BashTool.execute()`:\n - Invalid cwd.\n - Bash interceptor blocks.\n - Aborts/timeouts.\n - Non-zero exit codes.\n - Missing exit status.\n- All `resolveRunnerAndTask()` errors include the current available task list to help the model retry.\n\n## Notes\n- `RecipeTool` sets `concurrency = \"exclusive\"`; calls do not run concurrently with other exclusive tools.\n- Tool registration is all-or-nothing per runner: a detected runner with zero tasks is dropped.\n- Runner ids are fixed string literals from the runner modules: `just`, `pkg`, `cargo`, `make`, `task`.\n- `buildPromptModel()` includes each task's rendered command (`commandPrefix` + `commandName`) and relative cwd when present; the prompt therefore exposes the exact shell form recipe will run.\n- `pkg` task names:\n - Root `package.json` scripts keep bare names like `test`.\n - Workspace scripts are always namespaced as `<package-name-or-dir>/<script>` and set `cwd` to that package directory.\n - Script names are shell-quoted into `commandName`, so a task like `build` becomes `bun run 'build'` / `npm run 'build'` / similar.\n- `pkg` command prefix selection prefers lockfiles in this order: `bun.lock`/`bun.lockb`, `pnpm-lock.yaml`, `yarn.lock`, `package-lock.json`/`npm-shrinkwrap.json`; otherwise it falls back to `bun run` if `bun` exists, else `npm run`.\n- `cargo` task names are generated from metadata targets:\n - Single-package manifests: `bin/<name>`, `example/<name>`, `test/<name>`.\n - Multi-package workspaces: `<package>/bin/<name>`, `<package>/example/<name>`, `<package>/test/<name>`.\n - Each task overrides `commandPrefix` to the full `cargo run ... --bin|--example` or `cargo test ... --test` prefix, and `commandName` to the quoted target name.\n- `make` target parsing is static text parsing, not `make -qp` output:\n - Recognizes makefiles named `Makefile`, `makefile`, `GNUmakefile`.\n - Uses `.PHONY` lines to decide whether to include undocumented file targets; without any `.PHONY`, all parsed targets are exposed.\n - If `.PHONY` exists, documented non-phony targets are kept with ` (file target)` appended to `doc`.\n- `just` detection ignores private recipes and preserves declared parameter names only for prompt display; execution still accepts arbitrary `tail` text.\n- `task` detection uses `desc` first, then `summary`, for task documentation.\n- Recipe has no env input of its own. Commands inherit whatever environment `BashTool` supplies for normal bash execution in the session.",
|
|
80
80
|
"tools/reflect.md": "# reflect\n\n> Ask the Hindsight server to synthesize an answer over the active memory bank.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/hindsight-reflect.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/reflect.md`\n- Key collaborators:\n - `packages/coding-agent/src/hindsight/bank.ts` — best-effort bank mission initialization.\n - `packages/coding-agent/src/hindsight/state.ts` — session state, shared bank scope, recall/reflect config.\n - `packages/coding-agent/src/hindsight/client.ts` — HTTP `reflect` call and error mapping.\n - `docs/tools/retain.md` — shared backend, storage, seeding, and mental-model bootstrap.\n\n## Inputs\n\n| Field | Type | Required | Description |\n|---|---|---:|---|\n| `query` | `string` | Yes | Question to answer from long-term memory. |\n| `context` | `string` | No | Extra guidance sent to the Hindsight reflect endpoint. |\n\n## Outputs\nReturns a single-shot tool result:\n\n- `content[0].type = \"text\"`\n- `content[0].text = response.text?.trim() || \"No relevant information found to reflect on.\"`\n- `details = {}`\n\nThe tool returns the Hindsight server's synthesized text directly; it does not expose raw recall hits.\n\n## Flow\n1. `HindsightReflectTool.createIf(...)` only exposes the tool when `memory.backend == \"hindsight\"`.\n2. `execute(...)` runs under `untilAborted(...)`.\n3. It reads the active `HindsightSessionState`; missing state throws `Hindsight backend is not initialised for this session.`\n4. Before reflecting, it calls `ensureBankMission(...)` with the current `bankId`, config, and process-local `missionsSet`.\n5. `ensureBankMission(...)` best-effort `PUT`s `/v1/default/banks/{bank_id}` with `reflect_mission` and optional `retain_mission` exactly once per bank/process; failures are swallowed.\n6. It calls `state.client.reflect(...)` with the model `query`, optional `context`, configured recall budget, and bank-scope tag filters.\n7. `HindsightApi.reflect(...)` POSTs `/v1/default/banks/{bank_id}/reflect` and defaults its own budget to `\"low\"` when callers omit one; this tool always passes the configured budget.\n8. Blank or whitespace-only responses are replaced with `No relevant information found to reflect on.`\n9. Failures are logged with `logger.warn(\"reflect failed\", ...)` and rethrown.\n\n## Modes / Variants\n- Tool path: one reflect request, optionally focused by `context`.\n- Bank scoping is inherited from the active `HindsightSessionState`:\n - `global` — no tag filter.\n - `per-project` — separate bank id per cwd basename.\n - `per-project-tagged` — shared bank id plus `project:<cwd basename>` filter with `tagsMatch = \"any\"`.\n- Session scope: reads cross-session server-side memories, but does not persist local output.\n\n## Side Effects\n- Network\n - Optional `PUT /v1/default/banks/{bank_id}` from `ensureBankMission(...)`.\n - `POST /v1/default/banks/{bank_id}/reflect` via `packages/coding-agent/src/hindsight/client.ts`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Reads session-held bank scope and config only. Does not update `lastRecallSnippet`, the mental-model cache, or the retain queue.\n- Background work / cancellation\n - Aborts through `untilAborted(...)` if the tool call signal is cancelled.\n\n## Limits & Caps\n- Tool-level params: only `query` is required; `context` is optional.\n- Default budget setting comes from `hindsight.recallBudget` in `packages/coding-agent/src/config/settings-schema.ts`; default `\"mid\"`.\n- `reflect` itself has no client-side token cap parameter here; unlike `recall`, the tool does not pass `maxTokens`.\n- Mission initialization tracks up to `MISSION_SET_CAP = 10_000` bank ids in `packages/coding-agent/src/hindsight/bank.ts`, then drops the oldest half of the sorted set.\n\n## Errors\n- Throws `Hindsight backend is not initialised for this session.` when no state exists.\n- HTTP and fetch failures become `HindsightError` from `packages/coding-agent/src/hindsight/client.ts` with `statusCode` and parsed `details` when available.\n- `ensureBankMission(...)` failures are silent to the tool caller; only the later reflect request can fail visibly.\n- Non-`Error` failures are normalized to `new Error(String(err))` before rethrow.\n\n## Notes\n- Shared backend details are in `docs/tools/retain.md`: server-side storage, subagent aliasing, bank scoping, seed mental models from `packages/coding-agent/src/hindsight/seeds.json`, and mental-model prompt injection.\n- `reflect` does not read the cached `<mental_models>` block directly. It queries the Hindsight server over the bank contents. The same session may also have separate mental-model context injected into its developer instructions.\n- Reflect mission and retain mission are bank-level server settings, not per-request payload. The tool just ensures they are present best-effort before reflecting.\n",
|
|
81
81
|
"tools/render_mermaid.md": "# render_mermaid\n\n> Convert Mermaid source into terminal-friendly ASCII/Unicode text.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/render-mermaid.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/render-mermaid.md`\n- Key collaborators:\n - `packages/utils/src/mermaid-ascii.ts` — thin wrapper over renderer package.\n - `packages/coding-agent/src/tools/index.ts` — tool registration and enablement gate.\n - `packages/coding-agent/src/sdk.ts` — session-facing artifact allocation hook.\n - `packages/coding-agent/src/session/session-manager.ts` — persistent-session artifact path allocation.\n - `packages/coding-agent/src/session/artifacts.ts` — artifact filename generation and writes.\n- Related user/runtime doc: `docs/render-mermaid.md`\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `mermaid` | `string` | Yes | Mermaid source text. Schema example: `graph TD; A-->B`. |\n| `config` | `object` | No | Optional renderer options. Sanitized before rendering; numeric fields are floored and clamped to `>= 0`. |\n\n`config` fields:\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `useAscii` | `boolean` | No | `true` for plain ASCII, `false`/omitted for Unicode box-drawing output. Passed through unchanged. |\n| `paddingX` | `number` | No | Horizontal spacing. `Math.floor`, then `Math.max(0, value)`. |\n| `paddingY` | `number` | No | Vertical spacing. `Math.floor`, then `Math.max(0, value)`. |\n| `boxBorderPadding` | `number` | No | Inner box padding. `Math.floor`, then `Math.max(0, value)`. |\n\n## Outputs\nThe tool returns a single text content block:\n\n- inline body: rendered diagram text\n- optional trailer: `Saved artifact: artifact://<id>` when artifact storage is available\n\n`details` may include:\n\n- `artifactId?: string`\n\nNo image path, SVG, PNG, or binary payload is returned. Stored artifacts are plain text `.log` files; artifact filenames are allocated as `<id>.render_mermaid.log` by `packages/coding-agent/src/session/artifacts.ts`.\n\n## Flow\n1. `RenderMermaidTool.execute()` in `packages/coding-agent/src/tools/render-mermaid.ts` receives `mermaid` and optional `config`.\n2. `sanitizeRenderConfig()` normalizes `paddingX`, `paddingY`, and `boxBorderPadding` to non-negative integers; `useAscii` is passed through.\n3. The tool calls `renderMermaidAscii()` from `@oh-my-pi/pi-utils`.\n4. `packages/utils/src/mermaid-ascii.ts` forwards directly to `renderMermaidASCII()` from the `beautiful-mermaid` package.\n5. The tool optionally asks the session for an artifact slot with `allocateOutputArtifact(\"render_mermaid\")`.\n6. If a path is returned, `Bun.write()` persists the full rendered text to that file.\n7. The tool returns the rendered text, plus an `artifact://` line and `details.artifactId` when persistence succeeded.\n\n## Modes / Variants\n- Default render: Unicode box-drawing output when `config.useAscii` is omitted or false.\n- ASCII render: plain ASCII output when `config.useAscii` is true.\n- Persistent-session path: artifact text is written when `allocateOutputArtifact()` returns a path.\n- Ephemeral-session path: no artifact is written; the inline text result is still returned.\n\n## Side Effects\n- Filesystem\n - May write one session artifact via `Bun.write()`.\n - Artifact filename format is `<id>.render_mermaid.log`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Consumes the session artifact allocator hook.\n - Returns `details.artifactId` for the tool result.\n\n## Limits & Caps\n- No tool-local timeout, retry, truncation, or streaming path.\n- Numeric config fields are quantized to integers with `Math.floor()` and clamped to `0` minimum in `sanitizeRenderConfig()`.\n- Renderer engine is `beautiful-mermaid@1.1.3` per root `package.json` / `bun.lock`.\n- The tool is registered as discoverable and gated by `renderMermaid.enabled` in `packages/coding-agent/src/tools/index.ts`.\n\n## Errors\n- `renderMermaidAscii()` is not wrapped in a local `try/catch`; renderer exceptions propagate out of `execute()`.\n- Invalid Mermaid syntax therefore fails the tool call rather than returning partial output.\n- Artifact allocation failures inside the SDK hook are swallowed there and converted to `{}` in `packages/coding-agent/src/sdk.ts`; rendering still succeeds, just without a saved artifact.\n- Artifact write failures from `Bun.write()` are not caught in the tool and will fail the call.\n\n## Notes\n- The tool summary string says `Render a Mermaid diagram to an image`, but the implementation and prompt both produce text, not images.\n- Despite the name, this tool does not use Puppeteer, browser rendering, Mermaid CLI, or native bindings; rendering stays in-process through the JS package wrapper.\n- `docs/render-mermaid.md` covers operator-facing behavior and enablement; keep this file focused on the tool contract and runtime path.\n",
|
|
82
82
|
"tools/resolve.md": "# resolve\n\n> Finalizes a queued preview action by applying or discarding it.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/resolve.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/resolve.md`\n- Key collaborators:\n - `docs/resolve-tool-runtime.md` — preview/apply runtime reference\n - `packages/coding-agent/src/extensibility/custom-tools/loader.ts` — forwards custom pending actions into the queue\n - `packages/coding-agent/src/tools/ast-edit.ts` — built-in preview producer example\n - `packages/coding-agent/src/session/agent-session.ts` — tool-choice queue and invoker access\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `action` | `\"apply\" | \"discard\"` | Yes | Whether to commit or reject the queued preview. |\n| `reason` | `string` | Yes | Required explanation passed through to the queued callback. |\n\n## Outputs\n- Single-shot result.\n- `execute()` returns whatever the queued invoker returns, with `details` wrapped/augmented to include:\n - `action`\n - `reason`\n - `sourceToolName?`\n - `label?`\n - `sourceResultDetails?` — original `result.details` from the apply/reject callback when present\n- If `discard` has no custom reject callback, the default success payload is `Discarded: <label>. Reason: <reason>`.\n- The TUI renderer is inline and merges call+result into one block.\n\n## Flow\n1. Preview-producing code calls `queueResolveHandler(...)` with a label, source tool name, and `apply(reason)` callback, plus optional `reject(reason)`.\n2. `queueResolveHandler(...)` asks the session for a forced `resolve` tool choice and pushes it into the tool-choice queue with `pushOnce(...)`.\n3. The queued entry is marked `now: true`; if the model rejects that forced tool choice, `onRejected` returns `requeue`, so the reminder comes back.\n4. `queueResolveHandler(...)` also injects a `resolve-reminder` steering message: `This is a preview. Call the resolve tool to apply or discard these changes.`\n5. When `resolve.execute()` runs, it wraps the call in `untilAborted(...)` and fetches the current queue invoker with `session.peekQueueInvoker()`.\n6. If no invoker exists, it throws `ToolError(\"No pending action to resolve. Nothing to apply or discard.\")`.\n7. Otherwise it invokes the queued callback with `{ action, reason }`.\n8. For `apply`, it always executes the producer's `apply(reason)` callback.\n9. For `discard`, it executes `reject(reason)` when provided; if that callback is absent or returns `undefined`, `resolve` fabricates the default discard message.\n10. Before returning, it merges resolve metadata into `result.details` so renderer/UI code can show the action, label, and originating tool.\n\n## Modes / Variants\n- `apply`: runs the queued `apply(reason)` callback and returns its content.\n- `discard` with reject callback: runs `reject(reason)` and returns that callback's content.\n- `discard` without reject callback: returns the built-in `Discarded: ...` text payload.\n\n## Side Effects\n- Session state\n - Consumes the current pending preview through the session tool-choice queue; there is no separate pending-action stack.\n - Adds a `resolve-reminder` steering message when a preview is queued.\n- User-visible prompts / interactive UI\n - No direct prompt. The visible effect depends on the preview-producing tool and the resolve renderer.\n- Background work / cancellation\n - `untilAborted(...)` lets abort signals interrupt resolution before invoking the callback completes.\n\n## Limits & Caps\n- Hidden tool: not discoverable in the normal tool index (`packages/coding-agent/src/tools/resolve.ts`, `packages/coding-agent/src/session/agent-session.ts`).\n- Exactly one active queue invoker is consulted per call via `session.peekQueueInvoker()`.\n- There is no independent queue depth cap in this tool; ordering follows the shared tool-choice queue (`docs/resolve-tool-runtime.md`).\n\n## Errors\n- No pending preview: throws `ToolError(\"No pending action to resolve. Nothing to apply or discard.\")`.\n- Any exception from the queued `apply` / `reject` callback propagates through `resolve`.\n- Aborts during `untilAborted(...)` surface as the underlying abort error from the utility.\n\n## Notes\n- `reason` is informational; `resolve` passes it through but does not interpret it.\n- `queueResolveHandler(...)` is the canonical built-in integration point; custom tools use `pushPendingAction(...)`, which the loader forwards into the same mechanism.\n- The tool only works because another tool already staged a preview and forced a one-shot `resolve` choice.\n- `sourceResultDetails` is added only when the apply/reject callback returned a non-null `details` field; custom pending-action `details` are not forwarded automatically by the loader.\n",
|
|
83
83
|
"tools/retain.md": "# retain\n\n> Queue durable facts for asynchronous write into the active Hindsight bank.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/hindsight-retain.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/retain.md`\n- Key collaborators:\n - `packages/coding-agent/src/hindsight/state.ts` — per-session queue, flush, auto-retain.\n - `packages/coding-agent/src/hindsight/backend.ts` — session bootstrap, prompt injection, subagent aliasing.\n - `packages/coding-agent/src/hindsight/bank.ts` — bank id derivation, tag scoping, mission setup.\n - `packages/coding-agent/src/hindsight/client.ts` — HTTP `retain` / `retainBatch` calls.\n - `packages/coding-agent/src/hindsight/content.ts` — retention transcript shaping, memory-tag stripping.\n - `packages/coding-agent/src/hindsight/mental-models.ts` — bank-scoped mental-model seeding and cache rendering.\n - `packages/coding-agent/src/hindsight/seeds.json` — built-in mental-model seed definitions.\n - `packages/coding-agent/src/hindsight/transcript.ts` — extracts user/assistant turns for auto-retain.\n\n## Inputs\n\n| Field | Type | Required | Description |\n|---|---|---:|---|\n| `items` | `Array<{ content: string; context?: string }>` | Yes | One or more memories to queue. `minItems: 1`. Each item must be self-contained; `context` is optional per-item provenance. |\n\n## Outputs\nReturns a single-shot tool result:\n\n- `content[0].type = \"text\"`\n- `content[0].text = \"<count> memory queued.\"` or `\"<count> memories queued.\"`\n- `details = { count: number }`\n\nThe write is not confirmed before the tool returns. The queue flushes later; flush failures emit a session warning notice and are not returned to the model.\n\n## Flow\n1. `HindsightRetainTool.createIf(...)` only exposes the tool when `memory.backend == \"hindsight\"` in `packages/coding-agent/src/tools/hindsight-retain.ts`.\n2. `execute(...)` fetches `session.getHindsightSessionState()` and throws if the Hindsight backend was not started.\n3. Each input item is handed to `HindsightSessionState.enqueueRetain(...)` in `packages/coding-agent/src/hindsight/state.ts`.\n4. `HindsightRetainQueue.enqueue(...)` appends the item and either:\n - flushes immediately when the queue reaches `RETAIN_FLUSH_BATCH_SIZE`, or\n - starts a debounce timer for `RETAIN_FLUSH_INTERVAL_MS`.\n5. On flush, `HindsightRetainQueue.#doFlush(...)`:\n - verifies the session still owns this state,\n - calls `ensureBankMission(...)` once per bank/process before writing,\n - maps queued items to `MemoryItemInput` with `context ?? config.retainContext`, `metadata.session_id`, and bank-scope tags,\n - sends one async `retainBatch(...)` request.\n6. The tool returns immediately after enqueueing; it does not await the HTTP write.\n\n## Modes / Variants\n- Tool path: queued batch write only.\n- Bank scoping comes from `computeBankScope(...)` in `packages/coding-agent/src/hindsight/bank.ts`:\n - `global` — one shared bank, no project tags.\n - `per-project` — bank id gets `-<cwd basename>` appended.\n - `per-project-tagged` — shared bank plus `project:<cwd basename>` tags on retained memories.\n- Session scope:\n - tool-called retains are per-session queued work in `HindsightSessionState`,\n - persisted memories are cross-session server-side bank data,\n - subagents alias the parent `HindsightSessionState`, so their `retain` calls write into the same bank and queue.\n\n## Side Effects\n- Filesystem\n - None for retained memories. No local memory file is written.\n- Network\n - `POST /v1/default/banks/{bank_id}/memories` via `retainBatch(...)` in `packages/coding-agent/src/hindsight/client.ts`.\n - Optional `PUT /v1/default/banks/{bank_id}` via `ensureBankMission(...)` before first write per bank/process.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Appends to the in-memory `HindsightRetainQueue` on the active `HindsightSessionState`.\n - Includes `metadata.session_id` on each retained item.\n - Shares parent state for subagents (`aliasOf` path in `packages/coding-agent/src/hindsight/backend.ts`).\n- User-visible prompts / interactive UI\n - On async flush failure, emits `session.emitNotice(\"warning\", ...)`; the model is not told.\n- Background work / cancellation\n - Flush runs later on timer, queue-size threshold, `agent_end`, backend `enqueue(...)`, or backend `clear(...)`.\n\n## Limits & Caps\n- Input schema requires `items.length >= 1` in `packages/coding-agent/src/tools/hindsight-retain.ts`.\n- Queue flush threshold: `RETAIN_FLUSH_BATCH_SIZE = 16` in `packages/coding-agent/src/hindsight/state.ts`.\n- Queue debounce: `RETAIN_FLUSH_INTERVAL_MS = 5_000` in `packages/coding-agent/src/hindsight/state.ts`.\n- Queue writes use `retainBatch(..., { async: true })`; the client does not wait for server-side consolidation.\n- Shared auto-retain settings on the same backend:\n - `hindsight.retainEveryNTurns` default `3`\n - `hindsight.retainOverlapTurns` default `2`\n - `hindsight.retainContext` default `\"omp\"`\n - `hindsight.retainMode` default `\"full-session\"`\n from `packages/coding-agent/src/config/settings-schema.ts`.\n\n## Errors\n- Throws `Hindsight backend is not initialised for this session.` when no state exists.\n- Queue enqueue on disposed state throws `Hindsight retain queue is closed.`\n- Flush-time API failures are caught in `HindsightRetainQueue.#doFlush(...)`, logged, and converted into a warning notice instead of a tool error.\n- Mission creation failures are swallowed in `ensureBankMission(...)`; writes continue.\n\n## Notes\n- Storage is server-side. `hindsightBackend.clear(...)` only clears local cache/state and warns that upstream deletion must happen in Hindsight UI or `deleteBank`; see `packages/coding-agent/src/hindsight/backend.ts`.\n- Auto-retain uses the same bank but a different path than this tool: `retainSession(...)` extracts plain user/assistant transcript from `packages/coding-agent/src/hindsight/transcript.ts`, strips `<memories>` / `<mental_models>` blocks via `stripMemoryTags(...)`, and calls single-item `retain(...)`.\n- `retain` itself does not seed or read mental models. Mental-model bootstrap lives in the shared backend: `HindsightSessionState.runMentalModelLoad(...)` optionally resolves seeds from `packages/coding-agent/src/hindsight/seeds.json`, creates missing models with `ensureMentalModels(...)`, then caches a rendered `<mental_models>` block for prompt injection.\n- Built-in seeds are `user-preferences`, `project-conventions`, and `project-decisions`. `projectTagged: true` seeds inherit the active scope's retain tags; untagged seeds read the whole bank.\n- Mental-model defaults from `packages/coding-agent/src/config/settings-schema.ts`: `hindsight.mentalModelsEnabled = true`, `hindsight.mentalModelAutoSeed = true`, `hindsight.mentalModelRefreshIntervalMs = 5 * 60 * 1000`, `hindsight.mentalModelMaxRenderChars = 16_000`. First-turn loading waits up to `MENTAL_MODEL_FIRST_TURN_DEADLINE_MS = 1500` in `packages/coding-agent/src/hindsight/mental-models.ts`.\n- Seed lifecycle is create-only. Changing `packages/coding-agent/src/hindsight/seeds.json` does not mutate existing server-side models.\n- `recall.md` and `reflect.md` rely on the same bank, scoping, and mental-model bootstrap; refer back here for the shared backend behavior.\n",
|
|
84
|
-
"tools/rewind.md": "# rewind\n\n> End an active checkpoint by pruning exploratory context and retaining a concise report.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/checkpoint.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/rewind.md`\n- Key collaborators:\n - `packages/coding-agent/src/session/agent-session.ts` — validates pending rewind state, applies the actual rewind, and injects the retained report.\n - `packages/coding-agent/src/session/session-manager.ts` — branches the persisted session tree and appends persisted summary/report entries.\n - `packages/coding-agent/src/session/messages.ts` — converts persisted `branch_summary` entries into LLM-visible branch-summary messages on rebuilt context.\n - `packages/coding-agent/src/tools/index.ts` — registers the tool and shares the `checkpoint.enabled` gate.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `report` | `string` | Yes | Investigation findings. `execute()` trims it and rejects the empty result. |\n\n## Outputs\nThe tool returns a single text result plus structured details:\n\n- text body:\n - `Rewind requested.`\n - `Report captured for context replacement.`\n- `details`:\n - `report: string` — trimmed report text\n - `rewound: true`\n\nThe returned tool result is not the final rewind. `AgentSession` waits until `turn_end`, then applies the rewind side effects asynchronously.\n\n## Flow\n1. `RewindTool.createIf()` in `packages/coding-agent/src/tools/checkpoint.ts` hides the tool from subagents.\n2. `RewindTool.execute()` rejects subagent calls with `ToolError(\"Checkpoint not available in subagents.\")`.\n3. It rejects calls with no active checkpoint using `ToolError(\"No active checkpoint.\")`.\n4. It trims `params.report`; if empty, it throws `ToolError(\"Report cannot be empty.\")`.\n5. It returns a `toolResult()` with `details.report` and `details.rewound = true`.\n6. On `tool_execution_end`, `AgentSession` extracts the report from `details.report` or the first text content block and stores it in `#pendingRewindReport`.\n7. On `turn_end`, if `#pendingRewindReport` is set, `AgentSession.#applyRewind()` runs.\n8. `#applyRewind()` computes `safeCount = clamp(checkpointMessageCount, 0, agent.state.messages.length)` and calls `agent.replaceMessages(agent.state.messages.slice(0, safeCount))`.\n9. It then calls `sessionManager.branchWithSummary(checkpointEntryId, report, { startedAt })`. That moves the persisted session leaf back to the checkpoint entry and appends a new `branch_summary` entry whose `summary` is the rewind report.\n10. If `checkpointEntryId` no longer resolves, it logs a warning and falls back to `branchWithSummary(null, report, { startedAt })`, branching from root instead.\n11. `#applyRewind()` appends a hidden in-memory custom message `{ customType: \"rewind-report\", content: report, display: false }` and persists the same payload through `sessionManager.appendCustomMessageEntry(\"rewind-report\", ...)` with `details = { startedAt, rewoundAt }`.\n12. Finally it clears `#checkpointState` and `#pendingRewindReport`.\n\n## Modes / Variants\n- Normal rewind: checkpoint entry exists; session history branches from that exact entry.\n- Fallback rewind: checkpoint entry ID is missing from the current session tree; rewind branches from root and logs a warning.\n- Immediate turn-end apply: rewind side effects happen only after the surrounding assistant turn finishes, not inside `RewindTool.execute()`.\n\n## Side Effects\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Replaces in-memory conversation history with the prefix ending at the checkpoint tool result.\n - Adds a hidden custom message `rewind-report` carrying the retained report.\n - Clears the active checkpoint state and pending rewind report.\n - Repositions the persisted session leaf to the checkpoint branch point and appends new session entries.\n- Filesystem\n - Persists the new `branch_summary` and `custom_message` entries into the session `.jsonl` file through normal `SessionManager` append persistence.\n - Session files are named `<ISO-timestamp-with-:-and-.-replaced>_<uuidv7>.jsonl` in the session directory; default directory selection is documented in `SessionManager.create()` as `~/.omp/agent/sessions/<encoded-cwd>/` when no override is passed.\n- User-visible prompts / interactive UI\n - The tool result itself is visible.\n - The persisted `branch_summary` becomes an LLM-visible `branchSummary` message when context is rebuilt from `SessionManager.buildSessionContext()`; `messages.ts` renders it as a user-role text message using `
|
|
84
|
+
"tools/rewind.md": "# rewind\n\n> End an active checkpoint by pruning exploratory context and retaining a concise report.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/checkpoint.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/rewind.md`\n- Key collaborators:\n - `packages/coding-agent/src/session/agent-session.ts` — validates pending rewind state, applies the actual rewind, and injects the retained report.\n - `packages/coding-agent/src/session/session-manager.ts` — branches the persisted session tree and appends persisted summary/report entries.\n - `packages/coding-agent/src/session/messages.ts` — converts persisted `branch_summary` entries into LLM-visible branch-summary messages on rebuilt context.\n - `packages/coding-agent/src/tools/index.ts` — registers the tool and shares the `checkpoint.enabled` gate.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `report` | `string` | Yes | Investigation findings. `execute()` trims it and rejects the empty result. |\n\n## Outputs\nThe tool returns a single text result plus structured details:\n\n- text body:\n - `Rewind requested.`\n - `Report captured for context replacement.`\n- `details`:\n - `report: string` — trimmed report text\n - `rewound: true`\n\nThe returned tool result is not the final rewind. `AgentSession` waits until `turn_end`, then applies the rewind side effects asynchronously.\n\n## Flow\n1. `RewindTool.createIf()` in `packages/coding-agent/src/tools/checkpoint.ts` hides the tool from subagents.\n2. `RewindTool.execute()` rejects subagent calls with `ToolError(\"Checkpoint not available in subagents.\")`.\n3. It rejects calls with no active checkpoint using `ToolError(\"No active checkpoint.\")`.\n4. It trims `params.report`; if empty, it throws `ToolError(\"Report cannot be empty.\")`.\n5. It returns a `toolResult()` with `details.report` and `details.rewound = true`.\n6. On `tool_execution_end`, `AgentSession` extracts the report from `details.report` or the first text content block and stores it in `#pendingRewindReport`.\n7. On `turn_end`, if `#pendingRewindReport` is set, `AgentSession.#applyRewind()` runs.\n8. `#applyRewind()` computes `safeCount = clamp(checkpointMessageCount, 0, agent.state.messages.length)` and calls `agent.replaceMessages(agent.state.messages.slice(0, safeCount))`.\n9. It then calls `sessionManager.branchWithSummary(checkpointEntryId, report, { startedAt })`. That moves the persisted session leaf back to the checkpoint entry and appends a new `branch_summary` entry whose `summary` is the rewind report.\n10. If `checkpointEntryId` no longer resolves, it logs a warning and falls back to `branchWithSummary(null, report, { startedAt })`, branching from root instead.\n11. `#applyRewind()` appends a hidden in-memory custom message `{ customType: \"rewind-report\", content: report, display: false }` and persists the same payload through `sessionManager.appendCustomMessageEntry(\"rewind-report\", ...)` with `details = { startedAt, rewoundAt }`.\n12. Finally it clears `#checkpointState` and `#pendingRewindReport`.\n\n## Modes / Variants\n- Normal rewind: checkpoint entry exists; session history branches from that exact entry.\n- Fallback rewind: checkpoint entry ID is missing from the current session tree; rewind branches from root and logs a warning.\n- Immediate turn-end apply: rewind side effects happen only after the surrounding assistant turn finishes, not inside `RewindTool.execute()`.\n\n## Side Effects\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Replaces in-memory conversation history with the prefix ending at the checkpoint tool result.\n - Adds a hidden custom message `rewind-report` carrying the retained report.\n - Clears the active checkpoint state and pending rewind report.\n - Repositions the persisted session leaf to the checkpoint branch point and appends new session entries.\n- Filesystem\n - Persists the new `branch_summary` and `custom_message` entries into the session `.jsonl` file through normal `SessionManager` append persistence.\n - Session files are named `<ISO-timestamp-with-:-and-.-replaced>_<uuidv7>.jsonl` in the session directory; default directory selection is documented in `SessionManager.create()` as `~/.omp/agent/sessions/<encoded-cwd>/` when no override is passed.\n- User-visible prompts / interactive UI\n - The tool result itself is visible.\n - The persisted `branch_summary` becomes an LLM-visible `branchSummary` message when context is rebuilt from `SessionManager.buildSessionContext()`; `messages.ts` renders it as a user-role text message using `packages/agent/src/compaction/prompts/branch-summary-context.md`.\n - The persisted `rewind-report` custom message also participates in rebuilt LLM context because `custom_message` entries are converted through `createCustomMessage()`.\n- Background work / cancellation\n - Rewind application is deferred to `turn_end`. There is no separate job object or cancel handle.\n\n## Limits & Caps\n- Availability is gated by `checkpoint.enabled`, default `false`, in `packages/coding-agent/src/config/settings-schema.ts`.\n- Top-level sessions only.\n- Requires exactly one active checkpoint; there is no path to name or choose among multiple checkpoints.\n- Report text must be non-empty after `trim()`.\n- Rewind restores only the message prefix recorded by `checkpointMessageCount`; there is no file restore, artifact restore, blob restore, or process restore path.\n- Persisted report/summary content is still subject to the global session persistence cap `MAX_PERSIST_CHARS = 500_000` in `packages/coding-agent/src/session/session-manager.ts`.\n\n## Errors\n- `ToolError(\"Checkpoint not available in subagents.\")` — thrown for subagent sessions.\n- `ToolError(\"No active checkpoint.\")` — thrown when no checkpoint state is present.\n- `ToolError(\"Report cannot be empty.\")` — thrown when the trimmed report is empty.\n- Missing checkpoint entry IDs during apply do not fail the tool call; `#applyRewind()` catches the error, logs `Rewind branch checkpoint missing, falling back to root`, and branches from root.\n- If the agent turn is aborted while a checkpoint is active, `AgentSession` clears checkpoint state rather than applying a delayed rewind.\n\n## Notes\n- Checkpoint selection is implicit. `rewind` always targets the single `#checkpointState` captured by the last successful `checkpoint`; there is no checkpoint list, label, or ID parameter.\n- Restored state is transcript/session-tree state only:\n - in-memory `agent.state.messages` prefix up to `checkpointMessageCount`\n - persisted session leaf reset to `checkpointEntryId` or root fallback\n - retained rewind report as `branch_summary` and hidden `rewind-report` custom message\n- Not restored:\n - filesystem contents\n - git state\n - artifacts under `packages/coding-agent/src/session/artifacts.ts`\n - blob-store payloads under `packages/coding-agent/src/session/blob-store.ts`\n - prompt history rows in `packages/coding-agent/src/session/history-storage.ts`\n - auth or other agent storage in `packages/coding-agent/src/session/agent-storage.ts`\n- There is no concurrent-edit reconciliation. If code or session-adjacent state changes during the checkpoint window, rewind does not merge or revert them; it only drops conversation context and rewires the session branch.\n- Rewind is not destructive to persisted session history. `branchWithSummary()` appends a new `branch_summary` entry and moves the leaf; it does not delete the abandoned path from the `.jsonl` session log. The active context is cut over to the new branch, but the old entries remain in session storage.\n",
|
|
85
85
|
"tools/search.md": "# search\n\n> Search file contents with a regex across files, directories, globs, and internal URLs.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/search.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/search.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/match-line-format.ts` — model-facing anchor formatting.\n - `packages/coding-agent/src/tools/path-utils.ts` — path normalization, glob splitting, internal URL resolution.\n - `packages/coding-agent/src/tools/file-recorder.ts` — file ordering for grouped output.\n - `packages/coding-agent/src/tools/grouped-file-output.ts` — grouped per-file text layout.\n - `packages/coding-agent/src/session/streaming-output.ts` — line truncation and final byte truncation.\n - `packages/coding-agent/src/config/settings-schema.ts` — default context lines.\n - `packages/natives/native/index.d.ts` — native `grep()` types exposed to TS.\n - `crates/pi-natives/src/grep.rs` — native regex/file search implementation.\n - `docs/natives-text-search-pipeline.md` — native search pipeline overview.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `pattern` | `string` | Yes | Regex pattern. `search.ts` trims it and rejects empty input. The native matcher enables multiline only when the pattern text contains a literal newline or the two-character sequence `\\\\n`. The model prompt explicitly documents literal-brace escaping such as ``interface\\\\{\\\\}``, although the native layer also auto-escapes braces that cannot be valid repetition quantifiers. |\n| `paths` | `string[]` | Yes | One or more file paths, directory paths, glob-like paths, or internal URLs. Empty strings are rejected after trimming/quote stripping. Internal URLs must resolve to a backing file and cannot contain glob characters. |\n| `i` | `boolean` | No | Case-insensitive search. Defaults to `false`. Passed to native `ignoreCase`. |\n| `gitignore` | `boolean` | No | Respect `.gitignore` during directory scans. Defaults to `true`. Passed to native `gitignore`. |\n| `skip` | `number` | No | Global match offset. Defaults to `0`. `search.ts` floors finite numbers and rejects negative or non-finite values. |\n\n## Outputs\nThe tool returns a single text block in `content[0].text` plus structured `details`.\n\n- Match lines are formatted by `formatMatchLine()` as `*<anchor>|<line>` for matches and ` <anchor>|<line>` for context.\n - Hashline mode: `*5th|content`, ` 9x}|content`.\n - Plain mode: `*5|content`, ` 9|content`.\n- Directory results are grouped by file, with `# <path>` headings and blank lines between groups.\n- `details` may include:\n - `scopePath` — formatted search scope.\n - `matchCount`, `fileCount`, `files`, `fileMatches` — counts for the returned page, not necessarily total corpus counts.\n - `matchLimitReached` — visible-page limit hit (`100`).\n - `resultLimitReached` — native preselection limit hit (`500`).\n - `linesTruncated` — one or more matched lines were shortened to `1024` chars plus `…`.\n - `truncated` and `meta.truncation` — final text output was head-truncated by `truncateHead()`.\n - `displayContent` — TUI-only rendering text with `│` gutters instead of model anchors.\n - `missingPaths` — multi-path entries skipped because their base path did not exist.\n- No-match result text is `No matches found`, optionally followed by `Skipped missing paths: ...`.\n\n## Flow\n1. `SearchTool.execute()` validates and normalizes input in `packages/coding-agent/src/tools/search.ts`:\n - trims `pattern`, rejects empty patterns;\n - normalizes `skip` to a non-negative integer;\n - reads `search.contextBefore` and `search.contextAfter` from session settings (`1` and `3` by default);\n - enables multiline only when `pattern` contains `\\n` or an actual newline.\n2. Each `paths` entry is normalized with `normalizePathLikeInput()`.\n3. Internal URLs are resolved through `session.internalRouter`:\n - glob metacharacters (`*`, `?`, `[`, `{`) are rejected for internal URLs;\n - URLs without `resource.sourcePath` fail;\n - immutable sources are tracked so output can suppress editable hashline anchors per file.\n4. For multi-path calls, `partitionExistingPaths()` skips only ENOENT entries. If every entry is missing, the tool errors.\n5. Path resolution branches:\n - one entry: `parseSearchPath()` splits `basePath` and optional glob;\n - multiple entries: `resolveExplicitSearchPaths()` computes a common base directory, brace-union glob, exact-file list, or degenerate-root target list.\n6. `search.ts` stats the resolved base path to decide file vs directory behavior.\n7. It calls native `grep()` from `@oh-my-pi/pi-natives` with:\n - `pattern`, `ignoreCase`, `multiline`, `gitignore`;\n - `hidden: true`;\n - `cache: false`;\n - `contextBefore` / `contextAfter` from settings;\n - `maxColumns: 1024`;\n - `mode: content`.\n8. Native execution happens in `crates/pi-natives/src/grep.rs`:\n - `build_matcher()` sanitizes non-quantifier braces before regex compile;\n - if compile fails with unopened/unclosed-group errors, it retries after escaping previously unescaped parentheses;\n - directory scans use the grep pipeline described in `docs/natives-text-search-pipeline.md`.\n9. Search dispatch differs by resolved path set:\n - exact explicit files or degenerate-root multi-targets: JS loops over targets and merges `grep()` results itself;\n - single file/directory base: one `grep()` call handles offset/limit natively.\n10. JS output shaping then:\n - round-robins directory matches down to `100` visible matches so one file does not monopolize the page;\n - keeps the first `100` file matches for single-file searches;\n - formats lines through `formatMatchLine()` for the model and `formatCodeFrameLine()` for TUI;\n - records non-truncated matched/context lines into the session file-read cache with `recordSparse()`.\n11. Final text is passed through `truncateHead(rawOutput, { maxLines: Number.MAX_SAFE_INTEGER })`, so the effective cap is the default byte cap from `streaming-output.ts`, not the default line cap.\n12. `toolResult()` attaches text plus limit/truncation metadata.\n\n## Modes / Variants\n1. **Single file path**\n - `grep()` searches one file.\n - Output is a flat list of match/context lines.\n - Visible limit is the first `100` matches after native offset handling.\n2. **Single directory path or single glob-like path**\n - `parseSearchPath()` may split the input into `path` + `glob`.\n - One native `grep()` scans the directory tree with `gitignore` and `hidden:true`.\n - Native `offset` handles `skip` globally across files.\n - JS round-robins the returned matches to `100` visible rows.\n3. **Multiple explicit paths/globs**\n - `resolveExplicitSearchPaths()` collapses them into a common base and either a brace-union glob, an explicit file list, or per-target searches when the only common base is the filesystem root.\n - Missing entries are skipped non-fatally unless all are missing.\n4. **Internal URL paths**\n - Supported only when the internal resource resolves to a real backing file.\n - No internal-URL globbing.\n - Immutable sources switch to the immutable display mode when formatting anchors.\n\n## Side Effects\n- Filesystem\n - Stats resolved search roots and input paths.\n - Reads matched files through native `grep()`.\n - Records sparse matched/context lines into the session file-read cache via `getFileReadCache(...).recordSparse(...)`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Reads session settings for context defaults.\n - Uses `session.internalRouter` to resolve internal URLs.\n - Populates tool `details.meta` with truncation/limit metadata.\n- Background work / cancellation\n - Wrapped in `untilAborted(signal, ...)` at the JS level.\n - `search.ts` does not pass `signal` or `timeoutMs` into native `grep()`, so native grep cancellation/timeouts are not used by this tool.\n\n## Limits & Caps\n- Visible page limit: `100` matches (`DEFAULT_MATCH_LIMIT` in `packages/coding-agent/src/tools/search.ts`).\n- Native preselection limit: `500` matches (`internalLimit = Math.min(DEFAULT_MATCH_LIMIT * 5, 2000)` in `packages/coding-agent/src/tools/search.ts`).\n- Line truncation: `1024` characters per emitted line (`DEFAULT_MAX_COLUMN` in `packages/coding-agent/src/session/streaming-output.ts`). Native grep marks truncated lines; JS reports `linesTruncated`.\n- Final text truncation: `truncateHead()` default byte cap `50 * 1024` bytes (`DEFAULT_MAX_BYTES` in `packages/coding-agent/src/session/streaming-output.ts`). `search.ts` overrides `maxLines` to `Number.MAX_SAFE_INTEGER`, so normal search output is byte-capped, not line-capped.\n- Context defaults: `search.contextBefore = 1`, `search.contextAfter = 3` in `packages/coding-agent/src/config/settings-schema.ts`.\n- Pagination: `skip` is a global match offset. In single-base searches it is pushed into native `offset`; in exact-file/multi-target aggregation it is applied in JS with `matches.slice(skip)`.\n- Native directory-scan cache: available in `grep.rs`, but this tool always sets `cache: false`.\n\n## Errors\n- `Pattern must not be empty` when trimmed `pattern` is empty.\n- `Skip must be a non-negative number` for negative or non-finite `skip`.\n- `` `paths` must contain non-empty paths or globs `` when any normalized path is empty.\n- `Glob patterns are not supported for internal URLs: ...` for internal URL + glob metacharacters.\n- `Cannot search internal URL without a backing file: ...` when the router resolves a virtual resource without `sourcePath`.\n- `Path not found: ...` when the resolved base path is missing, or when every multi-path entry is missing.\n- Regex compile failures bubble from native `grep()` as tool errors. `search.ts` has a special catch for messages beginning with `regex parse error`, then otherwise rethrows.\n- Multi-file native scans skip per-file open/search failures inside `grep.rs`; the scan continues with surviving files.\n\n## Notes\n- The model-facing prompt documents standard regex syntax plus two search-specific rules: escape literal braces, and use `\\n` or a literal newline for cross-line matching.\n- Native `build_matcher()` already auto-escapes braces that cannot be valid quantifiers, so patterns like `${platform}` become searchable instead of failing. Valid quantifiers like `a{2,4}` remain unchanged.\n- Native compile retry also escapes unescaped literal parentheses only after an unopened/unclosed-group parse error. It is a fallback, not a general parser mode.\n- Internal URLs are resolved before path existence checks. After resolution, the native layer sees ordinary filesystem paths.\n- `hidden:true` is hard-coded in `search.ts`; there is no model-facing flag to exclude dotfiles.\n- `gitignore:false` only affects native directory traversal. It does not disable the tool's own path normalization or explicit-file handling.\n- When `paths` resolves to multiple exact files, `search.ts` does not apply the native `500` match cap and reports `totalMatches` internally as the post-skip length for that branch.\n- The anchor suffix in hashline mode comes from `computeLineHash()` in `packages/coding-agent/src/hashline/hash.ts`; `search` itself only formats it.\n",
|
|
86
86
|
"tools/search_tool_bm25.md": "# search_tool_bm25\n\n> Search the hidden tool-discovery index and activate the top matches for the current session.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/search-tool-bm25.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/search-tool-bm25.md`\n- Key collaborators:\n - `packages/coding-agent/src/tool-discovery/tool-index.ts` — discoverable-tool metadata and BM25 index/search.\n - `packages/coding-agent/src/session/agent-session.ts` — session discovery mode, corpus assembly, activation, cache invalidation.\n - `packages/coding-agent/src/sdk.ts` — initial hiding of discoverable built-ins and prompt-time discoverable summary.\n - `packages/coding-agent/src/tools/index.ts` — tool-session discovery hooks, essential/discoverable load modes, registry wiring.\n - `packages/coding-agent/src/config/settings-schema.ts` — `tools.discoveryMode` and legacy `mcp.discoveryMode` settings.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `query` | `string` | Yes | Natural-language or keyword query. Trimmed before search; empty-after-trim is rejected. |\n| `limit` | `integer` | No | Max matches to return and activate. Minimum `1`. Defaults to `8` (`DEFAULT_LIMIT`). |\n\n## Outputs\n- Single-shot `AgentToolResult`.\n- Model-visible `content` is one text part containing JSON with:\n\n```json\n{\"query\":\"...\",\"activated_tools\":[\"...\"],\"match_count\":2,\"total_tools\":17}\n```\n\n- Runtime-only `details` carries the ranked matches used by the TUI renderer:\n - `query`, `limit`, `total_tools`\n - `activated_tools`: tool names activated by this call\n - `active_selected_tools`: cumulative discovered-tool selections still active\n - `tools`: array of match objects with\n - `name`\n - `label`\n - `description` (`tool.summary`; this is the only snippet-like field)\n - optional `server_name`\n - optional `mcp_tool_name`\n - `schema_keys`\n - `score` rounded to 6 decimals\n- The renderer shows a status line plus up to 5 collapsed tree items by default (`COLLAPSED_MATCH_LIMIT`), each with label, optional server name, score to 3 decimals, and truncated description. The ranked match list is not serialized into `content`.\n\n## Flow\n1. `SearchToolBm25Tool.createIf()` in `packages/coding-agent/src/tools/search-tool-bm25.ts` exposes the tool only when `tools.discoveryMode !== \"off\"` or legacy `mcp.discoveryMode === true`, and only if the session implements the discovery hooks.\n2. `description` is rendered from `packages/coding-agent/src/prompts/tools/search-tool-bm25.md` via `renderSearchToolBm25Description()`, using the current discoverable-tool list plus per-server summary/count.\n3. `execute()` re-checks capability and settings:\n - missing discovery hooks -> `ToolError(\"Tool discovery is unavailable in this session.\")`\n - discovery disabled -> `ToolError(\"Tool discovery is disabled. Enable tools.discoveryMode or mcp.discoveryMode to use search_tool_bm25.\")`\n4. `query` is trimmed and validated; `limit` is defaulted/validated.\n5. `getDiscoverableToolSearchIndexForExecution()` fetches the cached generic search index from the session when available, otherwise falls back to the legacy MCP cache, otherwise rebuilds an index from the current discoverable-tool list.\n6. `getSelectedToolNames()` reads the current discovered selections so already-selected tools can be excluded from fresh results.\n7. `searchDiscoverableTools()` in `packages/coding-agent/src/tool-discovery/tool-index.ts` tokenizes the query, scores every document with BM25, sorts by descending score then `tool.name`, and returns up to `searchIndex.documents.length` results; `execute()` then filters already-selected names and slices to `limit`.\n8. If any matches remain, `activateTools()` activates all matched tool names through `session.activateDiscoveredTools()` or legacy `activateDiscoveredMCPTools()`.\n9. `details` is assembled from the activated names, current selected names, corpus size, and formatted matches; `content` is reduced to the compact JSON summary from `buildSearchToolBm25Content()`.\n10. `searchToolBm25Renderer` renders either:\n - the structured `details` view, or\n - a fallback text-only warning block if `details` is absent.\n\n## Modes / Variants\n- Discovery-mode gating:\n - `tools.discoveryMode = \"all\"`: searches hidden discoverable built-ins plus hidden MCP tools.\n - `tools.discoveryMode = \"mcp-only\"`: searches hidden MCP tools only.\n - legacy `mcp.discoveryMode = true` with `tools.discoveryMode = \"off\"`: same as MCP-only.\n- Search-index source:\n - generic cached discoverable index from the session\n - legacy cached MCP index, cast to the generic shape\n - rebuilt ad hoc from the current discoverable-tool list if neither cache path works\n- Activation backend:\n - generic `activateDiscoveredTools()`\n - legacy `activateDiscoveredMCPTools()` fallback\n\n## Side Effects\n- Session state\n - Adds matched tools to the active session tool set through `activateDiscoveredTools()` / `activateDiscoveredMCPTools()`.\n - Updates discovered-tool selection state so repeated searches accumulate selections instead of replacing them.\n - Invalidates the cached discoverable search index when newly activated built-ins change the hidden corpus (`packages/coding-agent/src/session/agent-session.ts`).\n - Tool availability changes before the next model call in the same turn; the prompt text says this explicitly.\n- User-visible prompts / interactive UI\n - The tool description includes discoverable server summaries and total discoverable-tool count.\n - The TUI renderer shows ranked matches, but the model-visible text summary does not.\n\n## Limits & Caps\n- Default result cap: `8` (`DEFAULT_LIMIT` in `packages/coding-agent/src/tools/search-tool-bm25.ts`).\n- `limit` must be a positive integer; no tool-level upper bound beyond corpus size.\n- Renderer collapsed list cap: `5` (`COLLAPSED_MATCH_LIMIT`).\n- Renderer truncation widths:\n - label: `72` chars (`MATCH_LABEL_LEN`)\n - description: `96` chars (`MATCH_DESCRIPTION_LEN`)\n- BM25 parameters in `packages/coding-agent/src/tool-discovery/tool-index.ts`:\n - `BM25_K1 = 1.2`\n - `BM25_B = 0.75`\n- Weighted corpus fields (`FIELD_WEIGHTS`):\n - `name`: `6`\n - `label`: `4`\n - `mcpToolName`: `4`\n - `serverName`: `2`\n - `summary`: `2`\n - each `schemaKey`: `1`\n- Summary fallback length for discoverable metadata: first `200` chars of `description` when no explicit summary exists (`getDiscoverableTool()` in `packages/coding-agent/src/tool-discovery/tool-index.ts`).\n\n## Errors\n- `execute()` throws `ToolError` for unavailable discovery hooks, disabled discovery mode, empty trimmed query, and non-positive/non-integer `limit`.\n- `searchDiscoverableTools()` throws `Error(\"Query must contain at least one letter or number.\")` if tokenization produces no alphanumeric tokens; `execute()` catches `Error` and rethrows `ToolError(error.message)`.\n- Empty corpus is not an error; search returns `[]`, activation is skipped, and the renderer message becomes either `No discoverable tools are currently loaded.` or `No matching tools found.`\n- `getDiscoverableToolsForDescription()` and `getDiscoverableToolSearchIndexForExecution()` swallow discovery-hook/cache errors and fall back to an empty corpus or rebuilt index.\n\n## Notes\n- The tool wire name stays `search_tool_bm25` for persisted-session back-compat, even though the source file is `search-tool-bm25.ts`.\n- Corpus composition is session-dependent and excludes already-active tools:\n - MCP entries come from `#discoverableMCPTools`, filtered to names not currently active, mapped with `summary = description`.\n - Built-in entries appear only in `\"all\"` mode and only for registry tools whose `loadMode === \"discoverable\"` and are not currently active.\n - Hidden/internal built-ins are intentionally excluded from the built-in corpus: `resolve`, `yield`, `report_finding`, `report_tool_issue` are called out in the `#collectDiscoverableBuiltinTools()` comment.\n- `DiscoverableToolSource` includes `\"extension\"` and `\"custom\"`, but `AgentSession.getDiscoverableTools()` currently assembles only built-in and MCP sources.\n- On startup, `packages/coding-agent/src/sdk.ts` hides non-essential discoverable built-ins in `tools.discoveryMode = \"all\"`; defaults are `read`, `bash`, and `edit` unless `tools.essentialOverride` changes them.\n- Query tokenization is simple and deterministic: camelCase is split, non-alphanumerics become spaces, tokens are lowercased, and only non-empty alphanumeric tokens survive.\n- Scores are rounded differently by surface: `details.tools[].score` keeps 6 decimals; the TUI line renders 3.\n",
|
|
87
87
|
"tools/ssh.md": "# ssh\n\n> Execute one remote command on a discovered SSH host.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ssh.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ssh.md`\n- Key collaborators:\n - `packages/coding-agent/src/ssh/ssh-executor.ts` — runs `ssh`, captures output\n - `packages/coding-agent/src/ssh/connection-manager.ts` — master-connection reuse, host probing\n - `packages/coding-agent/src/ssh/sshfs-mount.ts` — optional `sshfs` mount side effect\n - `packages/coding-agent/src/discovery/ssh.ts` — discovers host configs\n - `packages/coding-agent/src/capability/ssh.ts` — canonical host shape\n - `packages/coding-agent/src/session/streaming-output.ts` — tail streaming, truncation, artifacts\n - `packages/coding-agent/src/tools/tool-timeouts.ts` — timeout clamp rules\n - `packages/utils/src/dirs.ts` — user/project ssh config paths\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `host` | `string` | Yes | Host name key from discovered SSH config entries, not an arbitrary hostname/IP. |\n| `command` | `string` | Yes | Remote command string passed to `ssh` as the remote command. |\n| `cwd` | `string` | No | Remote working directory. The tool prepends a shell-specific `cd`/`Set-Location` wrapper. |\n| `timeout` | `number` | No | Timeout in seconds. Default `60`; clamped to `1..3600`. |\n\n## Outputs\nThe tool returns a standard text tool result built in `packages/coding-agent/src/tools/ssh.ts`:\n\n- `content`: one text block containing combined remote stdout+stderr, or `\"(no output)\"` when empty.\n- `details.meta.truncation`: present when output exceeded the in-memory tail window; derived from the executor summary.\n\nStreaming behavior:\n\n- While the command runs, `onUpdate` receives tail-only text snapshots built from `TailBuffer` in `packages/coding-agent/src/session/streaming-output.ts`.\n- Final output is single-shot after process exit.\n\nSide-channel artifacts:\n\n- When session artifact allocation is available and output exceeds the spill threshold, full output is written to a session artifact file and the returned summary carries its `artifactId` internally.\n- The ssh tool itself does not print the `artifact://...` URI into the result text.\n\nFailure behavior:\n\n- Unknown host, missing host config, timeout, cancellation, SSH startup failure, key validation failure, or non-zero remote exit all surface as thrown `ToolError`s.\n- Non-zero remote exit includes captured output plus `Command exited with code N`.\n\n## Flow\n1. `loadSshTool()` in `packages/coding-agent/src/tools/ssh.ts` calls `loadCapability(sshCapability.id, { cwd: session.cwd })` to discover hosts.\n2. `packages/coding-agent/src/discovery/ssh.ts` loads host entries from, in this order: project managed ssh config, user managed ssh config, `ssh.json` in the repo root, `.ssh.json` in the repo root.\n3. `getSSHConfigPath(\"project\")` and `getSSHConfigPath(\"user\")` in `packages/utils/src/dirs.ts` resolve those managed files to `.omp/ssh.json` in the project and `~/.omp/agent/ssh.json` in the user config dir. This tool does not read `~/.ssh/config`.\n4. Capability loading deduplicates by host name with first item winning; provider order is priority-sorted and the SSH JSON provider registers at priority `5`.\n5. `loadHosts()` in `packages/coding-agent/src/tools/ssh.ts` builds `hostsByName` and drops later duplicates again with `if (!hostsByName.has(host.name))`.\n6. Tool description text is built from `packages/coding-agent/src/prompts/tools/ssh.md` plus an `Available hosts:` list. Each host entry calls `getHostInfoForHost()` to show detected shell/OS when cached; otherwise it renders `detecting...`.\n7. On execute, `SshTool.execute()` rejects any `host` not in the discovered host-name set.\n8. `ensureHostInfo()` in `packages/coding-agent/src/ssh/connection-manager.ts` ensures an SSH master connection exists, loads cached host info from disk if present, and probes remote OS/shell when cache is missing or stale.\n9. `buildRemoteCommand()` in `packages/coding-agent/src/tools/ssh.ts` prepends a cwd change when `cwd` is provided:\n - Unix-like or Windows compat shells: `cd -- '<cwd>' && <command>`\n - Windows PowerShell: `Set-Location -Path '<cwd>'; <command>`\n - Windows cmd: `cd /d \"<cwd>\" && <command>`\n10. `clampTimeout(\"ssh\", rawTimeout)` applies the `1..3600` second clamp from `packages/coding-agent/src/tools/tool-timeouts.ts`.\n11. `executeSSH()` in `packages/coding-agent/src/ssh/ssh-executor.ts` calls `ensureConnection(host)` again, opportunistically mounts the remote host root with `sshfs` if available, optionally wraps the command in `bash -c` or `sh -c` for Windows compat mode, then spawns `ssh` with `ptree.spawn`.\n12. Output from both stdout and stderr is piped into one `OutputSink`; chunks are sanitized and forwarded to streaming updates through `streamTailUpdates()`.\n13. On normal exit, the sink returns combined output plus truncation counters. On timeout or abort, `executeSSH()` returns `cancelled: true` and prefixes the output with a notice line such as `[SSH: ...]` or `[Command aborted: ...]`.\n14. `SshTool.execute()` converts `cancelled: true` into `ToolError`, converts non-zero exit codes into `ToolError`, otherwise returns the text result with truncation metadata.\n\n## Modes / Variants\n- **Tool unavailable**: `loadSshTool()` returns `null` when discovery finds no hosts, so the tool is not registered for that session.\n- **Unix-like target**: remote command is passed through directly, with optional `cd -- ... &&` prefix.\n- **Windows native shell**: cwd wrapper uses PowerShell `Set-Location` or cmd `cd /d`; command otherwise runs in the remote default Windows shell.\n- **Windows compat shell**: if host probing finds `bash` or `sh` on Windows, `executeSSH()` wraps the remote command as `bash -c '...'` or `sh -c '...'`. Host config can force compat on/off with `compat`.\n- **Cached vs probed host info**: shell/OS detection comes from in-memory cache, persisted JSON under the remote-host dir, or a fresh probe over SSH.\n- **Truncated vs untruncated output**: small output stays in memory; large output keeps only the last 50 KiB in memory and may spill full output to an artifact file.\n\n## Side Effects\n- Filesystem\n - Reads managed SSH config JSON plus legacy `ssh.json` / `.ssh.json`.\n - Validates private-key path existence and permissions before connecting.\n - Persists probed host info as JSON under the remote-host cache dir via `persistHostInfo()`.\n - May create the SSH control socket dir and, when `sshfs` exists, remote mount dirs.\n - May write full command output to a session artifact file.\n- Network\n - Opens SSH connections to the selected host.\n - May issue extra probe commands to detect OS/shell and compat shells.\n- Subprocesses / native bindings\n - Requires `ssh` on `PATH`; spawns it for connection checks, master startup, probing, and command execution.\n - May call `sshfs`, `mountpoint`, `fusermount`/`fusermount3`, or `umount`.\n - Sanitizes streamed text with `@oh-my-pi/pi-natives` text sanitization.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Uses session artifact allocation when available.\n - Registers postmortem cleanup hooks for SSH master connections and sshfs mounts.\n - Tool concurrency is `exclusive`, so the agent scheduler should not run multiple ssh tool calls concurrently.\n- Background work / cancellation\n - Process spawn receives the tool `AbortSignal`.\n - Cancellation/timeout ends the running ssh process and returns a cancelled result that the tool turns into an error.\n\n## Limits & Caps\n- Timeout defaults/clamps: `default=60`, `min=1`, `max=3600` in `packages/coding-agent/src/tools/tool-timeouts.ts`.\n- Output tail window: `DEFAULT_MAX_BYTES = 50 * 1024` in `packages/coding-agent/src/session/streaming-output.ts`.\n- Output sink spill threshold defaults to the same `50 KiB`; once exceeded, only the tail remains in memory.\n- SSH master reuse persistence: `ControlPersist=3600` in `packages/coding-agent/src/ssh/connection-manager.ts` and `packages/coding-agent/src/ssh/sshfs-mount.ts`.\n- SSH host info schema version: `HOST_INFO_VERSION = 2` in `packages/coding-agent/src/ssh/connection-manager.ts`; stale cache entries are reprobed.\n- Streaming tail buffer compacts after more than `10` pending chunks (`MAX_PENDING`) before trimming.\n\n## Errors\n- `Unknown SSH host: ... Available hosts: ...` when the model passes a host name not present in discovery.\n- `SSH host not loaded: ...` if the discovered-name set and `hostsByName` map diverge.\n- `ssh binary not found on PATH` when `ssh` is unavailable.\n- `SSH key not found: ...`, `SSH key is not a file: ...`, or `SSH key permissions must be 600 or stricter: ...` from key validation.\n- `Failed to start SSH master for <target>: <stderr>` when control-master startup fails.\n- Non-zero remote command exit becomes `ToolError` with captured output and `Command exited with code N`.\n- Timeout becomes a cancelled result with output notice `[SSH: <timeout message>]`, then `ToolError`.\n- Abort becomes a cancelled result with output notice `[Command aborted: <message>]`, then `ToolError`.\n- `sshfs` mount failures are logged and ignored in `executeSSH()`; they do not fail the tool call.\n- Discovery parse problems do not fail tool loading; they become capability warnings. If all sources are empty/invalid, the tool simply does not load.\n\n## Notes\n- Host discovery is JSON-based only. The tool does not parse OpenSSH config files.\n- Discovery expands environment variables recursively in the parsed JSON and expands `~` in `key`/`keyPath`.\n- Host names are capability keys; the model must pass the config key, not the raw hostname.\n- Commands run without a PTY. `executeSSH()` uses `ptree.spawn(..., { stdin: \"pipe\", stderr: \"full\" })` and does not request an interactive terminal.\n- The tool exposes `cwd` but no `env`, `pty`, upload, download, or explicit file-transfer fields.\n- Lower layers support an `artifactId` for full output and a `remotePath` mount target, but `SshTool.execute()` does not expose those knobs.\n- Both stdout and stderr are merged into one output stream; ordering is whatever arrives through the two streams.\n- `StrictHostKeyChecking=accept-new` and `BatchMode=yes` are always set for connection checks, master startup, and command runs.\n- Connection reuse is keyed by discovered host name, not by raw target tuple alone.\n- `closeAllConnections()` and sshfs unmount cleanup run through postmortem hooks, not per-call teardown.\n",
|