@oh-my-pi/pi-coding-agent 15.5.12 → 15.5.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +46 -0
- package/dist/types/config/model-registry.d.ts +1 -1
- package/dist/types/config/models-config-schema.d.ts +2 -0
- package/dist/types/config/settings-schema.d.ts +1 -10
- package/dist/types/edit/file-snapshot-store.d.ts +19 -0
- package/dist/types/eval/__tests__/llm-bridge.test.d.ts +1 -0
- package/dist/types/eval/llm-bridge.d.ts +25 -0
- package/dist/types/export/html/template.generated.d.ts +1 -1
- package/dist/types/extensibility/plugins/legacy-pi-compat.d.ts +15 -0
- package/dist/types/modes/theme/theme.d.ts +2 -1
- package/dist/types/session/agent-session.d.ts +2 -0
- package/dist/types/tools/index.d.ts +0 -1
- package/package.json +8 -8
- package/src/config/model-registry.ts +89 -5
- package/src/config/models-config-schema.ts +1 -1
- package/src/config/settings-schema.ts +1 -10
- package/src/edit/file-snapshot-store.ts +34 -0
- package/src/edit/hashline/diff.ts +3 -8
- package/src/edit/renderer.ts +1 -1
- package/src/eval/__tests__/llm-bridge.test.ts +297 -0
- package/src/eval/js/shared/prelude.txt +8 -0
- package/src/eval/js/tool-bridge.ts +4 -0
- package/src/eval/llm-bridge.ts +181 -0
- package/src/eval/py/prelude.py +52 -31
- package/src/export/html/template.generated.ts +1 -1
- package/src/export/html/template.js +0 -13
- package/src/extensibility/plugins/legacy-pi-compat.ts +60 -23
- package/src/internal-urls/docs-index.generated.ts +4 -5
- package/src/main.ts +4 -0
- package/src/modes/components/model-selector.ts +119 -22
- package/src/modes/components/status-line/presets.ts +1 -0
- package/src/modes/components/status-line/segments.ts +23 -0
- package/src/modes/interactive-mode.ts +22 -87
- package/src/modes/theme/theme.ts +7 -0
- package/src/prompts/tools/eval.md +2 -0
- package/src/session/agent-session.ts +19 -0
- package/src/session/session-manager.ts +47 -0
- package/src/tools/ast-edit.ts +1 -1
- package/src/tools/ast-grep.ts +6 -17
- package/src/tools/eval.ts +24 -48
- package/src/tools/index.ts +0 -4
- package/src/tools/read.ts +23 -33
- package/src/tools/renderers.ts +0 -2
- package/src/tools/search.ts +12 -21
- package/src/tools/write.ts +1 -3
- package/src/utils/file-mentions.ts +1 -3
- package/dist/types/tools/calculator.d.ts +0 -77
- package/src/prompts/tools/calculator.md +0 -10
- package/src/tools/calculator.ts +0 -541
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
// Auto-generated by scripts/generate-docs-index.ts - DO NOT EDIT
|
|
2
2
|
|
|
3
|
-
export const EMBEDDED_DOC_FILENAMES: readonly string[] = ["ERRATA-GPT5-HARMONY.md","ai-schema-normalize.md","approval-mode.md","auth-broker-gateway.md","bash-tool-runtime.md","blob-artifact-architecture.md","compaction.md","config-usage.md","custom-tools.md","environment-variables.md","extension-loading.md","extensions.md","fs-scan-cache-architecture.md","gemini-manifest-extensions.md","handoff-generation-pipeline.md","hooks.md","install-id.md","keybindings.md","lsp-config.md","marketplace.md","mcp-config.md","mcp-protocol-transports.md","mcp-runtime-lifecycle.md","mcp-server-tool-authoring.md","memory.md","models.md","natives-addon-loader-runtime.md","natives-architecture.md","natives-binding-contract.md","natives-build-release-debugging.md","natives-media-system-utils.md","natives-rust-task-cancellation.md","natives-shell-pty-process.md","natives-text-search-pipeline.md","non-compaction-retry-policy.md","notebook-tool-runtime.md","plugin-manager-installer-plumbing.md","porting-from-pi-mono.md","porting-to-natives.md","provider-streaming-internals.md","python-repl.md","render-mermaid.md","resolve-tool-runtime.md","rpc.md","rulebook-matching-pipeline.md","sdk.md","secrets.md","session-operations-export-share-fork-resume.md","session-switching-and-recent-listing.md","session-tree-plan.md","session.md","skills.md","skills/authoring-extensions.md","skills/authoring-hooks.md","skills/authoring-marketplaces.md","skills/examples/hello-extension/README.md","skills/examples/mini-marketplace/README.md","skills/examples/safety-hook/README.md","slash-command-internals.md","task-agent-discovery.md","theme.md","tools/ask.md","tools/ast-edit.md","tools/ast-grep.md","tools/bash.md","tools/browser.md","tools/
|
|
3
|
+
export const EMBEDDED_DOC_FILENAMES: readonly string[] = ["ERRATA-GPT5-HARMONY.md","ai-schema-normalize.md","approval-mode.md","auth-broker-gateway.md","bash-tool-runtime.md","blob-artifact-architecture.md","compaction.md","config-usage.md","custom-tools.md","environment-variables.md","extension-loading.md","extensions.md","fs-scan-cache-architecture.md","gemini-manifest-extensions.md","handoff-generation-pipeline.md","hooks.md","install-id.md","keybindings.md","lsp-config.md","marketplace.md","mcp-config.md","mcp-protocol-transports.md","mcp-runtime-lifecycle.md","mcp-server-tool-authoring.md","memory.md","models.md","natives-addon-loader-runtime.md","natives-architecture.md","natives-binding-contract.md","natives-build-release-debugging.md","natives-media-system-utils.md","natives-rust-task-cancellation.md","natives-shell-pty-process.md","natives-text-search-pipeline.md","non-compaction-retry-policy.md","notebook-tool-runtime.md","plugin-manager-installer-plumbing.md","porting-from-pi-mono.md","porting-to-natives.md","provider-streaming-internals.md","python-repl.md","render-mermaid.md","resolve-tool-runtime.md","rpc.md","rulebook-matching-pipeline.md","sdk.md","secrets.md","session-operations-export-share-fork-resume.md","session-switching-and-recent-listing.md","session-tree-plan.md","session.md","skills.md","skills/authoring-extensions.md","skills/authoring-hooks.md","skills/authoring-marketplaces.md","skills/examples/hello-extension/README.md","skills/examples/mini-marketplace/README.md","skills/examples/safety-hook/README.md","slash-command-internals.md","task-agent-discovery.md","theme.md","tools/ask.md","tools/ast-edit.md","tools/ast-grep.md","tools/bash.md","tools/browser.md","tools/checkpoint.md","tools/debug.md","tools/edit.md","tools/eval.md","tools/find.md","tools/github.md","tools/inspect_image.md","tools/irc.md","tools/job.md","tools/lsp.md","tools/read.md","tools/recall.md","tools/recipe.md","tools/reflect.md","tools/render_mermaid.md","tools/resolve.md","tools/retain.md","tools/rewind.md","tools/search.md","tools/search_tool_bm25.md","tools/ssh.md","tools/task.md","tools/todo_write.md","tools/web_search.md","tools/write.md","tree.md","ttsr-injection-lifecycle.md","tui-runtime-internals.md","tui.md"];
|
|
4
4
|
|
|
5
5
|
export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
|
|
6
6
|
"ERRATA-GPT5-HARMONY.md": "# ERRATA — GPT-5 Harmony-Header Leakage\n\n## 1. The problem\n\nOpenAI frames tool calls in the Harmony chat protocol:\n\n```\n<|start|>assistant<|channel|>commentary to=functions.<NAME><|message|>{ARGS}<|call|>\n```\n\n`<|channel|>commentary to=functions.NAME` is the **routing header** —\ncontrol tokens consumed by the runtime to dispatch the call. These\ntokens never appear as content under normal operation; the runtime\nstrips them.\n\nThe defect: gpt-5 models occasionally emit, **as ordinary content\ninside `{ARGS}`**, the **plain-text shadow** of these routing tokens —\nthe same characters without the `<|…|>` brackets — and continue\nproducing more pseudo-routing structure (channel name, body marker,\nmultilingual spam, fake tool-result framing). The contamination lives\ninside the visible tool argument and is dispatched to the tool as if it\nwere intended content.\n\n**Critical detail.** The actual `<|start|>` / `<|channel|>` /\n`<|message|>` / `<|call|>` special tokens almost never appear in tool\nargs. What leaks is the bracket-less spelling — `analysis to=functions.X\ncode …` — because OpenAI applies a logit mask suppressing the\ncontrol-token IDs inside the args region. The mass that would have gone\nto those special tokens redistributes onto the un-bracketed plain-text\nrepresentation the model also learned. This makes the leak structurally\ninvisible to the routing parser and lands it in the tool input verbatim.\n\nManifestation in tool args (real corpus example):\n\n```\n~ add_function(iso, ctx, ns, \"installSystemChangeObserver\",\n os_install_system_change_observer);】【\"】【analysis to=functions.edit\n code above เงินไทยฟรีuser to=functions.edit code …\n```\n\nThe leading code is real and intended. Everything after the first\nnon-Latin token through the next clean structural boundary is corruption.\n\n---\n\n## 2. Observed statistics & failure modes\n\nSource: `~/.omp/stats.db` (`ss_tool_calls`, `ss_assistant_msgs`), through\n2026-05-10. 1.05M tool calls scanned.\n\n### 2.1 Rate\n\n| Model | Leaks in tool args | Calls | per million |\n|------------------|-------------------:|--------:|------------:|\n| gpt-5.4 | 37 | 226,957 | 163 |\n| gpt-5.3-codex | 17 | 112,243 | 151 |\n| gpt-5.5 | 2 | 80,750 | 25 |\n| gpt-5.2-codex | 0 | — | — |\n\nPlus 15 hits in assistant visible text / thinking blobs.\n\n### 2.2 Tool distribution\n\n| Tool | Hits |\n|---------------------|-----:|\n| `edit` | 38 |\n| `eval` | 11 |\n| `report_tool_issue` | 3 |\n| `grep`/`read`/`search`/`yield` | 1 each |\n\nConcentrated in tools with free-form (non-JSON-schema) argument formats.\n\n### 2.3 Leak shape (deterministic)\n\n```\nLEAK ::= JUNK_PREFIX MARKER CHANNEL_BODY (LEAK)?\nMARKER ::= \"to=functions.\" TOOL_NAME\nCHANNEL_BODY ::= \" code \" (SPAM | reasoning_prose | fake_tool_output)*\nJUNK_PREFIX ::= (GLITCH_TOKEN | CHANNEL_WORD | NON_LATIN_RUN | \"}\" | \"】【\")+\n```\n\n**Cascading is common.** Of 96 marker occurrences across 71 contaminated\nrecords, 39 contain ≥2 markers and 7 contain ≥3 — the model emits\nmultiple fake `to=functions.X code …` blocks back-to-back, often with\nfake `code_output\\nCell N:\\n…` framing between them. Once the\nplain-text scaffolding is in the residual stream, the prefix now *looks\nlike* a fresh tool envelope start, so the macro prior over continuations\nkeeps voting for more scaffolding. Self-amplifying.\n\n### 2.4 Glitch tokens\n\nSingle-token identifiers in `o200k_base` whose embeddings appear to be\nnear-init from underrepresentation in post-training. ASCII residue\nimmediately before the marker in the natural corpus:\n\n| Surface string | Single-token | Token ID | Hits in corpus |\n|-------------------|:-:|---------:|---:|\n| `Japgolly` | ✅ | 199,745 | 1 |\n| `Jsii` | ✅ | 114,318 | (subtoken of `Jsii_commentary`) |\n| `Jsii_commentary` | — (3 toks) | — | 2 |\n| `changedFiles` | — (2 toks) | — | 8 |\n| `RTLU` | — (2 toks) | — | 3 |\n\n`Japgolly` is in the last 0.13% of the vocabulary — the same family of\nGitHub-corpus residue that produced `SolidGoldMagikarp` in the 2023\nGPT-2 vocabulary (Rumbelow & Watkins). `SolidGoldMagikarp` itself\ntokenizes to 5 tokens in `o200k_base` — that specific token was retired,\nbut the class wasn't.\n\nFor the multi-token entries, the corpus-level signature is the surface\nstring; the underlying glitch trigger is a sub-token (e.g. `Jsii` inside\n`Jsii_commentary`). The detector list (`G` signal) keys on the surface\nstrings.\n\nStable across unrelated sessions. Treated as a high-precision detector\nsignal.\n\n### 2.5 Channel-word leakage\n\n`analysis` (5), `assistant` (5), `commentary` (3), `user` (1) appear\ndirectly preceding `to=`. Always bare words; never `<|channel|>analysis`\nor any other bracketed form. Consistent with §1 — the brackets are\nmasked, the words are not.\n\n### 2.6 Non-Latin spam residue\n\n96 marker hits, by script: CJK 40, Cyrillic 12, Telugu/Kannada/Malayalam\n18, Thai 8, Georgian 7, Armenian 7, Arabic 1. Recurring fragments are\nChinese gambling SEO (`大发时时彩`, `天天中彩票`), Georgian/Abkhaz junk,\nand Thai casino spam — well-known low-quality crawl residue.\n\nThis is the same script distribution observed in the controlled\nreproduction (§7.3), independent of the prompt's natural language.\n\n### 2.7 Failure-mode breakdown for the `edit` tool\n\nThe `edit` tool exists in two variants in the corpus:\n\n| Variant | Calls | Recovery |\n|--------------------------|------:|----------|\n| Patch-DSL (`§PATH`/anchor/`«»≔` ops) | 27 | **Recoverable** by op-truncation (§3.3) |\n| JSON-schema (`{path,edits:[…]}`) | 11 | **Not recoverable** — contamination is escaped *inside* JSON strings, parser accepts it cleanly, content would be written verbatim into source files |\n\nFor Patch-DSL leaks specifically:\n\n- 20/27 cases: contamination on the last input line; nothing follows.\n- 7/27 cases: contamination mid-input; what follows is one of: a\n duplicate replay of an earlier file/anchor, intended content for a\n *different* tool call (the model started its next call inline), or\n pure hallucination. Post-contamination content is never trustworthy.\n\n### 2.8 Mechanism (confirmed)\n\n**Prior collapse from null-embedding glitch tokens, into a\ncontrol-token-masked basin whose mass redistributes onto the\nplain-text shadow of the Harmony protocol.**\n\nStep by step:\n\n1. The model is mid-`{ARGS}` of a Harmony tool call. The runtime applies\n a logit mask suppressing structural control tokens (`<|channel|>`,\n `<|message|>`, `<|call|>`, `<|start|>`, `<|end|>`) inside the args\n region. Without this mask, normal generation would constantly\n hallucinate envelope-closes; with it, those token IDs have logit\n `-∞` in args.\n2. A glitch token `g` is sampled. By construction `g` was in the BPE\n merge corpus but barely in LM/RL training, so its **input embedding\n `e_g` ≈ near-init noise of small norm**.\n3. At position t+1, the residual update `h_{t+1} ≈ LN(h_t + e_g + Attn +\n MLP)` is dominated by the prefix-derived terms; the just-emitted-token\n signal is effectively absent. Generation diversity normally comes\n from `e_x` steering the residual into different sub-regions —\n stripped here.\n4. The next-token distribution therefore collapses onto the **conditional\n prior over continuations of the prefix, with local conditioning\n removed**. In a tool-calling rollout context, that prior is sharply\n peaked on Harmony scaffolding (control tokens + routing tokens) —\n that's what RL trained.\n5. The mask zeros the control-token IDs. Mass redistributes onto the\n **next-best continuation**: the un-bracketed surface-form spelling of\n the same protocol (`analysis`, `commentary`, ` to=functions.X`,\n ` code `). This spelling is unmasked because those characters are\n ordinary tokens.\n6. Once a few tokens of plain-text scaffolding land in the residual\n stream, the prefix now resembles a fresh envelope start. The macro\n prior keeps voting for more scaffolding. Cascading (§2.3) follows.\n7. Multilingual spam after the marker is the same prior-collapse\n continuation, drawn from the training neighborhood of the glitch\n token (often ESL/auto-generated multilingual web junk — exactly the\n crawl residue in §2.6).\n\n**Two corollaries the corpus data demanded but only the experiment\nexplained:**\n\n- **The brackets never appear** (§1, §2.5). The mask is what makes the\n leak land in plain text instead of as a real envelope-close.\n- **Counterintuitive grammar dependency** (§7.4). The leak is *worse* in\n formats closest to OpenAI's training distribution. Off-distribution\n custom grammars dampen the macro-prior basin; the official\n `*** Begin Patch` format is the strongest collapse target.\n\nThe 2023 SolidGoldMagikarp paper documented mechanism (1)+(2)+(4). The\nnew piece is (5): when constrained decoding masks the natural collapse\ntarget, the mass laundered through the un-masked plain-text shadow\nbecomes a structurally-invisible exfiltration channel.",
|
|
@@ -28,7 +28,7 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
|
|
|
28
28
|
"mcp-runtime-lifecycle.md": "# MCP runtime lifecycle\n\nThis document describes how MCP servers are discovered, connected, exposed as tools, refreshed, and torn down in the coding-agent runtime.\n\n## Lifecycle at a glance\n\n1. **SDK startup** calls `discoverAndLoadMCPTools()` (unless MCP is disabled).\n2. **Discovery** (`loadAllMCPConfigs`) resolves MCP server configs from capability sources, filters disabled/project/Exa entries, and preserves source metadata.\n3. **Manager connect phase** (`MCPManager.connectServers`) starts per-server connect + `tools/list` in parallel.\n4. **Fast startup gate** waits up to 250ms, then may return:\n - fully loaded `MCPTool`s,\n - failures per server,\n - or cached `DeferredMCPTool`s for still-pending servers.\n5. **SDK wiring** merges MCP tools into runtime tool registry for the session.\n6. **Post-connect enrichment** best-effort loads resources, resource templates, prompts, and optional resource subscriptions.\n7. **Live session** can refresh MCP tools via `/mcp` flows (`disconnectAll` + rediscover + `session.refreshMCPTools`) and can reconnect individual servers on transport close or `/mcp reconnect`.\n8. **Teardown** happens when callers invoke `disconnectServer`/`disconnectAll`; manager also clears MCP tool/resource/prompt registrations for disconnected servers.\n\n## Discovery and load phase\n\n### Entry path from SDK\n\n`createAgentSession()` in `src/sdk.ts` performs MCP startup when `enableMCP` is true (default):\n\n- calls `discoverAndLoadMCPTools(cwd, { ... })`,\n- passes `authStorage`, cache storage, and `mcp.enableProjectConfig` setting,\n- always sets `filterExa: true`,\n- logs per-server load/connect errors,\n- stores returned manager in `toolSession.mcpManager` and session result.\n\nIf `enableMCP` is false, MCP discovery is skipped entirely.\n\n### Config discovery and filtering\n\n`loadAllMCPConfigs()` (`src/mcp/config.ts`) loads canonical MCP server items through capability discovery, then converts to legacy `MCPServerConfig`.\n\nFiltering behavior:\n\n- `enableProjectConfig: false` removes project-level entries (`_source.level === \"project\"`).\n- `enabled: false` servers are skipped before connect attempts.\n- Exa servers are filtered out by default and API keys are extracted for native Exa tool integration.\n\nResult includes both `configs` and `sources` (metadata used later for provider labeling).\n\n### Discovery-level failure behavior\n\n`discoverAndLoadMCPTools()` distinguishes two failure classes:\n\n- **Discovery hard failure** (exception from `manager.discoverAndConnect`, typically from config discovery): returns an empty tool set and one synthetic error `{ path: \".mcp.json\", error }`.\n- **Per-server runtime/connect failure**: manager returns partial success with `errors` map; other servers continue.\n\nSo startup does not fail the whole agent session when individual MCP servers fail.\n\n## Manager state model\n\n`MCPManager` tracks runtime lifecycle with separate registries:\n\n- `#connections: Map<string, MCPServerConnection>` — fully connected servers.\n- `#pendingConnections: Map<string, Promise<MCPServerConnection>>` — handshake in progress.\n- `#pendingToolLoads: Map<string, Promise<{ connection, serverTools }>>` — connected but tools still loading.\n- `#tools: CustomTool[]` — current MCP tool view exposed to callers.\n- `#sources: Map<string, SourceMeta>` — provider/source metadata even before connect completes.\n- `#pendingReconnections: Map<string, Promise<MCPServerConnection | null>>` — reconnects in progress after a dropped transport or explicit reconnect.\n- `#serverConfigs: Map<string, MCPServerConfig>` — original unresolved configs preserved so reconnect can re-resolve credentials without leaking resolved tokens.\n\n`getConnectionStatus(name)` derives status from these maps:\n\n- `connected` if in `#connections`,\n- `connecting` if pending connect, pending tool load, or pending reconnect,\n- `disconnected` otherwise.\n\n## Connection establishment and startup timing\n\n## Per-server connect pipeline\n\nFor each discovered server in `connectServers()`:\n\n1. store/update source metadata,\n2. skip if already connected/pending/reconnecting,\n3. validate transport fields (`validateServerConfig`),\n4. resolve auth/shell substitutions (`#resolveAuthConfig`),\n5. call `connectToServer(name, resolvedConfig)` with manager notification/request handlers,\n6. wire HTTP OAuth refresh and transport `onClose` reconnect handling,\n7. call `listTools(connection)`,\n8. cache tool definitions (`MCPToolCache.set`) best-effort,\n9. best-effort load resources, resource templates, prompts, and subscriptions after tools load.\n\n`connectToServer()` behavior (`src/mcp/client.ts`):\n\n- creates stdio or HTTP/SSE transport,\n- performs MCP `initialize`,\n- for HTTP/SSE, starts the optional background SSE listener before `notifications/initialized`,\n- sends `notifications/initialized`,\n- uses timeout (`OMP_MCP_TIMEOUT_MS`, `config.timeout`, or 30s default; `0` disables the client-side timeout),\n- closes transport on init failure.\n\n### Fast startup gate + deferred fallback\n\n`connectServers()` waits on a race between:\n\n- all connect/tool-load tasks settled, and\n- `STARTUP_TIMEOUT_MS = 250`.\n\nAfter 250ms:\n\n- fulfilled tasks become live `MCPTool`s,\n- rejected tasks produce per-server errors,\n- still-pending tasks:\n - use cached tool definitions if available (`MCPToolCache.get`) to create `DeferredMCPTool`s,\n - otherwise block until those pending tasks settle.\n\nThis is a hybrid startup model: fast return when cache is available, correctness wait when cache is not.\n\n### Background completion behavior\n\nEach pending `toolsPromise` also has a background continuation that eventually:\n\n- replaces that server’s tool slice in manager state via `#replaceServerTools`,\n- writes cache,\n- logs late failures only after startup (`allowBackgroundLogging`).\n\n## Tool exposure and live-session availability\n\n### Startup registration\n\n`discoverAndLoadMCPTools()` converts manager tools into `LoadedCustomTool[]` and decorates paths (`mcp:<server> via <providerName>` when known).\n\n`createAgentSession()` then pushes these tools into `customTools`, which are wrapped and added to the runtime tool registry with names like `mcp__<server>_<tool>`.\n\n### Tool calls\n\n- `MCPTool` calls tools through an already connected `MCPServerConnection`.\n- `DeferredMCPTool` waits for `waitForConnection(server)` before calling; this allows cached tools to exist before connection is ready.\n- Both attempt a reconnect + single retry for retriable connection failures.\n\nBoth return structured tool output and convert remaining transport/tool errors into `MCP error: ...` tool content (abort remains abort).\n\n## Refresh/reload paths (startup vs live reload)\n\n### Initial startup path\n\n- one-time discovery/load in `sdk.ts`,\n- tools are registered in initial session tool registry.\n\n### Interactive reload path\n\n`/mcp reload` path (`src/modes/controllers/mcp-command-controller.ts`) does:\n\n1. `mcpManager.disconnectAll()`,\n2. `mcpManager.discoverAndConnect()`,\n3. `session.refreshMCPTools(mcpManager.getTools())`.\n\n`session.refreshMCPTools()` (`src/session/agent-session.ts`) removes all `mcp__` tools, re-wraps latest MCP tools, and re-activates tool set so MCP changes apply without restarting session.\n\nThere is also a follow-up path for late connections: after waiting for a specific server, if status becomes `connected`, it re-runs `session.refreshMCPTools(...)` so newly available tools are rebound in-session.\n\n## Health, reconnect, and partial failure behavior\n\nCurrent runtime behavior is connection-event driven:\n\n- **No autonomous polling health monitor** in manager/client.\n- **Automatic reconnect is wired to `transport.onClose`** for managed connections.\n- Reconnect retries with backoff (`500`, `1000`, `2000`, `4000` ms), reloads tools, and notifies consumers on success.\n- Tool calls that see retriable connection errors also attempt one reconnect + retry.\n- Reconnect is also explicit via `/mcp reconnect <name>` or broader `/mcp reload`.\n\nOperationally:\n\n- one server failing does not remove tools from healthy servers,\n- connect/list failures are isolated per server,\n- stale tools may remain visible while reconnect is attempted; calls report MCP errors if recovery fails,\n- tool cache, resource/prompt loading, subscriptions, and background updates are best-effort (warnings/errors logged, no hard stop).\n\n## Teardown semantics\n\n### Server-level teardown\n\n`disconnectServer(name)`:\n\n- removes pending entries, source metadata, saved config, resource refresh/subscription state,\n- detaches `onClose` so explicit close does not trigger reconnect,\n- closes transport if connected,\n- filters manager tool state for names beginning with `mcp__${name}_`.\n\n### Global teardown\n\n`disconnectAll()`:\n\n- detaches `onClose` for all active transports, then closes them with `Promise.allSettled`,\n- clears pending maps, sources, saved configs, connections, subscriptions, resource refreshes, and manager tool list.\n\nIn current wiring, explicit teardown is used in MCP command flows (for reload/remove/disable). Startup stores the manager on the session; callers that need deterministic MCP shutdown should invoke manager disconnect methods.\n\n## Failure modes and guarantees\n\n| Scenario | Behavior | Hard fail vs best-effort |\n| ---------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------- | ------------------------------ |\n| Discovery throws (capability/config load path) | Loader returns empty tools + synthetic `.mcp.json` error | Best-effort session startup |\n| Invalid server config | Server skipped with validation error entry | Best-effort per server |\n| Connect timeout/init failure | Server error recorded; others continue | Best-effort per server |\n| `tools/list` still pending at startup with cache hit | Deferred tools returned immediately | Best-effort fast startup |\n| `tools/list` still pending at startup without cache | Startup waits for pending to settle | Hard wait for correctness |\n| Late background tool-load failure | Logged after startup gate | Best-effort logging |\n| Runtime dropped transport | Manager attempts reconnect; stale tools remain while reconnecting and future calls may retry once or fail with MCP errors | Best-effort automatic recovery |\n\n## Public API surface\n\n`src/mcp/index.ts` re-exports loader/manager/client APIs for external callers. `src/sdk.ts` exposes `discoverMCPServers()` as a convenience wrapper returning the same loader result shape.\n\n## Implementation files\n\n- [`src/mcp/loader.ts`](../packages/coding-agent/src/mcp/loader.ts) — loader facade, discovery error normalization, `LoadedCustomTool` conversion.\n- [`src/mcp/manager.ts`](../packages/coding-agent/src/mcp/manager.ts) — lifecycle state registries, parallel connect/list flow, refresh/disconnect.\n- [`src/mcp/client.ts`](../packages/coding-agent/src/mcp/client.ts) — transport setup, initialize handshake, list/call/disconnect.\n- [`src/mcp/index.ts`](../packages/coding-agent/src/mcp/index.ts) — MCP module API exports.\n- [`src/sdk.ts`](../packages/coding-agent/src/sdk.ts) — startup wiring into session/tool registry.\n- [`src/mcp/config.ts`](../packages/coding-agent/src/mcp/config.ts) — config discovery/filtering/validation used by manager.\n- [`src/mcp/tool-bridge.ts`](../packages/coding-agent/src/mcp/tool-bridge.ts) — `MCPTool` and `DeferredMCPTool` runtime behavior.\n- [`src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts) — `refreshMCPTools` live rebinding.\n- [`src/modes/controllers/mcp-command-controller.ts`](../packages/coding-agent/src/modes/controllers/mcp-command-controller.ts) — interactive reload/reconnect flows.\n- [`src/task/executor.ts`](../packages/coding-agent/src/task/executor.ts) — subagent MCP proxying via parent manager connections.\n",
|
|
29
29
|
"mcp-server-tool-authoring.md": "# MCP server and tool authoring\n\nThis document explains how MCP server definitions become callable `mcp__*` tools in coding-agent, and what operators should expect when configs are invalid, duplicated, disabled, or auth-gated.\n\n## Architecture at a glance\n\n```text\nConfig sources (.omp/.claude/.cursor/.vscode/mcp.json, mcp.json, etc.)\n -> discovery providers normalize to canonical MCPServer\n -> capability loader dedupes by server name (higher provider priority wins)\n -> loadAllMCPConfigs converts to MCPServerConfig + skips enabled:false\n -> MCPManager connects/listTools (with auth/header/env resolution)\n -> manager best-effort loads resources/prompts and subscribes to resource updates when enabled\n -> MCPTool/DeferredMCPTool bridge exposes tools as mcp__<server>_<tool>\n -> AgentSession.refreshMCPTools replaces live MCP tools immediately\n```\n\n## 1) Server config model and validation\n\n`src/mcp/types.ts` defines the authoring shape used by MCP config writers and runtime:\n\n- `stdio` (default when `type` missing): requires `command`, optional `args`, `env`, `cwd`\n- `http`: requires `url`, optional `headers`\n- `sse`: requires `url`, optional `headers` (kept for compatibility)\n- shared fields: `enabled`, `timeout`, `auth`, `oauth`\n\n`validateServerConfig()` (`src/mcp/config.ts`) enforces transport basics:\n\n- rejects configs that set both `command` and `url`\n- requires `command` for stdio\n- requires `url` for http/sse\n- rejects unknown `type`\n\n`config-writer.ts` applies this validation for add/update operations and also validates server names:\n\n- non-empty\n- max 100 chars\n- only `[a-zA-Z0-9_.-]`\n\n### Transport pitfalls\n\n- `type` omitted means stdio. If you intended HTTP/SSE but omitted `type`, `command` becomes mandatory.\n- `sse` is still accepted but treated as HTTP transport internally (`createHttpTransport`).\n- Validation is structural, not reachability: a syntactically valid URL can still fail at connect time.\n\n## 2) Discovery, normalization, and precedence\n\n### Capability-based discovery\n\n`loadAllMCPConfigs()` (`src/mcp/config.ts`) loads canonical `MCPServer` items via `loadCapability(mcpCapability.id)`.\n\nThe capability layer (`src/capability/index.ts`) then:\n\n1. loads providers in priority order\n2. dedupes by `server.name` (first win = highest priority)\n3. validates deduped items\n\nResult: duplicate server names across sources are not merged. One definition wins; lower-priority duplicates are shadowed.\n\n### `.mcp.json` and related files\n\nThe dedicated fallback provider in `src/discovery/mcp-json.ts` reads project-root `mcp.json` and `.mcp.json` (low priority).\n\nIn practice MCP servers also come from higher-priority providers (for example native `.omp/...` and tool-specific config dirs). Authoring guidance:\n\n- Prefer `.omp/mcp.json` (project) or `~/.omp/agent/mcp.json` (user) for explicit control.\n- Use root `mcp.json` / `.mcp.json` when you need fallback compatibility.\n- Reusing the same server name in multiple sources causes precedence shadowing, not merge.\n\n### Normalization behavior\n\n`convertToLegacyConfig()` (`src/mcp/config.ts`) maps canonical `MCPServer` to runtime `MCPServerConfig`.\n\nKey behavior:\n\n- transport inferred as `server.transport ?? (command ? \"stdio\" : url ? \"http\" : \"stdio\")`\n- disabled servers (`enabled === false`) are dropped before connection\n- optional fields are preserved when present\n\n### Environment expansion during discovery\n\n`mcp-json.ts` expands env placeholders in string fields with `expandEnvVarsDeep()`:\n\n- supports `${VAR}` and `${VAR:-default}`\n- unresolved values remain literal `${VAR}` strings\n\n`mcp-json.ts` also performs runtime type checks for user JSON and logs warnings for invalid `enabled`/`timeout` values instead of hard-failing the whole file.\n\n## 3) Auth and runtime value resolution\n\n`MCPManager.prepareConfig()`/`#resolveAuthConfig()` (`src/mcp/manager.ts`) is the final pre-connect pass.\n\n### OAuth credential injection\n\nIf config has:\n\n```ts\nauth: { type: \"oauth\", credentialId: \"...\" }\n```\n\nand credential exists in auth storage:\n\n- `http`/`sse`: injects `Authorization: Bearer <access_token>` header\n- `stdio`: injects `OAUTH_ACCESS_TOKEN` env var\n\nIf credential lookup fails, manager logs a warning and continues with unresolved auth.\n\n### Header/env value resolution\n\nBefore connect, manager resolves stdio `env` values and HTTP/SSE `headers` values via `resolveConfigValue()` (`src/config/resolve-config-value.ts`):\n\n- value starting with `!` => execute shell command, use trimmed stdout (cached)\n- failed, timed-out, or whitespace-only commands produce `undefined`, so that entry is omitted\n- otherwise, treat value as environment variable name first (`process.env[name]`), fallback to literal value\n\nOperational caveat: a mistyped `!` secret command can silently remove that header/env entry, producing downstream 401/403 or server startup failures. A mistyped environment variable name is sent literally unless that literal happens to be meaningful to the server.\n\n## 4) Tool bridge: MCP -> agent-callable tools\n\n`src/mcp/tool-bridge.ts` converts MCP tool definitions into `CustomTool`s.\n\n### Naming and collision domain\n\nTool names are generated as:\n\n```text\nmcp__<sanitized_server_name>_<sanitized_tool_name>\n```\n\nRules:\n\n- lowercases\n- non-`[a-z_]` chars become `_`\n- repeated underscores collapse\n- redundant `<server>_` prefix in tool name is stripped once\n\nThis avoids many collisions, but not all. Different raw names can still sanitize to the same identifier (for example `my-server` and `my.server` both sanitize similarly), and registry insertion is last-write-wins.\n\n### Schema mapping\n\n`tool-bridge.ts` passes each MCP `inputSchema` through `sanitizeSchemaForMCP()` before registering it as a `CustomTool` schema.\n\n### Execution mapping\n\n`MCPTool.execute()` / `DeferredMCPTool.execute()`:\n\n- calls MCP `tools/call`\n- flattens MCP content into displayable text\n- returns structured details (`serverName`, `mcpToolName`, provider metadata)\n- maps server-reported `isError` to `Error: ...` text result\n- attempts reconnect + one retry for retriable connection errors\n- maps remaining thrown transport/runtime failures to `MCP error: ...`\n- preserves abort semantics by translating AbortError into `ToolAbortError`\n\n## 5) Operator lifecycle: add/edit/remove and live updates\n\nInteractive mode exposes `/mcp` in `src/modes/controllers/mcp-command-controller.ts`.\n\nSupported operations:\n\n- `add` (wizard or quick-add)\n- `remove` / `rm`\n- `enable` / `disable`\n- `test`\n- `reauth` / `unauth`\n- `reconnect`\n- `reload`\n- `resources`, `prompts`, `notifications`\n- Smithery search/login/logout flows\n\nConfig writes are atomic (`writeMCPConfigFile`: temp file + rename).\n\nAfter changes, controller calls `#reloadMCP()`:\n\n1. `mcpManager.disconnectAll()`\n2. `mcpManager.discoverAndConnect()`\n3. `session.refreshMCPTools(mcpManager.getTools())`\n\n`refreshMCPTools()` replaces all `mcp__` registry entries and immediately re-activates the latest MCP tool set, so changes take effect without restarting the session.\n\n### Mode differences\n\n- **Interactive/TUI mode**: `/mcp` gives in-app UX (wizard, OAuth flow, connection status text, immediate runtime rebinding).\n- **SDK/headless integration**: `discoverAndLoadMCPTools()` (`src/mcp/loader.ts`) returns loaded tools + per-server errors; no `/mcp` command UX.\n\n## 6) User-visible error surfaces\n\nCommon error strings users/operators see:\n\n- add/update validation failures:\n - `Invalid server config: ...`\n - `Server \"<name>\" already exists in <path>`\n- quick-add argument issues:\n - `Use either --url or -- <command...>, not both.`\n - `--token requires --url (HTTP/SSE transport).`\n- connect/test failures:\n - `Failed to connect to \"<name>\": <message>`\n - timeout help text suggests increasing timeout\n - auth help text for `401/403`\n- auth/OAuth flows:\n - `Authentication required ... OAuth endpoints could not be discovered`\n - `OAuth flow timed out. Please try again.`\n - `OAuth authentication failed: ...`\n- disabled server usage:\n - `Server \"<name>\" is disabled. Run /mcp enable <name> first.`\n\nBad source JSON in discovery is generally handled as warnings/logs; config-writer paths throw explicit errors.\n\n## 7) Practical authoring guidance\n\nFor robust MCP authoring in this codebase:\n\n1. Keep server names globally unique across all MCP-capable config sources.\n2. Prefer names that remain distinct after MCP tool-name sanitization to avoid generated `mcp__` collisions.\n3. Use explicit `type` to avoid accidental stdio defaults.\n4. Treat `enabled: false` as hard-off: server is omitted from runtime connect set.\n5. For OAuth configs, store a valid `credentialId`; otherwise auth injection is skipped.\n6. If using command-based secret resolution (`!cmd`), verify command output is stable and non-empty.\n\n## Implementation files\n\n- [`src/mcp/types.ts`](../packages/coding-agent/src/mcp/types.ts)\n- [`src/mcp/config.ts`](../packages/coding-agent/src/mcp/config.ts)\n- [`src/mcp/config-writer.ts`](../packages/coding-agent/src/mcp/config-writer.ts)\n- [`src/mcp/tool-bridge.ts`](../packages/coding-agent/src/mcp/tool-bridge.ts)\n- [`src/discovery/mcp-json.ts`](../packages/coding-agent/src/discovery/mcp-json.ts)\n- [`src/modes/controllers/mcp-command-controller.ts`](../packages/coding-agent/src/modes/controllers/mcp-command-controller.ts)\n- [`src/mcp/manager.ts`](../packages/coding-agent/src/mcp/manager.ts)\n- [`src/capability/index.ts`](../packages/coding-agent/src/capability/index.ts)\n- [`src/config/resolve-config-value.ts`](../packages/coding-agent/src/config/resolve-config-value.ts)\n- [`src/mcp/loader.ts`](../packages/coding-agent/src/mcp/loader.ts)\n",
|
|
30
30
|
"memory.md": "# Autonomous Memory\n\nWhen enabled, the agent automatically extracts durable knowledge from past sessions and injects a compact summary into each new session. Over time it builds a project-scoped memory store — technical decisions, recurring workflows, pitfalls — that carries forward without manual effort.\n\nDisabled by default. Enable via `/settings` or `config.yml`:\n\n```yaml\nmemories:\n enabled: true\n```\n\n## Usage\n\n### What gets injected\n\nAt session start, if a memory summary exists for the current project, it is injected into the system prompt as a **Memory Guidance** block. The agent is instructed to:\n\n- Treat memory as heuristic context — useful for process and prior decisions, not authoritative on current repo state.\n- Cite the memory artifact path when memory changes the plan, and pair it with current-repo evidence before acting.\n- Prefer repo state and user instruction when they conflict with memory; treat conflicting memory as stale.\n\n### Reading memory artifacts\n\nThe agent can read memory files directly using `memory://` URLs with the `read` tool:\n\n| URL | Content |\n| -------------------------------------- | ----------------------------------- |\n| `memory://root` | Compact summary injected at startup |\n| `memory://root/MEMORY.md` | Full long-term memory document |\n| `memory://root/skills/<name>/SKILL.md` | A generated skill playbook |\n\n### `/memory` slash command\n\n| Subcommand | Effect |\n| --------------------- | ---------------------------------------------- |\n| `view` | Show the current memory injection payload |\n| `clear` / `reset` | Delete all memory data and generated artifacts |\n| `enqueue` / `rebuild` | Force consolidation to run at next startup |\n\n## How it works\n\nMemories are built by a background pipeline that runs at startup or when manually triggered via slash command.\n\n**Phase 1 — per-session extraction:** For each past session that has changed since it was last processed, a model reads the session history and extracts durable signal: technical decisions, constraints, resolved failures, recurring workflows. Sessions that are too recent, too old, or currently active are skipped. Each extraction produces a raw memory block and a short synopsis for that session.\n\n**Phase 2 — consolidation:** After extraction, a second model pass reads all per-session extractions and produces three outputs written to disk:\n\n- `MEMORY.md` — a curated long-term memory document\n- `memory_summary.md` — the compact text injected at session start\n- `skills/` — reusable procedural playbooks, each in its own subdirectory\n\nPhase 2 uses a lease to prevent double-running when multiple processes start simultaneously. Stale skill directories from prior runs are pruned automatically.\n\nAll output is scanned for secrets before being written to disk.\n\n### Extraction behavior\n\nMemory extraction and consolidation behavior is driven by static prompt files in `packages/coding-agent/src/prompts/memories/`.\n\n| File | Purpose | Variables |\n| --------------------- | ------------------------------------------- | ------------------------------------------- |\n| `stage_one_system.md` | System prompt for per-session extraction | — |\n| `stage_one_input.md` | User-turn template wrapping session content | `{{thread_id}}`, `{{response_items_json}}` |\n| `consolidation.md` | Prompt for cross-session consolidation | `{{raw_memories}}`, `{{rollout_summaries}}` |\n| `read_path.md` | Memory guidance injected into live sessions | `{{memory_summary}}` |\n\n### Model selection\n\nMemory piggybacks on the model role system.\n\n| Phase | Role | Purpose |\n| ----------------------- | ------------------------------------------------------------------- | -------------------------------- |\n| Phase 1 (extraction) | `default` | Per-session knowledge extraction |\n| Phase 2 (consolidation) | `smol` (falls back to `default`, then current/first registry model) | Cross-session synthesis |\n\nIf the requested memory role is not configured, memory model resolution falls back to the `default` role, then the active session model, then the first model in the registry.\n\n## Configuration\n\n| Setting | Default | Description |\n| ------------------------------------- | ------- | --------------------------------------------------------- |\n| `memories.enabled` | `false` | Master switch |\n| `memories.maxRolloutAgeDays` | `30` | Sessions older than this are not processed |\n| `memories.minRolloutIdleHours` | `12` | Sessions active more recently than this are skipped |\n| `memories.maxRolloutsPerStartup` | `64` | Cap on sessions processed in a single startup |\n| `memories.summaryInjectionTokenLimit` | `5000` | Max tokens of the summary injected into the system prompt |\n\nAdditional tuning knobs (concurrency, lease durations, token budgets) are available in config for advanced use.\n\n## Key files\n\n- `packages/coding-agent/src/memories/index.ts` — pipeline orchestration, injection, slash command handling\n- `packages/coding-agent/src/memories/storage.ts` — SQLite-backed job queue and thread registry\n- `packages/coding-agent/src/prompts/memories/` — memory prompt templates\n- `packages/coding-agent/src/internal-urls/memory-protocol.ts` — `memory://` URL handler\n",
|
|
31
|
-
"models.md": "# Model and Provider Configuration (`models.yml`)\n\nThis document describes how the coding-agent currently loads models, applies overrides, resolves credentials, and chooses models at runtime.\n\n## What controls model behavior\n\nPrimary implementation files:\n\n- `src/config/model-registry.ts` — loads built-in + custom models, provider overrides, runtime discovery, auth integration\n- `src/config/model-resolver.ts` — parses model patterns and selects initial/smol/slow models\n- `src/config/settings-schema.ts` — model-related settings (`modelRoles`, provider transport preferences)\n- `src/session/auth-storage.ts` — API key + OAuth resolution order\n- `packages/ai/src/models.ts` and `packages/ai/src/types.ts` — built-in providers/models and `Model`/`compat` types\n\n## Config file location and legacy behavior\n\nDefault config path:\n\n- `~/.omp/agent/models.yml`\n\nLegacy behavior still present:\n\n- If `models.yml` is missing and `models.json` exists at the same location, it is migrated to `models.yml`.\n- Explicit `.json` / `.jsonc` config paths are still supported when passed programmatically to `ModelRegistry`.\n\n## `models.yml` shape\n\n```yaml\nproviders:\n <provider-id>:\n # provider-level config\nequivalence:\n overrides:\n <provider-id>/<model-id>: <canonical-model-id>\n exclude:\n - <provider-id>/<model-id>\n```\n\n`provider-id` is the canonical provider key used across selection and auth lookup.\n\n`equivalence` is optional and configures canonical model grouping on top of concrete provider models:\n\n- `overrides` maps an exact concrete selector (`provider/modelId`) to an official upstream canonical id\n- `exclude` opts a concrete selector out of canonical grouping\n\n## Provider-level fields\n\n```yaml\nproviders:\n my-provider:\n baseUrl: https://api.example.com/v1\n apiKey: MY_PROVIDER_API_KEY\n api: openai-completions\n headers:\n X-Team: platform\n authHeader: true\n auth: apiKey\n disableStrictTools: false # set true for Anthropic-compatible endpoints that reject the strict field\n discovery:\n type: ollama\n modelOverrides:\n some-model-id:\n name: Renamed model\n models:\n - id: some-model-id\n name: Some Model\n api: openai-completions\n reasoning: false\n input: [text]\n cost:\n input: 0\n output: 0\n cacheRead: 0\n cacheWrite: 0\n contextWindow: 128000\n maxTokens: 16384\n headers:\n X-Model: value\n compat:\n supportsStore: true\n supportsDeveloperRole: true\n supportsReasoningEffort: true\n maxTokensField: max_completion_tokens\n openRouterRouting:\n only: [anthropic]\n vercelGatewayRouting:\n order: [anthropic, openai]\n extraBody:\n gateway: m1-01\n controller: mlx\n```\n\n### Allowed provider/model `api` values\n\n- `openai-completions`\n- `openai-responses`\n- `openai-codex-responses`\n- `azure-openai-responses`\n- `anthropic-messages`\n- `google-generative-ai`\n- `google-vertex`\n\n### Allowed auth/discovery values\n\n- `auth`: `apiKey` (default), `none`, or `oauth`; for `models.yml` custom models, `oauth` is accepted by schema but does not waive the `apiKey` requirement\n- `discovery.type`: `ollama`, `llama.cpp`, or `lm-studio`\n\n## Validation rules (current)\n\n### Full custom provider (`models` is non-empty)\n\nRequired:\n\n- `baseUrl`\n- `apiKey` unless `auth: none`\n- `api` at provider level or each model\n\n### Override-only provider (`models` missing or empty)\n\nMust define at least one of:\n\n- `baseUrl`\n- `headers`\n- `compat`\n- `disableStrictTools`\n- `modelOverrides`\n- `discovery`\n\n### Discovery\n\n- `discovery` requires provider-level `api`.\n\n### Model value checks\n\n- `id` required\n- `contextWindow` and `maxTokens` must be positive if provided\n\n## Merge and override order\n\nModelRegistry pipeline (on refresh):\n\n1. Load built-in providers/models from `@oh-my-pi/pi-ai`.\n2. Load `models.yml` custom config.\n3. Apply provider overrides (`baseUrl`, `headers`, `disableStrictTools`) to built-in models.\n4. Apply `modelOverrides` (per provider + model id).\n5. Merge custom `models`:\n - same `provider + id` replaces existing\n - otherwise append\n6. Load cached/runtime-discovered models (Ollama, llama.cpp, LM Studio, plus built-in provider managers), then re-apply model overrides.\n\n### Provider-model cache and static fingerprint\n\nCached per-provider model lists are persisted in the model-cache SQLite\ndatabase (schema v3) with a `static_fingerprint` column that hashes the\nstatic catalog slice merged into the row. When `resolveProviderModels`\nskips the network fetch and the fingerprint of the in-memory static\ncatalog matches the cached one, the cached rows are returned verbatim —\nthe static + dynamic merge is bypassed entirely. The fingerprint is\nmemoized per process via a WeakMap keyed by the static-models array\nreference, so repeated cold-start calls do not re-hash.\n\n## Canonical model equivalence and coalescing\n\nThe registry keeps every concrete provider model and then builds a canonical layer above them.\n\nCanonical ids are official upstream ids only, for example:\n\n- `claude-opus-4-6`\n- `claude-haiku-4-5`\n- `gpt-5.3-codex`\n\n### `models.yml` equivalence config\n\nExample:\n\n```yaml\nproviders:\n zenmux:\n baseUrl: https://api.zenmux.example/v1\n apiKey: ZENMUX_API_KEY\n api: openai-codex-responses\n models:\n - id: codex\n name: Zenmux Codex\n reasoning: true\n input: [text]\n cost:\n input: 0\n output: 0\n cacheRead: 0\n cacheWrite: 0\n contextWindow: 200000\n maxTokens: 32768\n\nequivalence:\n overrides:\n zenmux/codex: gpt-5.3-codex\n p-codex/codex: gpt-5.3-codex\n exclude:\n - demo/codex-preview\n```\n\nBuild order for canonical grouping:\n\n1. exact user override from `equivalence.overrides`\n2. bundled official-id matches from built-in model metadata\n3. conservative heuristic normalization for gateway/provider variants\n4. fallback to the concrete model's own id\n\nCurrent heuristics are intentionally narrow:\n\n- embedded upstream prefixes can be stripped when present, for example `anthropic/...` or `openai/...`\n- dotted and dashed version variants can normalize only when they map to an existing official id, for example `4.6 -> 4-6`\n- ambiguous families or versions are not merged without a bundled match or explicit override\n\n### Canonical resolution behavior\n\nWhen multiple concrete variants share a canonical id, resolution uses:\n\n1. availability and auth\n2. `config.yml` `modelProviderOrder`\n3. existing registry/provider order if `modelProviderOrder` is unset\n\nDisabled or unauthenticated providers are skipped.\n\nSession state and transcripts continue to record the concrete provider/model that actually executed the turn.\n\nProvider defaults vs per-model overrides:\n\n- Provider `headers` are baseline.\n- Model `headers` override provider header keys.\n- `modelOverrides` can override model metadata (`name`, `reasoning`, `input`, `cost`, `contextWindow`, `maxTokens`, `headers`, `compat`, `contextPromotionTarget`).\n- `compat` is deep-merged for nested routing blocks (`openRouterRouting`, `vercelGatewayRouting`, `extraBody`).\n\n## Runtime discovery integration\n\n### Implicit Ollama discovery\n\nIf `ollama` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `ollama`\n- api: `openai-responses`\n- base URL: `OLLAMA_BASE_URL` or `http://127.0.0.1:11434`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery calls Ollama endpoints and normalizes discovered OpenAI-compatible models to `openai-responses`.\n\n### Implicit llama.cpp discovery\n\nIf `llama.cpp` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `llama.cpp`\n- api: `openai-responses`\n- base URL: `LLAMA_CPP_BASE_URL` or `http://127.0.0.1:8080`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery calls llama.cpp model endpoints and synthesizes model entries with local defaults.\n\n### Implicit LM Studio discovery\n\nIf `lm-studio` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `lm-studio`\n- api: `openai-completions`\n- base URL: `LM_STUDIO_BASE_URL` or `http://127.0.0.1:1234/v1`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery fetches models (`GET /models`) and synthesizes model entries with local defaults.\n\n### Explicit provider discovery\n\nYou can configure discovery yourself:\n\n```yaml\nproviders:\n ollama:\n baseUrl: http://127.0.0.1:11434\n api: openai-responses\n auth: none\n discovery:\n type: ollama\n\n llama.cpp:\n baseUrl: http://127.0.0.1:8080\n api: openai-responses\n auth: none\n discovery:\n type: llama.cpp\n```\n\n### Extension provider registration\n\nExtensions can register providers at runtime (`pi.registerProvider(...)`), including:\n\n- model replacement/append for a provider\n- custom stream handler registration for new API IDs\n- custom OAuth provider registration\n\n## Auth and API key resolution order\n\nWhen requesting a key for a provider, effective order is:\n\n1. Runtime override (CLI `--api-key`)\n2. Stored API key credential in `agent.db`\n3. Stored OAuth credential in `agent.db` (with refresh)\n4. Environment variable mapping (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, etc.)\n5. ModelRegistry fallback resolver (provider `apiKey` from `models.yml`, env-name-or-literal semantics)\n\n`models.yml` `apiKey` behavior:\n\n- Value is first treated as an environment variable name.\n- If no env var exists, the literal string is used as the token.\n\nIf `authHeader: true` and provider `apiKey` is set, models get:\n\n- `Authorization: Bearer <resolved-key>` header injected.\n\nKeyless providers:\n\n- Providers marked `auth: none` are treated as available without credentials.\n- `getApiKey*` returns `kNoAuth` for them.\n\n### Broker mode\n\nWhen `OMP_AUTH_BROKER_URL` (or `auth.broker.url`) is set, the local SQLite credential store is replaced by `RemoteAuthCredentialStore`. Layers 2 and 3 above (stored API key / OAuth in `agent.db`) are served from a broker-supplied snapshot whose `refresh` tokens are redacted; expiry triggers `POST /v1/credential/:id/refresh` on the broker rather than a local refresh.\n\n`AuthStorage.setConfigApiKey` lets a `models.yml` `apiKey` win over a broker-resolved OAuth token without overriding a runtime `--api-key`. See [`auth-broker-gateway.md`](./auth-broker-gateway.md) for the full broker / gateway design and env surface (`OMP_AUTH_BROKER_URL`, `OMP_AUTH_BROKER_TOKEN`, `auth.broker.url`, `auth.broker.token`).\n\n## Model availability vs all models\n\n- `getAll()` returns the loaded model registry (built-in + merged custom + discovered).\n- `getAvailable()` filters to models that are keyless or have resolvable auth.\n\nSo a model can exist in registry but not be selectable until auth is available.\n\n## Runtime model resolution\n\n### CLI and pattern parsing\n\n`model-resolver.ts` supports:\n\n- exact `provider/modelId`\n- exact canonical model id\n- exact model id (provider inferred)\n- fuzzy/substring matching\n- glob scope patterns in `--models` (e.g. `openai/*`, `*sonnet*`)\n- optional `:thinkingLevel` suffix (`off|minimal|low|medium|high|xhigh`)\n\n`--provider` is legacy; `--model` is preferred.\n\nResolution precedence for exact selectors:\n\n1. exact `provider/modelId` bypasses coalescing\n2. exact canonical id resolves through the canonical index\n3. exact bare concrete id still works\n4. fuzzy and glob matching run after the exact paths\n\n### Initial model selection priority\n\n`findInitialModel(...)` uses this order:\n\n1. explicit CLI provider+model\n2. first scoped model (if not resuming)\n3. saved default provider/model\n4. known provider defaults (e.g. OpenAI/Anthropic/etc.) among available models\n5. first available model\n\n### Role aliases and settings\n\nSupported model roles:\n\n- `default`, `smol`, `slow`, `vision`, `plan`, `designer`, `commit`, `task`\n\nRole aliases like `pi/smol` expand through `settings.modelRoles`. Each role value can also append a thinking selector such as `:minimal`, `:low`, `:medium`, or `:high`.\n\nIf a role points at another role, the target model still inherits normally and any explicit suffix on the referring role wins for that role-specific use.\n\nRelated settings:\n\n- `modelRoles` (record)\n- `enabledModels` (scoped pattern list)\n- `modelProviderOrder` (global canonical-provider precedence)\n- `providers.kimiApiFormat` (`openai` or `anthropic` request format)\n- `providers.openaiWebsockets` (`auto|off|on` websocket preference for OpenAI Codex transport)\n\n`modelRoles` may store either:\n\n- `provider/modelId` to pin a concrete provider variant\n- a canonical id such as `gpt-5.3-codex` to allow provider coalescing\n\nFor `enabledModels` and CLI `--models`:\n\n- exact canonical ids expand to all concrete variants in that canonical group\n- explicit `provider/modelId` entries stay exact\n- globs and fuzzy matches still operate on concrete models\n\nGlobal `enabledModels` and `disabledProviders` entries may also be scoped to a path prefix:\n\n```yaml\nenabledModels:\n - claude-sonnet-4-5\n - path: ~/work\n models:\n - anthropic/claude-opus-4-5\ndisabledProviders:\n - ollama\n - path: ~/private\n providers:\n - anthropic\n```\n\nString entries apply everywhere. Scoped entries apply when the current working directory is the configured path or one of its subdirectories. Use `path`, `paths`, `pathPrefix`, or `pathPrefixes`; use `models` for `enabledModels`, `providers` for `disabledProviders`, or `values` for either.\n\n## `/model` and `--list-models`\n\nBoth surfaces keep provider-prefixed models visible and selectable.\n\nThey now also expose canonical/coalesced models:\n\n- `/model` includes a canonical view alongside provider tabs\n- `--list-models` prints a canonical section plus the concrete provider rows\n\nSelecting a canonical entry stores the canonical selector. Selecting a provider row stores the explicit `provider/modelId`.\n\n## Context promotion (model-level fallback chains)\n\nContext promotion is an overflow recovery mechanism for small-context variants (for example `*-spark`) that automatically promotes to a larger-context sibling when the API rejects a request with a context length error.\n\n### Trigger and order\n\nWhen a turn fails with a context overflow error (e.g. `context_length_exceeded`), `AgentSession` attempts promotion **before** falling back to compaction:\n\n1. If `contextPromotion.enabled` is true, resolve a promotion target (see below).\n2. If a target is found, switch to it and retry the request — no compaction needed.\n3. If no target is available, fall through to auto-compaction on the current model.\n\n### Target selection\n\nSelection is model-driven, not role-driven:\n\n1. `currentModel.contextPromotionTarget` (if configured)\n2. smallest larger-context model on the same provider + API\n\nCandidates are ignored unless credentials resolve (`ModelRegistry.getApiKey(...)`).\n\n### OpenAI Codex websocket handoff\n\nIf switching from/to `openai-codex-responses`, session provider state key `openai-codex-responses` is closed before model switch. This drops websocket transport state so the next turn starts clean on the promoted model.\n\n### Persistence behavior\n\nPromotion uses temporary switching (`setModelTemporary`):\n\n- recorded as a temporary `model_change` in session history\n- does not rewrite saved role mapping\n\n### Configuring explicit fallback chains\n\nConfigure fallback directly in model metadata via `contextPromotionTarget`.\n\n`contextPromotionTarget` accepts either:\n\n- `provider/model-id` (explicit)\n- `model-id` (resolved within current provider)\n\nExample (`models.yml`) for Spark -> non-Spark on the same provider:\n\n```yaml\nproviders:\n openai-codex:\n modelOverrides:\n gpt-5.3-codex-spark:\n contextPromotionTarget: openai-codex/gpt-5.3-codex\n```\n\nThe built-in model generator also assigns this automatically for `*-spark` models when a same-provider base model exists.\n\n## Compatibility and routing fields\n\nThe `compat` block on a provider or model overrides the URL-based auto-detection in `packages/ai/src/providers/openai-completions-compat.ts`. It is validated by `OpenAICompatSchema` in `packages/coding-agent/src/config/model-registry.ts` and consumed by every `openai-completions` transport (`packages/ai/src/providers/openai-completions.ts`). The canonical type is `OpenAICompat` in `packages/ai/src/types.ts`.\n\n`models.yml` accepts the following keys (all optional; unset falls back to URL detection):\n\nRequest shaping:\n\n- `supportsStore` — emit `store: false` on requests. Default: auto (off for non-standard endpoints).\n- `supportsDeveloperRole` — use the `developer` system role for reasoning models instead of `system`. Default: auto.\n- `supportsUsageInStreaming` — send `stream_options: { include_usage: true }` to receive token usage on streaming responses. Default: `true`.\n- `maxTokensField` — `\"max_completion_tokens\"` or `\"max_tokens\"`. Default: auto.\n- `supportsToolChoice` — emit the `tool_choice` parameter when the caller forces a specific tool. Default: `true`. Set `false` for endpoints that 400 on `tool_choice` (e.g. DeepSeek when reasoning is on).\n- `disableReasoningOnForcedToolChoice` — drop `reasoning_effort` / OpenRouter `reasoning` whenever `tool_choice` forces a call. Default: auto (Kimi/Anthropic-fronted endpoints).\n- `extraBody` — extra top-level fields merged into every request body (gateway hints, controller selectors, etc.).\n\nReasoning / thinking:\n\n- `supportsReasoningEffort` — accept `reasoning_effort`. Default: auto (off for Grok and zAI).\n- `reasoningEffortMap` — partial map from internal effort levels (`minimal|low|medium|high|xhigh`) to provider-specific strings (e.g. DeepSeek maps `xhigh -> \"max\"`).\n- `thinkingFormat` — request shape for thinking: `\"openai\"` (`reasoning_effort`), `\"openrouter\"` (`reasoning: { effort }`), `\"zai\"` (`thinking: { type: \"enabled\" }`), `\"qwen\"` (top-level `enable_thinking`), or `\"qwen-chat-template\"` (`chat_template_kwargs.enable_thinking`). Default: `\"openai\"`.\n- `reasoningContentField` — assistant field carrying chain-of-thought: `\"reasoning_content\"`, `\"reasoning\"`, or `\"reasoning_text\"`. Default: auto.\n- `requiresReasoningContentForToolCalls` — assistant tool-call turns must round-trip the reasoning field (DeepSeek-R1, Kimi, OpenRouter when reasoning is on). Default: `false`.\n- `requiresAssistantContentForToolCalls` — assistant tool-call turns must include non-empty text content (Kimi). Default: `false`.\n\nTool / message normalization:\n\n- `requiresToolResultName` — tool-result messages need a `name` field (Mistral). Default: auto.\n- `requiresAssistantAfterToolResult` — a user message after a tool result needs an assistant turn in between. Default: auto.\n- `requiresThinkingAsText` — convert thinking blocks to text wrapped in `<thinking>` delimiters (Mistral). Default: auto.\n- `requiresMistralToolIds` — normalize tool-call ids to exactly 9 alphanumeric chars. Default: auto.\n- `supportsStrictMode` — accept the per-tool `strict` field on tool schemas. Default: conservative auto-detect per provider/baseUrl.\n- `toolStrictMode` — `\"all_strict\"` forces strict on every tool, `\"none\"` forces it off; unset keeps the existing per-tool mixed behavior.\n\nGateway routing (only applied when `baseUrl` matches the gateway):\n\n- `openRouterRouting.only` / `openRouterRouting.order` — provider routing on `openrouter.ai` (see <https://openrouter.ai/docs/provider-routing>).\n- `vercelGatewayRouting.only` / `vercelGatewayRouting.order` — provider routing on `ai-gateway.vercel.sh` (see <https://vercel.com/docs/ai-gateway/models-and-providers/provider-options>).\n\nProvider-level `compat` is the baseline; per-model `compat` is deep-merged on top, with `openRouterRouting`, `vercelGatewayRouting`, and `extraBody` merged as nested objects.\n\n### Anthropic compatibility (`anthropic-messages`)\n\nFor `anthropic-messages` models the runtime uses a separate `AnthropicCompat` shape (`packages/ai/src/types.ts`). The `models.yml` schema currently exposes only the strict-tools opt-out as a top-level provider field (see below); the remaining Anthropic-side knobs (`disableAdaptiveThinking`, `supportsEagerToolInputStreaming`, `supportsLongCacheRetention`) are set by built-in catalog metadata and are not user-configurable from `models.yml`.\n\n### Strict tool schemas (`disableStrictTools`)\n\nAnthropic's API supports a `strict` field on tool definitions that forces the model to always follow the provided schema exactly. This is enabled by default for all `anthropic-messages` providers because it guarantees schema conformance in agentic systems.\n\nThird-party providers that front the Anthropic API (AWS Bedrock, Azure, self-hosted proxies) do not always implement this field and will reject requests that include it. Set `disableStrictTools: true` at the provider level to opt out:\n\n```yaml\nproviders:\n bedrock-anthropic:\n baseUrl: https://bedrock-runtime.us-east-1.amazonaws.com/anthropic\n apiKey: AWS_BEARER_TOKEN\n api: anthropic-messages\n disableStrictTools: true\n models:\n - id: claude-sonnet-4-20250514\n name: Claude Sonnet 4 (Bedrock)\n input: [text, image]\n contextWindow: 200000\n maxTokens: 16384\n cost:\n input: 3.00\n output: 15.00\n cacheRead: 0.30\n cacheWrite: 3.75\n```\n\n`disableStrictTools` is a provider-level flag that applies to all models in the provider.\n\nTool schemas going on the wire are normalized by the unified flow in\n`packages/ai/src/utils/schema/normalize.ts` (Google/CCA/MCP dispatchers\nplus the OpenAI strict-mode sanitize+enforce pipeline). See\n[`ai-schema-normalize.md`](./ai-schema-normalize.md) for the strict-mode\nedge cases (local `$ref` inlining, single-item `allOf` collapse,\n`anyOf`-wrapper description hoist, enum/const primitive-type inference)\nand the per-provider dispatcher mapping.\n## Practical examples\n\n### Local OpenAI-compatible endpoint (no auth)\n\n```yaml\nproviders:\n local-openai:\n baseUrl: http://127.0.0.1:8000/v1\n auth: none\n api: openai-completions\n models:\n - id: Qwen/Qwen2.5-Coder-32B-Instruct\n name: Qwen 2.5 Coder 32B (local)\n```\n\n### Hosted proxy with env-based key\n\n```yaml\nproviders:\n anthropic-proxy:\n baseUrl: https://proxy.example.com/anthropic\n apiKey: ANTHROPIC_PROXY_API_KEY\n api: anthropic-messages\n authHeader: true\n disableStrictTools: true # if the proxy doesn't support strict tool schemas\n models:\n - id: claude-sonnet-4-20250514\n name: Claude Sonnet 4 (Proxy)\n reasoning: true\n input: [text, image]\n```\n\n### Override built-in provider route + model metadata\n\n```yaml\nproviders:\n openrouter:\n baseUrl: https://my-proxy.example.com/v1\n headers:\n X-Team: platform\n modelOverrides:\n anthropic/claude-sonnet-4:\n name: Sonnet 4 (Corp)\n compat:\n openRouterRouting:\n only: [anthropic]\n```\n\n## Legacy consumer caveat\n\nMost model configuration now flows through `models.yml` via `ModelRegistry`. Explicit `.json` / `.jsonc` paths remain supported only when passed programmatically to `ModelRegistry`; the default user config is `~/.omp/agent/models.yml`.\n\n## Failure mode\n\nIf `models.yml` fails schema or validation checks:\n\n- registry keeps operating with built-in models\n- error is exposed via `ModelRegistry.getError()` and surfaced in UI/notifications\n",
|
|
31
|
+
"models.md": "# Model and Provider Configuration (`models.yml`)\n\nThis document describes how the coding-agent currently loads models, applies overrides, resolves credentials, and chooses models at runtime.\n\n## What controls model behavior\n\nPrimary implementation files:\n\n- `src/config/model-registry.ts` — loads built-in + custom models, provider overrides, runtime discovery, auth integration\n- `src/config/model-resolver.ts` — parses model patterns and selects initial/smol/slow models\n- `src/config/settings-schema.ts` — model-related settings (`modelRoles`, provider transport preferences)\n- `src/session/auth-storage.ts` — API key + OAuth resolution order\n- `packages/ai/src/models.ts` and `packages/ai/src/types.ts` — built-in providers/models and `Model`/`compat` types\n\n## Config file location and legacy behavior\n\nDefault config path:\n\n- `~/.omp/agent/models.yml`\n\nLegacy behavior still present:\n\n- If `models.yml` is missing and `models.json` exists at the same location, it is migrated to `models.yml`.\n- Explicit `.json` / `.jsonc` config paths are still supported when passed programmatically to `ModelRegistry`.\n\n## `models.yml` shape\n\n```yaml\nproviders:\n <provider-id>:\n # provider-level config\nequivalence:\n overrides:\n <provider-id>/<model-id>: <canonical-model-id>\n exclude:\n - <provider-id>/<model-id>\n```\n\n`provider-id` is the canonical provider key used across selection and auth lookup.\n\n`equivalence` is optional and configures canonical model grouping on top of concrete provider models:\n\n- `overrides` maps an exact concrete selector (`provider/modelId`) to an official upstream canonical id\n- `exclude` opts a concrete selector out of canonical grouping\n\n## Provider-level fields\n\n```yaml\nproviders:\n my-provider:\n baseUrl: https://api.example.com/v1\n apiKey: MY_PROVIDER_API_KEY\n api: openai-completions\n headers:\n X-Team: platform\n authHeader: true\n auth: apiKey\n disableStrictTools: false # set true for Anthropic-compatible endpoints that reject the strict field\n discovery:\n type: ollama\n modelOverrides:\n some-model-id:\n name: Renamed model\n models:\n - id: some-model-id\n name: Some Model\n api: openai-completions\n reasoning: false\n input: [text]\n cost:\n input: 0\n output: 0\n cacheRead: 0\n cacheWrite: 0\n contextWindow: 128000\n maxTokens: 16384\n headers:\n X-Model: value\n compat:\n supportsStore: true\n supportsDeveloperRole: true\n supportsReasoningEffort: true\n maxTokensField: max_completion_tokens\n openRouterRouting:\n only: [anthropic]\n vercelGatewayRouting:\n order: [anthropic, openai]\n extraBody:\n gateway: m1-01\n controller: mlx\n```\n\n### Allowed provider/model `api` values\n\n- `openai-completions`\n- `openai-responses`\n- `openai-codex-responses`\n- `azure-openai-responses`\n- `anthropic-messages`\n- `google-generative-ai`\n- `google-vertex`\n\n### Allowed auth/discovery values\n\n- `auth`: `apiKey` (default), `none`, or `oauth`; for `models.yml` custom models, `oauth` is accepted by schema but does not waive the `apiKey` requirement\n- `discovery.type`: `ollama`, `llama.cpp`, `lm-studio`, `openai-models-list`, or `proxy`\n\n## Validation rules (current)\n\n### Full custom provider (`models` is non-empty)\n\nRequired:\n\n- `baseUrl`\n- `apiKey` unless `auth: none`\n- `api` at provider level or each model\n\n### Override-only provider (`models` missing or empty)\n\nMust define at least one of:\n\n- `baseUrl`\n- `headers`\n- `compat`\n- `disableStrictTools`\n- `modelOverrides`\n- `discovery`\n\n### Discovery\n\n- `discovery` requires provider-level `api`.\n\n### Model value checks\n\n- `id` required\n- `contextWindow` and `maxTokens` must be positive if provided\n\n## Merge and override order\n\nModelRegistry pipeline (on refresh):\n\n1. Load built-in providers/models from `@oh-my-pi/pi-ai`.\n2. Load `models.yml` custom config.\n3. Apply provider overrides (`baseUrl`, `headers`, `disableStrictTools`) to built-in models.\n4. Apply `modelOverrides` (per provider + model id).\n5. Merge custom `models`:\n - same `provider + id` replaces existing\n - otherwise append\n6. Load cached/runtime-discovered models (Ollama, llama.cpp, LM Studio, plus built-in provider managers), then re-apply model overrides.\n\n### Provider-model cache and static fingerprint\n\nCached per-provider model lists are persisted in the model-cache SQLite\ndatabase (schema v3) with a `static_fingerprint` column that hashes the\nstatic catalog slice merged into the row. When `resolveProviderModels`\nskips the network fetch and the fingerprint of the in-memory static\ncatalog matches the cached one, the cached rows are returned verbatim —\nthe static + dynamic merge is bypassed entirely. The fingerprint is\nmemoized per process via a WeakMap keyed by the static-models array\nreference, so repeated cold-start calls do not re-hash.\n\n## Canonical model equivalence and coalescing\n\nThe registry keeps every concrete provider model and then builds a canonical layer above them.\n\nCanonical ids are official upstream ids only, for example:\n\n- `claude-opus-4-6`\n- `claude-haiku-4-5`\n- `gpt-5.3-codex`\n\n### `models.yml` equivalence config\n\nExample:\n\n```yaml\nproviders:\n zenmux:\n baseUrl: https://api.zenmux.example/v1\n apiKey: ZENMUX_API_KEY\n api: openai-codex-responses\n models:\n - id: codex\n name: Zenmux Codex\n reasoning: true\n input: [text]\n cost:\n input: 0\n output: 0\n cacheRead: 0\n cacheWrite: 0\n contextWindow: 200000\n maxTokens: 32768\n\nequivalence:\n overrides:\n zenmux/codex: gpt-5.3-codex\n p-codex/codex: gpt-5.3-codex\n exclude:\n - demo/codex-preview\n```\n\nBuild order for canonical grouping:\n\n1. exact user override from `equivalence.overrides`\n2. bundled official-id matches from built-in model metadata\n3. conservative heuristic normalization for gateway/provider variants\n4. fallback to the concrete model's own id\n\nCurrent heuristics are intentionally narrow:\n\n- embedded upstream prefixes can be stripped when present, for example `anthropic/...` or `openai/...`\n- dotted and dashed version variants can normalize only when they map to an existing official id, for example `4.6 -> 4-6`\n- ambiguous families or versions are not merged without a bundled match or explicit override\n\n### Canonical resolution behavior\n\nWhen multiple concrete variants share a canonical id, resolution uses:\n\n1. availability and auth\n2. `config.yml` `modelProviderOrder`\n3. existing registry/provider order if `modelProviderOrder` is unset\n\nDisabled or unauthenticated providers are skipped.\n\nSession state and transcripts continue to record the concrete provider/model that actually executed the turn.\n\nProvider defaults vs per-model overrides:\n\n- Provider `headers` are baseline.\n- Model `headers` override provider header keys.\n- `modelOverrides` can override model metadata (`name`, `reasoning`, `input`, `cost`, `contextWindow`, `maxTokens`, `headers`, `compat`, `contextPromotionTarget`).\n- `compat` is deep-merged for nested routing blocks (`openRouterRouting`, `vercelGatewayRouting`, `extraBody`).\n\n## Runtime discovery integration\n\n### Implicit Ollama discovery\n\nIf `ollama` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `ollama`\n- api: `openai-responses`\n- base URL: `OLLAMA_BASE_URL` or `http://127.0.0.1:11434`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery calls Ollama endpoints and normalizes discovered OpenAI-compatible models to `openai-responses`.\n\n### Implicit llama.cpp discovery\n\nIf `llama.cpp` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `llama.cpp`\n- api: `openai-responses`\n- base URL: `LLAMA_CPP_BASE_URL` or `http://127.0.0.1:8080`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery calls llama.cpp model endpoints and synthesizes model entries with local defaults.\n\n### Implicit LM Studio discovery\n\nIf `lm-studio` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `lm-studio`\n- api: `openai-completions`\n- base URL: `LM_STUDIO_BASE_URL` or `http://127.0.0.1:1234/v1`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery fetches models (`GET /models`) and synthesizes model entries with local defaults.\n\n### Explicit provider discovery\n\nYou can configure discovery yourself:\n\n```yaml\nproviders:\n ollama:\n baseUrl: http://127.0.0.1:11434\n api: openai-responses\n auth: none\n discovery:\n type: ollama\n\n llama.cpp:\n baseUrl: http://127.0.0.1:8080\n api: openai-responses\n auth: none\n discovery:\n type: llama.cpp\n```\n\n### Proxy discovery (`discovery.type: proxy`)\n\nFor Anthropic+OpenAI-compatible proxies (new-api / one-api / similar)\nthat expose both `/v1/messages` and `/v1/chat/completions` behind the same\nhost. Discovery hits `GET /v1/models` (10s timeout, OpenAI-style payload) and\nderives each model's `api` from the entry's `supported_endpoint_types`:\n\n- contains `\"anthropic\"` -> `api: anthropic-messages` (routes via `/v1/messages`)\n- contains `\"openai\"` -> `api: openai-completions` (routes via `/v1/chat/completions`)\n- otherwise -> falls back to provider-level `api` if set, else dropped\n\nProvider-level `api` is **optional** with `discovery.type: proxy` because the\nper-model wire is auto-detected. The Anthropic SDK strips a trailing `/v1`\nfrom `baseUrl` before appending `/v1/messages`, so a single discovery `baseUrl`\n(ending in `/v1`) round-trips correctly to both wires.\n\n```yaml\nproviders:\n newapi-reseller:\n baseUrl: https://api.example.com/v1\n apiKey: xxxx\n authHeader: true # injects Authorization: Bearer for openai models\n disableStrictTools: true # most anthropic-fronted proxies reject `strict`\n discovery:\n type: proxy\n```\n\n### Extension provider registration\n\nExtensions can register providers at runtime (`pi.registerProvider(...)`), including:\n\n- model replacement/append for a provider\n- custom stream handler registration for new API IDs\n- custom OAuth provider registration\n\n## Auth and API key resolution order\n\nWhen requesting a key for a provider, effective order is:\n\n1. Runtime override (CLI `--api-key`)\n2. Stored API key credential in `agent.db`\n3. Stored OAuth credential in `agent.db` (with refresh)\n4. Environment variable mapping (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, etc.)\n5. ModelRegistry fallback resolver (provider `apiKey` from `models.yml`, env-name-or-literal semantics)\n\n`models.yml` `apiKey` behavior:\n\n- Value is first treated as an environment variable name.\n- If no env var exists, the literal string is used as the token.\n\nIf `authHeader: true` and provider `apiKey` is set, models get:\n\n- `Authorization: Bearer <resolved-key>` header injected.\n\nKeyless providers:\n\n- Providers marked `auth: none` are treated as available without credentials.\n- `getApiKey*` returns `kNoAuth` for them.\n\n### Broker mode\n\nWhen `OMP_AUTH_BROKER_URL` (or `auth.broker.url`) is set, the local SQLite credential store is replaced by `RemoteAuthCredentialStore`. Layers 2 and 3 above (stored API key / OAuth in `agent.db`) are served from a broker-supplied snapshot whose `refresh` tokens are redacted; expiry triggers `POST /v1/credential/:id/refresh` on the broker rather than a local refresh.\n\n`AuthStorage.setConfigApiKey` lets a `models.yml` `apiKey` win over a broker-resolved OAuth token without overriding a runtime `--api-key`. See [`auth-broker-gateway.md`](./auth-broker-gateway.md) for the full broker / gateway design and env surface (`OMP_AUTH_BROKER_URL`, `OMP_AUTH_BROKER_TOKEN`, `auth.broker.url`, `auth.broker.token`).\n\n## Model availability vs all models\n\n- `getAll()` returns the loaded model registry (built-in + merged custom + discovered).\n- `getAvailable()` filters to models that are keyless or have resolvable auth.\n\nSo a model can exist in registry but not be selectable until auth is available.\n\n## Runtime model resolution\n\n### CLI and pattern parsing\n\n`model-resolver.ts` supports:\n\n- exact `provider/modelId`\n- exact canonical model id\n- exact model id (provider inferred)\n- fuzzy/substring matching\n- glob scope patterns in `--models` (e.g. `openai/*`, `*sonnet*`)\n- optional `:thinkingLevel` suffix (`off|minimal|low|medium|high|xhigh`)\n\n`--provider` is legacy; `--model` is preferred.\n\nResolution precedence for exact selectors:\n\n1. exact `provider/modelId` bypasses coalescing\n2. exact canonical id resolves through the canonical index\n3. exact bare concrete id still works\n4. fuzzy and glob matching run after the exact paths\n\n### Initial model selection priority\n\n`findInitialModel(...)` uses this order:\n\n1. explicit CLI provider+model\n2. first scoped model (if not resuming)\n3. saved default provider/model\n4. known provider defaults (e.g. OpenAI/Anthropic/etc.) among available models\n5. first available model\n\n### Role aliases and settings\n\nSupported model roles:\n\n- `default`, `smol`, `slow`, `vision`, `plan`, `designer`, `commit`, `task`\n\nRole aliases like `pi/smol` expand through `settings.modelRoles`. Each role value can also append a thinking selector such as `:minimal`, `:low`, `:medium`, or `:high`.\n\nIf a role points at another role, the target model still inherits normally and any explicit suffix on the referring role wins for that role-specific use.\n\nRelated settings:\n\n- `modelRoles` (record)\n- `enabledModels` (scoped pattern list)\n- `modelProviderOrder` (global canonical-provider precedence)\n- `providers.kimiApiFormat` (`openai` or `anthropic` request format)\n- `providers.openaiWebsockets` (`auto|off|on` websocket preference for OpenAI Codex transport)\n\n`modelRoles` may store either:\n\n- `provider/modelId` to pin a concrete provider variant\n- a canonical id such as `gpt-5.3-codex` to allow provider coalescing\n\nFor `enabledModels` and CLI `--models`:\n\n- exact canonical ids expand to all concrete variants in that canonical group\n- explicit `provider/modelId` entries stay exact\n- globs and fuzzy matches still operate on concrete models\n\nGlobal `enabledModels` and `disabledProviders` entries may also be scoped to a path prefix:\n\n```yaml\nenabledModels:\n - claude-sonnet-4-5\n - path: ~/work\n models:\n - anthropic/claude-opus-4-5\ndisabledProviders:\n - ollama\n - path: ~/private\n providers:\n - anthropic\n```\n\nString entries apply everywhere. Scoped entries apply when the current working directory is the configured path or one of its subdirectories. Use `path`, `paths`, `pathPrefix`, or `pathPrefixes`; use `models` for `enabledModels`, `providers` for `disabledProviders`, or `values` for either.\n\n## `/model` and `--list-models`\n\nBoth surfaces keep provider-prefixed models visible and selectable.\n\nThey now also expose canonical/coalesced models:\n\n- `/model` includes a canonical view alongside provider tabs\n- `--list-models` prints a canonical section plus the concrete provider rows\n\nSelecting a canonical entry stores the canonical selector. Selecting a provider row stores the explicit `provider/modelId`.\n\n## Context promotion (model-level fallback chains)\n\nContext promotion is an overflow recovery mechanism for small-context variants (for example `*-spark`) that automatically promotes to a larger-context sibling when the API rejects a request with a context length error.\n\n### Trigger and order\n\nWhen a turn fails with a context overflow error (e.g. `context_length_exceeded`), `AgentSession` attempts promotion **before** falling back to compaction:\n\n1. If `contextPromotion.enabled` is true, resolve a promotion target (see below).\n2. If a target is found, switch to it and retry the request — no compaction needed.\n3. If no target is available, fall through to auto-compaction on the current model.\n\n### Target selection\n\nSelection is model-driven, not role-driven:\n\n1. `currentModel.contextPromotionTarget` (if configured)\n2. smallest larger-context model on the same provider + API\n\nCandidates are ignored unless credentials resolve (`ModelRegistry.getApiKey(...)`).\n\n### OpenAI Codex websocket handoff\n\nIf switching from/to `openai-codex-responses`, session provider state key `openai-codex-responses` is closed before model switch. This drops websocket transport state so the next turn starts clean on the promoted model.\n\n### Persistence behavior\n\nPromotion uses temporary switching (`setModelTemporary`):\n\n- recorded as a temporary `model_change` in session history\n- does not rewrite saved role mapping\n\n### Configuring explicit fallback chains\n\nConfigure fallback directly in model metadata via `contextPromotionTarget`.\n\n`contextPromotionTarget` accepts either:\n\n- `provider/model-id` (explicit)\n- `model-id` (resolved within current provider)\n\nExample (`models.yml`) for Spark -> non-Spark on the same provider:\n\n```yaml\nproviders:\n openai-codex:\n modelOverrides:\n gpt-5.3-codex-spark:\n contextPromotionTarget: openai-codex/gpt-5.3-codex\n```\n\nThe built-in model generator also assigns this automatically for `*-spark` models when a same-provider base model exists.\n\n## Compatibility and routing fields\n\nThe `compat` block on a provider or model overrides the URL-based auto-detection in `packages/ai/src/providers/openai-completions-compat.ts`. It is validated by `OpenAICompatSchema` in `packages/coding-agent/src/config/model-registry.ts` and consumed by every `openai-completions` transport (`packages/ai/src/providers/openai-completions.ts`). The canonical type is `OpenAICompat` in `packages/ai/src/types.ts`.\n\n`models.yml` accepts the following keys (all optional; unset falls back to URL detection):\n\nRequest shaping:\n\n- `supportsStore` — emit `store: false` on requests. Default: auto (off for non-standard endpoints).\n- `supportsDeveloperRole` — use the `developer` system role for reasoning models instead of `system`. Default: auto.\n- `supportsUsageInStreaming` — send `stream_options: { include_usage: true }` to receive token usage on streaming responses. Default: `true`.\n- `maxTokensField` — `\"max_completion_tokens\"` or `\"max_tokens\"`. Default: auto.\n- `supportsToolChoice` — emit the `tool_choice` parameter when the caller forces a specific tool. Default: `true`. Set `false` for endpoints that 400 on `tool_choice` (e.g. DeepSeek when reasoning is on).\n- `disableReasoningOnForcedToolChoice` — drop `reasoning_effort` / OpenRouter `reasoning` whenever `tool_choice` forces a call. Default: auto (Kimi/Anthropic-fronted endpoints).\n- `extraBody` — extra top-level fields merged into every request body (gateway hints, controller selectors, etc.).\n\nReasoning / thinking:\n\n- `supportsReasoningEffort` — accept `reasoning_effort`. Default: auto (off for Grok and zAI).\n- `reasoningEffortMap` — partial map from internal effort levels (`minimal|low|medium|high|xhigh`) to provider-specific strings (e.g. DeepSeek maps `xhigh -> \"max\"`).\n- `thinkingFormat` — request shape for thinking: `\"openai\"` (`reasoning_effort`), `\"openrouter\"` (`reasoning: { effort }`), `\"zai\"` (`thinking: { type: \"enabled\" }`), `\"qwen\"` (top-level `enable_thinking`), or `\"qwen-chat-template\"` (`chat_template_kwargs.enable_thinking`). Default: `\"openai\"`.\n- `reasoningContentField` — assistant field carrying chain-of-thought: `\"reasoning_content\"`, `\"reasoning\"`, or `\"reasoning_text\"`. Default: auto.\n- `requiresReasoningContentForToolCalls` — assistant tool-call turns must round-trip the reasoning field (DeepSeek-R1, Kimi, OpenRouter when reasoning is on). Default: `false`.\n- `requiresAssistantContentForToolCalls` — assistant tool-call turns must include non-empty text content (Kimi). Default: `false`.\n\nTool / message normalization:\n\n- `requiresToolResultName` — tool-result messages need a `name` field (Mistral). Default: auto.\n- `requiresAssistantAfterToolResult` — a user message after a tool result needs an assistant turn in between. Default: auto.\n- `requiresThinkingAsText` — convert thinking blocks to text wrapped in `<thinking>` delimiters (Mistral). Default: auto.\n- `requiresMistralToolIds` — normalize tool-call ids to exactly 9 alphanumeric chars. Default: auto.\n- `supportsStrictMode` — accept the per-tool `strict` field on tool schemas. Default: conservative auto-detect per provider/baseUrl.\n- `toolStrictMode` — `\"all_strict\"` forces strict on every tool, `\"none\"` forces it off; unset keeps the existing per-tool mixed behavior.\n\nGateway routing (only applied when `baseUrl` matches the gateway):\n\n- `openRouterRouting.only` / `openRouterRouting.order` — provider routing on `openrouter.ai` (see <https://openrouter.ai/docs/provider-routing>).\n- `vercelGatewayRouting.only` / `vercelGatewayRouting.order` — provider routing on `ai-gateway.vercel.sh` (see <https://vercel.com/docs/ai-gateway/models-and-providers/provider-options>).\n\nProvider-level `compat` is the baseline; per-model `compat` is deep-merged on top, with `openRouterRouting`, `vercelGatewayRouting`, and `extraBody` merged as nested objects.\n\n### Anthropic compatibility (`anthropic-messages`)\n\nFor `anthropic-messages` models the runtime uses a separate `AnthropicCompat` shape (`packages/ai/src/types.ts`). The `models.yml` schema currently exposes only the strict-tools opt-out as a top-level provider field (see below); the remaining Anthropic-side knobs (`disableAdaptiveThinking`, `supportsEagerToolInputStreaming`, `supportsLongCacheRetention`) are set by built-in catalog metadata and are not user-configurable from `models.yml`.\n\n### Strict tool schemas (`disableStrictTools`)\n\nAnthropic's API supports a `strict` field on tool definitions that forces the model to always follow the provided schema exactly. This is enabled by default for all `anthropic-messages` providers because it guarantees schema conformance in agentic systems.\n\nThird-party providers that front the Anthropic API (AWS Bedrock, Azure, self-hosted proxies) do not always implement this field and will reject requests that include it. Set `disableStrictTools: true` at the provider level to opt out:\n\n```yaml\nproviders:\n bedrock-anthropic:\n baseUrl: https://bedrock-runtime.us-east-1.amazonaws.com/anthropic\n apiKey: AWS_BEARER_TOKEN\n api: anthropic-messages\n disableStrictTools: true\n models:\n - id: claude-sonnet-4-20250514\n name: Claude Sonnet 4 (Bedrock)\n input: [text, image]\n contextWindow: 200000\n maxTokens: 16384\n cost:\n input: 3.00\n output: 15.00\n cacheRead: 0.30\n cacheWrite: 3.75\n```\n\n`disableStrictTools` is a provider-level flag that applies to all models in the provider.\n\nTool schemas going on the wire are normalized by the unified flow in\n`packages/ai/src/utils/schema/normalize.ts` (Google/CCA/MCP dispatchers\nplus the OpenAI strict-mode sanitize+enforce pipeline). See\n[`ai-schema-normalize.md`](./ai-schema-normalize.md) for the strict-mode\nedge cases (local `$ref` inlining, single-item `allOf` collapse,\n`anyOf`-wrapper description hoist, enum/const primitive-type inference)\nand the per-provider dispatcher mapping.\n## Practical examples\n\n### Local OpenAI-compatible endpoint (no auth)\n\n```yaml\nproviders:\n local-openai:\n baseUrl: http://127.0.0.1:8000/v1\n auth: none\n api: openai-completions\n models:\n - id: Qwen/Qwen2.5-Coder-32B-Instruct\n name: Qwen 2.5 Coder 32B (local)\n```\n\n### Hosted proxy with env-based key\n\n```yaml\nproviders:\n anthropic-proxy:\n baseUrl: https://proxy.example.com/anthropic\n apiKey: ANTHROPIC_PROXY_API_KEY\n api: anthropic-messages\n authHeader: true\n disableStrictTools: true # if the proxy doesn't support strict tool schemas\n models:\n - id: claude-sonnet-4-20250514\n name: Claude Sonnet 4 (Proxy)\n reasoning: true\n input: [text, image]\n```\n\n### Override built-in provider route + model metadata\n\n```yaml\nproviders:\n openrouter:\n baseUrl: https://my-proxy.example.com/v1\n headers:\n X-Team: platform\n modelOverrides:\n anthropic/claude-sonnet-4:\n name: Sonnet 4 (Corp)\n compat:\n openRouterRouting:\n only: [anthropic]\n```\n\n## Legacy consumer caveat\n\nMost model configuration now flows through `models.yml` via `ModelRegistry`. Explicit `.json` / `.jsonc` paths remain supported only when passed programmatically to `ModelRegistry`; the default user config is `~/.omp/agent/models.yml`.\n\n## Failure mode\n\nIf `models.yml` fails schema or validation checks:\n\n- registry keeps operating with built-in models\n- error is exposed via `ModelRegistry.getError()` and surfaced in UI/notifications\n",
|
|
32
32
|
"natives-addon-loader-runtime.md": "# Natives Addon Loader Runtime\n\nThis document covers the runtime loader shipped by `@oh-my-pi/pi-natives`: how `native/index.js` decides which `.node` file to require, how compiled-binary embedded payloads are extracted, and what startup failures report.\n\n## Implementation files\n\n- `packages/natives/native/index.js`\n- `packages/natives/native/loader-state.js`\n- `packages/natives/native/embedded-addon.js`\n- `packages/natives/scripts/embed-native.ts`\n- `packages/natives/package.json`\n\n## Scope and responsibility\n\nThe loader is intentionally narrow:\n\n- Build a platform/CPU-aware candidate list for addon filenames and directories.\n- Treat an embedded-addon manifest as the authoritative compiled-binary signal when present.\n- Optionally materialize an embedded addon into a versioned per-user cache directory.\n- Attempt candidates in deterministic order and return the first addon that `require(...)` loads.\n\nThe current loader does **not** run a separate `validateNative(...)` export-presence gate. API shape is provided by the generated N-API binding file (`native/index.d.ts`) and the loaded addon itself. A stale binary therefore normally fails as a missing property or native load error rather than as a custom \"missing exports\" validation error.\n\n## Runtime inputs and derived state\n\nAt module initialization, `native/index.js` computes:\n\n- **Platform tag**: `${process.platform}-${process.arch}` (for example `darwin-arm64`).\n- **Package version**: from `packages/natives/package.json`.\n- **Core directories**:\n - `nativeDir`: package-local `packages/natives/native`.\n - `execDir`: directory containing `process.execPath`.\n - `versionedDir`: `<getNativesDir()>/<packageVersion>`.\n - `userDataDir` fallback:\n - Windows: `%LOCALAPPDATA%/omp` or `%USERPROFILE%/AppData/Local/omp`.\n - Non-Windows: `~/.local/bin`.\n- **Natives cache root** (`getNativesDir()`):\n - if `$XDG_DATA_HOME/omp` exists, `$XDG_DATA_HOME/omp/natives`;\n - otherwise `~/.omp/natives`.\n- **Compiled-binary mode** (`detectCompiledBinary`): true if any of:\n - embedded-addon manifest is non-null,\n - `PI_COMPILED` env var is set,\n - `import.meta.url` contains Bun embedded markers (`$bunfs`, `~BUN`, `%7EBUN`).\n- **Variant override**: `PI_NATIVE_VARIANT` (`modern`/`baseline` only; invalid values ignored).\n- **Selected variant**: explicit override, otherwise runtime AVX2 detection on x64 (`modern` if AVX2, else `baseline`).\n\n## Platform support and tag resolution\n\n`SUPPORTED_PLATFORMS` is fixed to:\n\n- `linux-x64`\n- `linux-arm64`\n- `darwin-x64`\n- `darwin-arm64`\n- `win32-x64`\n\nUnsupported platforms are not rejected before probing. The loader first tries the computed candidate paths. If all fail and `platformTag` is unsupported, it throws an unsupported-platform error listing supported tags.\n\n## Variant selection (`modern` / `baseline` / default)\n\n### x64 behavior\n\n1. `PI_NATIVE_VARIANT=modern|baseline` wins when valid.\n2. Otherwise AVX2 support is detected:\n - Linux: scan `/proc/cpuinfo` for `avx2`.\n - macOS: `sysctl -n machdep.cpu.leaf7_features`, then `machdep.cpu.features`.\n - Windows: PowerShell `[System.Runtime.Intrinsics.X86.Avx2]::IsSupported`.\n3. AVX2 selects `modern`; unavailable or undetectable AVX2 selects `baseline`.\n\n### Non-x64 behavior\n\nNo variant suffix is used; the filename is `pi_natives.<platform>-<arch>.node`.\n\n### Filename construction\n\n`loader-state.js#getAddonFilenames` returns:\n\n- Non-x64 or no variant: `pi_natives.<tag>.node`\n- x64 + `modern`:\n 1. `pi_natives.<tag>-modern.node`\n 2. `pi_natives.<tag>-baseline.node`\n 3. `pi_natives.<tag>.node`\n- x64 + `baseline`:\n 1. `pi_natives.<tag>-baseline.node`\n 2. `pi_natives.<tag>.node`\n\nThe default unsuffixed fallback remains part of the x64 candidate list.\n\n## Candidate path construction and fallback ordering\n\n`resolveLoaderCandidates(...)` expands every filename across directories, then de-duplicates while preserving first occurrence order.\n\n### Non-compiled runtime\n\nFor each filename, candidates are:\n\n1. `<nativeDir>/<filename>`\n2. `<execDir>/<filename>`\n\n### Compiled runtime\n\nFor each filename, candidates are:\n\n1. `<versionedDir>/<filename>`\n2. `<userDataDir>/<filename>`\n3. `<nativeDir>/<filename>`\n4. `<execDir>/<filename>`\n\nAt load time, an extracted embedded candidate, when produced, is prepended ahead of these de-duplicated candidates.\n\n## Embedded addon extraction lifecycle\n\n`embedded-addon.js` is generated by `scripts/embed-native.ts`. The reset stub exports `embeddedAddon = null`. A populated manifest has:\n\n- `platformTag`\n- `version`\n- `files[]` entries with `variant`, `filename`, and `filePath`\n\nExtraction (`maybeExtractEmbeddedAddon`) runs only when:\n\n1. compiled-binary mode is true,\n2. `embeddedAddon` is non-null,\n3. manifest `platformTag` equals the runtime platform tag,\n4. manifest `version` equals the package version,\n5. a variant-appropriate embedded file exists.\n\nVariant file selection:\n\n- Non-x64: prefer `default`, then first available file.\n- x64 + `modern`: prefer `modern`, fallback to `baseline`.\n- x64 + `baseline`: require `baseline`.\n\nMaterialization:\n\n1. Ensure `<versionedDir>` exists.\n2. Reuse `<versionedDir>/<selected filename>` if it already exists.\n3. Otherwise read `selectedEmbeddedFile.filePath` and write the target path.\n4. Return the target path as the first candidate.\n\nDirectory creation or write failures are appended to the loader error list; probing continues through normal candidates.\n\n## Lifecycle and state transitions\n\n```text\nInit\n -> Load package metadata and embedded-addon manifest\n -> Compute platform/version/variant/filenames/candidate paths\n -> (compiled + embedded manifest matches?)\n yes -> try extract to versionedDir (record errors, continue)\n no -> skip extraction\n -> For each runtime candidate in order:\n require(candidate)\n -> success: return addon exports (READY)\n -> failure: record error, continue\n -> none loaded:\n if unsupported platform tag -> throw Unsupported platform\n else -> throw Failed to load (tried-path diagnostics + hints)\n```\n\n## Failure behavior and diagnostics\n\n### Unsupported platform\n\nIf all candidates fail and `platformTag` is not supported, the loader throws:\n\n- `Unsupported platform: <tag>`\n- supported platform list\n- issue-reporting guidance\n\n### No loadable candidate\n\nIf the platform is supported but no candidate can be loaded, the final error includes:\n\n- `Failed to load pi_natives native addon for <platformTag>` or `<platformTag> (<variant>)`\n- every attempted path with the corresponding `require(...)` error\n- mode-specific remediation hints\n\n### Compiled-binary startup failures\n\nCompiled mode diagnostics include:\n\n- expected versioned cache target paths (`<versionedDir>/<filename>`),\n- remediation to delete the versioned cache and rerun,\n- direct release download `curl` commands for each expected filename.\n\n### Non-compiled startup failures\n\nNormal package/runtime diagnostics include:\n\n- reinstall hint (`bun install @oh-my-pi/pi-natives`),\n- local rebuild command (`bun --cwd=packages/natives run build`),\n- optional x64 variant build hint (`TARGET_VARIANT=baseline|modern bun --cwd=packages/natives run build`).\n",
|
|
33
33
|
"natives-architecture.md": "# Natives Architecture\n\n`@oh-my-pi/pi-natives` is now a two-layer package around a loader:\n\n1. **CommonJS loader/package entrypoint** resolves and loads the correct `.node` addon and patches generated enum objects onto the export object.\n2. **Rust N-API module layer** implements the exported functions/classes and emits the generated TypeScript declarations.\n\nThis document is the foundation for deeper module-level docs.\n\n## Implementation files\n\n- `packages/natives/native/index.js`\n- `packages/natives/native/index.d.ts`\n- `packages/natives/native/loader-state.js`\n- `packages/natives/native/embedded-addon.js`\n- `packages/natives/scripts/build-native.ts`\n- `packages/natives/scripts/embed-native.ts`\n- `packages/natives/scripts/gen-enums.ts`\n- `packages/natives/package.json`\n- `crates/pi-natives/src/lib.rs`\n\n## Package entrypoint and public surface\n\n`packages/natives/package.json` points directly at generated native bindings:\n\n- `main`: `./native/index.js`\n- `types`: `./native/index.d.ts`\n- `exports[\".\"].types`: `./native/index.d.ts`\n- `exports[\".\"].import`: `./native/index.js`\n\nThere is no current `packages/natives/src` TypeScript wrapper layer. Consumers import functions/classes/enums directly from `@oh-my-pi/pi-natives`; the type contract is the generated `native/index.d.ts` plus enum exports appended by `scripts/gen-enums.ts`.\n\nCurrent capability groups in the generated API include:\n\n- **Search/text/code primitives**: `grep`, `search`, `hasMatch`, `fuzzyFind`, `glob`, `astGrep`, `astEdit`, text width/slicing/wrapping/sanitization, syntax highlighting, token counting.\n- **Execution/process/terminal primitives**: `executeShell`, `Shell`, `PtySession`, process-tree helpers, key parsing.\n- **System/media/conversion primitives**: clipboard, image resize/encode/SIXEL, HTML-to-Markdown, macOS appearance/power helpers, work profiling, Windows ProjFS overlay helpers.\n\n## Loader layer\n\n`packages/natives/native/index.js` owns runtime addon selection and optional embedded extraction.\n\n### Candidate resolution model\n\n- Platform tag is `${process.platform}-${process.arch}`.\n- Supported tags are currently:\n - `linux-x64`\n - `linux-arm64`\n - `darwin-x64`\n - `darwin-arm64`\n - `win32-x64`\n- x64 can use CPU variants:\n - `modern` (AVX2-capable)\n - `baseline` (fallback)\n- Non-x64 uses the default filename without a variant suffix.\n\nFilename strategy:\n\n- Default: `pi_natives.<platform>-<arch>.node`\n- x64 variant: `pi_natives.<platform>-<arch>-modern.node` or `...-baseline.node`\n- x64 runtime fallback includes the unsuffixed default filename after variant candidates.\n\n### Platform-specific variant detection\n\nFor x64, variant selection uses:\n\n- Linux: `/proc/cpuinfo`\n- macOS: `sysctl -n machdep.cpu.leaf7_features`, then `machdep.cpu.features`\n- Windows: PowerShell check for `System.Runtime.Intrinsics.X86.Avx2`\n\n`PI_NATIVE_VARIANT` can force `modern` or `baseline`; invalid values are ignored.\n\n### Binary distribution and extraction model\n\n`packages/natives/package.json` publishes `native/`, which contains the loader, generated declarations, generated enum patch, embedded-addon manifest stub, and prebuilt `.node` artifacts.\n\nFor compiled binaries, loader behavior is:\n\n1. Check versioned user cache path: `<getNativesDir()>/<packageVersion>/...`.\n2. Check legacy compiled-binary location:\n - Windows: `%LOCALAPPDATA%/omp` (fallback `%USERPROFILE%/AppData/Local/omp`)\n - non-Windows: `~/.local/bin`\n3. Fall back to packaged `native/` and executable directory candidates.\n\n`getNativesDir()` uses `$XDG_DATA_HOME/omp/natives` when `$XDG_DATA_HOME/omp` exists; otherwise it uses `~/.omp/natives`.\n\nIf a populated embedded addon manifest is present, it is also treated as a compiled-binary signal. The loader can extract the matching embedded `.node` into the versioned cache directory before candidate probing.\n\n### Failure modes\n\nLoader failures are explicit:\n\n- **Unsupported platform tag**: after failed probing, throws with supported platform list.\n- **No loadable candidate**: throws with all attempted paths and remediation hints.\n- **Embedded extraction errors**: directory/write failures are recorded and included in final load diagnostics if no candidate loads.\n\nThe current loader does not perform a separate post-`require` export validation pass.\n\n## Rust N-API module layer\n\n`crates/pi-natives/src/lib.rs` declares exported module ownership:\n\n- `appearance`\n- `ast`\n- `clipboard`\n- `fd`\n- `fs_cache`\n- `glob`\n- `glob_util`\n- `grep`\n- `highlight`\n- `html`\n- `image`\n- `keys`\n- `language`\n- `power`\n- `prof`\n- `projfs_overlay`\n- `ps`\n- `pty`\n- `shell`\n- `task`\n- `text`\n- `tokens`\n- `utils` (crate-private helpers)\n\nN-API exports are generated from Rust `#[napi]` functions/classes/objects/enums. Snake_case Rust names are exposed as camelCase JavaScript names unless explicitly configured by napi-rs.\n\n## Ownership boundaries\n\n- **Loader/package ownership (`packages/natives/native`, `packages/natives/scripts`)**\n - runtime binary selection\n - CPU variant selection and override handling\n - compiled-binary embedded extraction\n - generated TypeScript declarations and enum export patching\n- **Rust ownership (`crates/pi-natives/src`)**\n - algorithmic and system-level implementation\n - platform-native behavior and performance-sensitive logic\n - N-API symbol implementation consumed directly by package callers\n- **Consumer ownership (`packages/coding-agent`, `packages/tui`)**\n - user-facing policy and fallbacks that are not built into the native API\n - higher-level rendering, artifact, shell-session, and command behavior\n\n## Runtime flow (high level)\n\n1. Consumer imports from `@oh-my-pi/pi-natives`.\n2. `native/index.js` computes platform/arch/variant and candidate paths.\n3. Optional embedded binary extraction occurs for compiled distributions.\n4. The first `require(candidate)` that succeeds becomes the exported addon object.\n5. Generated enum objects are appended to `module.exports`.\n6. Caller invokes generated N-API functions/classes directly.\n\n## Glossary\n\n- **Native addon**: A `.node` binary loaded via Node-API (N-API).\n- **Platform tag**: Runtime tuple `platform-arch` (for example `darwin-arm64`).\n- **Variant**: x64 CPU-specific build flavor (`modern` AVX2, `baseline` fallback).\n- **Generated binding declaration**: `native/index.d.ts` emitted by napi-rs during `build-native.ts`.\n- **Compiled binary mode**: Runtime mode where the CLI is bundled and native addons are resolved from embedded/cache paths before package-local paths.\n- **Embedded addon**: Build artifact metadata and file references generated into `native/embedded-addon.js` so compiled binaries can extract matching `.node` payloads.\n",
|
|
34
34
|
"natives-binding-contract.md": "# Natives Binding Contract (JavaScript/TypeScript Side)\n\nThis document defines the JS/TS contract between `@oh-my-pi/pi-natives` callers and the loaded N-API addon.\n\nCurrent package shape is direct-to-native: there is no `packages/natives/src/<module>` TypeScript wrapper layer. The public API is the generated `packages/natives/native/index.d.ts` declaration file, the CommonJS loader in `packages/natives/native/index.js`, and the Rust `#[napi]` exports in `crates/pi-natives/src`.\n\n## Implementation files\n\n- `packages/natives/native/index.js`\n- `packages/natives/native/index.d.ts`\n- `packages/natives/native/loader-state.js`\n- `packages/natives/scripts/build-native.ts`\n- `packages/natives/scripts/gen-enums.ts`\n- `packages/natives/package.json`\n- `crates/pi-natives/src/lib.rs`\n- Rust modules under `crates/pi-natives/src/*.rs`\n\n## Contract model\n\nThe contract has three parts:\n\n1. **Generated runtime loader** (`native/index.js`)\n - computes candidates and `require(...)`s the `.node` addon;\n - exports the loaded addon object directly;\n - appends enum objects generated by `scripts/gen-enums.ts`.\n2. **Generated TypeScript declarations** (`native/index.d.ts`)\n - generated by napi-rs during `scripts/build-native.ts`;\n - declares exported functions, classes, object interfaces, and native enums;\n - is the package `types` entry.\n3. **Rust N-API exports** (`crates/pi-natives/src`)\n - `#[napi]` functions/classes/objects/enums are the source of generated declarations and runtime symbols;\n - snake_case Rust names become camelCase JavaScript names by napi-rs convention.\n\nThere is no current `NativeBindings` declaration-merging lifecycle and no `validateNative(...)` required-export list in the loader.\n\n## Public export surface organization\n\n`packages/natives/package.json` exposes the package root only:\n\n```json\n{\n \"main\": \"./native/index.js\",\n \"types\": \"./native/index.d.ts\",\n \"exports\": {\n \".\": {\n \"types\": \"./native/index.d.ts\",\n \"import\": \"./native/index.js\"\n }\n }\n}\n```\n\nConsumers in `packages/coding-agent` and `packages/tui` import directly from `@oh-my-pi/pi-natives`.\n\n## JS API ↔ native export mapping (representative)\n\n| Category | Public JS API | Rust source | Return style |\n| ----------------- | ---------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------- | ----------------------------- |\n| Grep | `grep(options, onMatch?)` | `grep.rs` | `Promise<GrepResult>` |\n| Grep | `search(content, options)` | `grep.rs` | `SearchResult` |\n| Grep | `hasMatch(content, pattern, ignoreCase?, multiline?)` | `grep.rs` | `boolean` |\n| Fuzzy path search | `fuzzyFind(options)` | `fd.rs` | `Promise<FuzzyFindResult>` |\n| Glob | `glob(options, onMatch?)` | `glob.rs` | `Promise<GlobResult>` |\n| Glob cache | `invalidateFsScanCache(path?)` | `fs_cache.rs` | `void` |\n| AST search/edit | `astGrep(options)`, `astEdit(options)` | `ast.rs` | `Promise<...>` |\n| Shell | `executeShell(options, onChunk?)` | `shell.rs` | `Promise<ShellExecuteResult>` |\n| Shell | `new Shell(options?)`, `shell.run(...)`, `shell.abort()` | `shell.rs` | class / promises |\n| PTY | `new PtySession()`, `start/write/resize/kill` | `pty.rs` | class / promises |\n| Process | `killTree(pid, signal)`, `listDescendants(pid)` | `ps.rs` | sync |\n| Keys | `parseKey`, `matchesKey`, Kitty/legacy helpers | `keys.rs` | sync |\n| Text | `wrapTextWithAnsi`, `truncateToWidth`, `sliceWithWidth`, `extractSegments`, `visibleWidth` | `text.rs` | sync |\n| Highlight | `highlightCode`, `supportsLanguage`, `getSupportedLanguages` | `highlight.rs` | sync |\n| HTML | `htmlToMarkdown(html, options?)` | `html.rs` | `Promise<string>` |\n| Image | `PhotonImage`, `encodeSixel` | `image.rs` | class / sync / promises |\n| Clipboard | `copyToClipboard`, `readImageFromClipboard` | `clipboard.rs` | sync / promise |\n| Tokens | `countTokens(input, encoding?)` | `tokens.rs` | sync |\n| System | `detectMacOSAppearance`, `MacAppearanceObserver`, `MacOSPowerAssertion`, `getWorkProfile`, ProjFS helpers | `appearance.rs`, `power.rs`, `prof.rs`, `projfs_overlay.rs` | mixed |\n\n## Sync vs async contract differences\n\nThe contract preserves Rust/N-API call style:\n\n- **Promise-returning exports** for worker-thread or async runtime work (`grep`, `glob`, `fuzzyFind`, `astGrep`, `astEdit`, `htmlToMarkdown`, shell/PTY runs, image parse/resize/encode, clipboard image read).\n- **Synchronous exports** for deterministic in-memory transforms/parsers or direct system calls (`search`, `hasMatch`, highlighting, text utilities, token counting, process queries, `copyToClipboard`, `encodeSixel`).\n- **Constructor exports** for stateful runtime objects (`Shell`, `PtySession`, `PhotonImage`, macOS observer/power handles).\n\nChanging sync ↔ async for an existing export is a breaking public API change because consumers call these exports directly.\n\n## Object and enum typing patterns\n\n### Object patterns\n\n`#[napi(object)]` Rust structs become TS interfaces, for example:\n\n- `GrepResult`, `SearchResult`, `GlobResult`, `FuzzyFindResult`\n- `ShellRunResult`, `ShellExecuteResult`, `PtyRunResult`, `MinimizerResult`\n- `AstFindResult`, `AstReplaceResult`\n- `System`/media payloads such as `ClipboardImage`, `WorkProfile`, `ParsedKittyResult`\n\nRuntime shape correctness is owned by napi-rs and the Rust implementation.\n\n### Enum patterns\n\nNative enums are represented in generated declarations and also appended to `module.exports` by `scripts/gen-enums.ts`, because the loader is hand-maintained CommonJS around the generated addon. Current enum objects include:\n\n- `AstMatchStrictness`\n- `Ellipsis`\n- `Encoding`\n- `FileType`\n- `GrepOutputMode`\n- `ImageFormat`\n- `KeyEventType`\n- `MacOSAppearance`\n- `SamplingFilter`\n\n## Error behavior and caveats\n\n- Addon load failure or unsupported platform throws during package import from `native/index.js`.\n- The loader does not verify the full export set after `require(...)`; stale or mismatched binaries surface as native load errors or missing members at use sites.\n- N-API conversion validates basic argument conversion, but TS optional fields do not guarantee semantic validity for untyped callers.\n- Numeric enum declarations do not prevent out-of-range numeric values from untyped callers unless the Rust function rejects them during conversion.\n- Callback exports use napi-rs `ThreadsafeFunction` shape: `(error: Error | null, value) => void`. Native code generally emits successful values; hard failures reject/throw through the owning call.\n\n## Maintainer checklist for binding changes\n\nWhen adding/changing an export, update all of:\n\n1. Rust `#[napi]` implementation in the owning `crates/pi-natives/src/<module>.rs`.\n2. `crates/pi-natives/src/lib.rs` if a new module is added.\n3. Any consumer imports/callsites in `packages/coding-agent` or `packages/tui`.\n4. Build output by running the natives build so `native/index.d.ts` and `native/index.js` stay in sync.\n5. `scripts/gen-enums.ts` if enum runtime export patching needs to change.\n\nDo not add a parallel TS wrapper convention unless the package design intentionally moves back to wrappers; current consumers depend on the direct generated API.\n",
|
|
@@ -69,11 +69,10 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
|
|
|
69
69
|
"tools/ast-grep.md": "# ast_grep\n\n> Structural code search over supported source files via native ast-grep.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ast-grep.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ast-grep.md`\n- Key collaborators:\n - `crates/pi-natives/src/ast.rs` — native scan, parse, match engine\n - `crates/pi-natives/src/language/mod.rs` — language aliases and extension inference\n - `packages/coding-agent/src/tools/path-utils.ts` — path/glob parsing and multi-path resolution\n - `packages/coding-agent/src/tools/render-utils.ts` — parse-error dedupe and display caps\n - `packages/coding-agent/src/tools/match-line-format.ts` — hashline match rendering\n - `packages/coding-agent/src/utils/file-display-mode.ts` — hashline vs line-number output mode\n - `packages/natives/native/index.d.ts` — JS-visible native binding contract\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `pat` | `string` | Yes | Single AST pattern. The wrapper trims it and rejects empty strings. |\n| `paths` | `string[]` | Yes | One or more files, directories, globs, or internal URLs with backing files. Empty entries are rejected. Globs are forbidden for internal URLs. |\n| `skip` | `number` | No | Match offset. Defaults to `0`, then `Math.floor(...)`; negatives and non-finite values fail. |\n\nPattern grammar and language support exposed to the model:\n- `$NAME` — capture one AST node.\n- `$_` — match one AST node without binding.\n- `$$$NAME` — capture zero or more AST nodes; ast-grep stops lazily at the next satisfiable node.\n- `$$$` — match zero or more AST nodes without binding.\n- Metavariable names must be uppercase and must stand for whole AST nodes, not partial tokens or string fragments.\n- Reusing the same metavariable requires identical code at each occurrence.\n- Patterns must parse as one valid AST node for the inferred target language.\n- Supported canonical languages come from `SupportLang::all_langs()` in `crates/pi-natives/src/language/mod.rs`: `astro`, `bash`, `c`, `cmake`, `cpp`, `csharp`, `dart`, `clojure`, `css`, `diff`, `dockerfile`, `elixir`, `erlang`, `go`, `graphql`, `haskell`, `hcl`, `html`, `ini`, `java`, `javascript`, `json`, `just`, `julia`, `kotlin`, `lua`, `make`, `markdown`, `nix`, `objc`, `ocaml`, `odin`, `perl`, `php`, `powershell`, `protobuf`, `python`, `r`, `regex`, `ruby`, `rust`, `scala`, `solidity`, `sql`, `starlark`, `svelte`, `swift`, `toml`, `tlaplus`, `tsx`, `typescript`, `verilog`, `vue`, `xml`, `yaml`, `zig`.\n\n## Outputs\n- Single-shot tool result.\n- Model-facing `content` is one text block:\n - grouped by file for directory/multi-file searches,\n - match lines rendered under `¶PATH#HASH` as `*LINE:text` in hashline mode or `*LINE|text` otherwise,\n - continuation lines for multi-line matches rendered with a leading space,\n - optional `meta: NAME=value` lines when ast-grep captured metavariables.\n- If no matches are found, text is `No matches found` or `No matches found. Parse issues mean the query may be mis-scoped; narrow paths before concluding absence.` plus formatted parse issues.\n- If the wrapper truncates visible results, the text ends with `Result limit reached; narrow paths or increase limit.`\n- `details` includes counts and metadata, not full match payloads:\n - `matchCount`, `fileCount`, `filesSearched`, `limitReached`\n - optional `parseErrors`, `scopePath`, `files`, `fileMatches`, `displayContent`, `meta`\n- Native ranges (`byteStart`, `byteEnd`, `startLine`, `startColumn`, `endLine`, `endColumn`) exist only inside the native result; the wrapper does not emit them directly to the model.\n\n## Flow\n1. `AstGrepTool.execute()` validates `pat`, normalizes `skip`, and normalizes each `paths` entry in `packages/coding-agent/src/tools/ast-grep.ts`.\n2. Internal URLs are resolved through `session.internalRouter`; entries without `sourcePath` fail, and internal-URL globs fail early.\n3. For multiple path inputs, `partitionExistingPaths()` drops missing bases only when at least one surviving base remains; if all bases are missing the call fails.\n4. `parseSearchPath()` splits a single path into `basePath` plus optional `glob`. `resolveExplicitSearchPaths()` collapses multiple inputs into a common base plus a brace-union glob, or separate `targets` when the only common base is a filesystem root.\n5. The wrapper stats the resolved base path to decide whether output should be grouped as a directory result.\n6. Execution dispatches to either:\n - one native `astGrep(...)` call for a single resolved base, or\n - `runMultiTargetAstGrep(...)`, which calls the native binding once per target, rebases paths back to the common root, sorts globally, then applies `skip` and the wrapper limit.\n7. Native `ast_grep` in `crates/pi-natives/src/ast.rs`:\n - normalizes and deduplicates patterns,\n - resolves a `MatchStrictness` (`smart` by default),\n - collects candidate files from a file or gitignore-aware directory scan,\n - infers language per candidate from extension unless `lang` was provided,\n - compiles the pattern separately for each language present,\n - reads each file, reports syntax-error trees as parse issues, runs `find_all`, and optionally captures metavariable bindings.\n8. Native results are sorted by path and source position, then paged by `offset`/`limit`.\n9. The TS wrapper normalizes parse-error strings, deduplicates them, groups matches by formatted path, renders anchor lines, appends limit/parse notices, and returns `toolResult(...).text(...).done()`.\n\n## Modes / Variants\n- Single file: native path is the file; output is a flat list of rendered match lines.\n- Directory + optional glob: native scan walks the directory, then filters by compiled glob.\n- Multiple explicit paths/globs: wrapper unions them into one synthetic scope or runs per-target native calls when paths only meet at root.\n- Internal URL inputs: only supported when the router can resolve them to a backing file path.\n- Hashline output mode vs plain line-number mode: controlled by `resolveFileDisplayMode()`; hashline mode requires the edit tool and non-raw, mutable sources.\n\n## Side Effects\n- Filesystem\n - Stats input paths in the TS wrapper.\n - Native code reads matched files and scans directories through `fs_cache`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - None beyond normal tool transcript/result metadata.\n- Background work / cancellation\n - Native work runs on a blocking worker via `task::blocking(...)`.\n - Cancellation and optional native timeout are cooperative through `CancelToken::heartbeat()`.\n\n## Limits & Caps\n- Wrapper-visible result cap: `DEFAULT_AST_LIMIT = 50` in `packages/coding-agent/src/tools/ast-grep.ts`.\n - Single-target calls rely on the native default limit of 50 in `crates/pi-natives/src/ast.rs`.\n - Multi-target calls fetch `skip + 50 + 1` matches per target, then re-page after global sort.\n- Native `limit` is clamped to at least `1`; omitted `offset` defaults to `0` in `crates/pi-natives/src/ast.rs`.\n- Parse issues are rendered with at most `PARSE_ERRORS_LIMIT = 20` lines in `packages/coding-agent/src/tools/render-utils.ts`; `details.parseErrors` itself is only deduplicated, not capped.\n- Directory scans use `include_hidden: true`, `use_gitignore: true`, and skip `node_modules` unless the glob text explicitly mentions `node_modules` in `crates/pi-natives/src/ast.rs`.\n- No hard file-count cap is applied by the wrapper or native `ast_grep`; candidate count is whatever the resolved path/glob expands to after gitignore filtering.\n- Multi-path union deduplicates identical path inputs before resolution in `resolveExplicitSearchPaths()`.\n\n## Errors\n- TS wrapper throws `ToolError` for empty patterns, invalid `skip`, empty path entries, unsupported internal-URL globs, internal URLs without `sourcePath`, and missing paths.\n- Native code returns hard errors for:\n - unsupported explicit `lang`,\n - inability to infer language for a candidate when `lang` is not supplied,\n - invalid AST pattern compilation for every relevant language,\n - unreadable search roots or bad glob compilation,\n - cancellation (`Aborted: Signal`) or timeout (`Aborted: Timeout`).\n- File-level parse failures and many per-language pattern compile failures are non-fatal: they are accumulated in `parseErrors` and surfaced alongside successful matches.\n- `no matches` is not an error, even when parse issues were recorded.\n\n## Notes\n- `pat` is always wrapped into a one-element `patterns` array by the TS tool; the model cannot send multiple patterns through `ast_grep` even though the native binding supports it.\n- `ast_grep` can search mixed-language trees because native compilation happens per discovered language, but the prompt still tells the model to keep calls single-language when possible to reduce parse noise.\n- Pattern compilation is per language present in the candidate set. One pattern can succeed for some languages and generate per-file parse errors for others in the same run.\n- A file with tree-sitter error nodes still gets searched; the syntax warning is additive, not a skip condition.\n- For glob semantics, `*.ts` matches only direct children while `**/*.ts` recurses; this is covered by native tests in `crates/pi-natives/src/ast.rs`.\n- Output anchors are intended for follow-up tools, but the exact anchor format depends on session edit mode (`hashline` vs line-number mode).",
|
|
70
70
|
"tools/bash.md": "# bash\n\n> Execute a shell command in the session workspace, with optional PTY or background-job handling.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/bash.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/bash.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/bash-interactive.ts` — PTY/TUI execution path.\n - `packages/coding-agent/src/tools/bash-interceptor.ts` — blocks tool-better shell patterns.\n - `packages/coding-agent/src/tools/bash-skill-urls.ts` — expands internal URLs to paths.\n - `packages/coding-agent/src/exec/bash-executor.ts` — non-PTY shell execution.\n - `packages/coding-agent/src/session/streaming-output.ts` — tail buffer, truncation, artifact spill.\n - `packages/coding-agent/src/tools/tool-timeouts.ts` — timeout clamp bounds.\n - `packages/coding-agent/src/config/settings-schema.ts` — default interceptor rules.\n - `docs/bash-tool-runtime.md` — deeper executor/runtime notes; use as the companion doc for shell-session internals.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `command` | `string` | Yes | Shell command text to execute. A leading `cd <path> && ...` is rewritten into `cwd` only when `cwd` was omitted. |\n| `env` | `Record<string, string>` | No | Extra environment variables. Keys must match `^[A-Za-z_][A-Za-z0-9_]*$` or the tool throws. Values also go through internal-URL expansion. |\n| `timeout` | `number` | No | Timeout in seconds. Default `300`; clamped to `1..3600` by `clampTimeout(\"bash\", ...)`. |\n| `cwd` | `string` | No | Working directory, resolved against `session.cwd` via `resolveToCwd`. Must exist and be a directory. |\n| `pty` | `boolean` | No | Request PTY mode. Default `false`. PTY is used only when `pty: true`, `PI_NO_PTY !== \"1\"`, and the tool context has a UI. |\n| `async` | `boolean` | No | Background execution request. Present only when `async.enabled` is true for the session. Returns immediately with a job id instead of waiting. |\n\n## Outputs\nThe tool returns a single `text` content block plus optional `details`.\n\n- Success, foreground:\n - `content[0].text`: command output, or `(no output)` when the command produced nothing.\n - `details.timeoutSeconds`: effective timeout after clamping.\n - `details.requestedTimeoutSeconds`: only present when the requested timeout was clamped.\n - `details.meta.truncation`: present when output was truncated in memory; includes `artifactId` when full output spilled to an artifact.\n- Success, background start (`async: true` or auto-background):\n - `content[0].text`: optional preview tail, timeout notice if any, then `Background job <id> started: <label>` with follow-up instructions.\n - `details.async`: `{ state: \"running\", jobId, type: \"bash\" }`.\n- Background progress / completion:\n - delivered through `onUpdate` / async job manager, not the initial return.\n - running updates contain tail text and `details.async.state: \"running\"` only after the job is considered backgrounded.\n - completion/failure updates carry final text and `details.async.state: \"completed\" | \"failed\"`.\n- Failure:\n - the tool throws `ToolError` / `ToolAbortError`; non-zero exits are surfaced as errors, not success results.\n\nStdout and stderr are merged before the model sees them. Non-zero exit codes are appended to the thrown error text as `Command exited with code <n>`.\n\n## Flow\n1. `BashTool.execute()` in `packages/coding-agent/src/tools/bash.ts` reads `command`, normalizes `env`, and defaults `timeout` to `300`.\n2. If `cwd` is absent, it rewrites a leading `cd <path> && ...` into the structured `cwd` field and strips that prefix from `command`.\n3. If `async: true` is requested while `async.enabled` is off, it throws `ToolError` before any execution.\n4. If `bashInterceptor.enabled` is on, `checkBashInterception()` runs against both the original command and the `cd`-stripped command. A matching enabled rule throws before URL expansion or execution.\n5. `expandInternalUrls()` rewrites supported internal URLs inside `command`, each `env` value, and protocol-looking `cwd` values. Command/env replacements are shell-escaped unless `noEscape` is requested by the caller path.\n6. `resolveToCwd()` resolves `cwd` against `session.cwd`; `fs.stat()` verifies that the target exists and is a directory.\n7. `clampTimeout(\"bash\", requestedTimeoutSec)` enforces `TOOL_TIMEOUTS.bash` (`default: 300`, `min: 1`, `max: 3600`). When clamped, `#buildCompletedResult()` / `#buildBackgroundStartResult()` append a notice line.\n8. Execution path splits:\n 1. `async: true` -> `#startManagedBashJob()` registers a session async job and returns immediately.\n 2. Non-PTY with `bash.autoBackground.enabled` and an async job manager -> starts a managed job, waits up to `min(thresholdMs, timeoutMs - 1000)`, and either returns the completed result or converts the run into a background job.\n 3. Otherwise runs foreground execution.\n9. Foreground non-PTY calls `executeBash()` from `packages/coding-agent/src/exec/bash-executor.ts`.\n10. Foreground PTY calls `runInteractiveBashPty()` from `packages/coding-agent/src/tools/bash-interactive.ts`.\n11. Both paths allocate an output artifact first when `session.allocateOutputArtifact` is available. The artifact path/id are passed into the sink so large output can spill to disk.\n12. `executeBash()` loads shell settings, optional shell snapshot, and shell minimizer settings, then runs via a persistent native `Shell` session or one-shot `executeShell()`. `docs/bash-tool-runtime.md` covers that path in detail.\n13. `runInteractiveBashPty()` creates a `PtySession`, overlays an xterm-backed console UI, forwards user key input into the PTY, captures output through `OutputSink`, and kills the PTY on dismiss/dispose.\n14. On completion, `#buildCompletedResult()` formats `(no output)` when needed, attaches truncation metadata from the `OutputSink` summary, and re-checks exit status / timeout / cancellation before returning.\n15. On non-zero exit, timeout, missing exit status, or cancellation, `#buildResultText()` throws with the captured output included in the error message.\n\n## Modes / Variants\n1. Foreground non-PTY\n - Default path.\n - Uses `executeBash()`.\n - Streams tail-only updates through `streamTailUpdates()` and `TailBuffer(DEFAULT_MAX_BYTES)`.\n2. Foreground PTY\n - Requires `pty: true`, UI context, and `PI_NO_PTY !== \"1\"`.\n - Uses `runInteractiveBashPty()` and a `PtySession` overlay.\n - Supports interactive input; `Esc` kills the session from the overlay.\n3. Explicit background job\n - Requires `async: true` and `async.enabled`.\n - Registers a job with `session.asyncJobManager` and returns `{ state: \"running\", jobId }` immediately.\n4. Auto-backgrounded non-PTY job\n - Requires `bash.autoBackground.enabled`, no PTY, and an async job manager.\n - Starts like a foreground managed job, then backgrounds it when it outlives the wait window.\n5. Intercepted command\n - No subprocess created.\n - Returns a `ToolError` pointing the model at `read`, `search`, `find`, `edit`, or `write`.\n\n## Side Effects\n- Filesystem\n - Validates `cwd` with `fs.stat()`.\n - May allocate and write artifact files for full output (`bash`) and minimizer-preserved raw output (`bash-original`).\n - `expandInternalUrls(..., { ensureLocalParentDirs: true })` creates parent directories for `local://` paths before execution.\n- Subprocesses / native bindings\n - Non-PTY uses native shell execution via `@oh-my-pi/pi-natives` (`Shell.run()` or `executeShell()`).\n - PTY uses native `PtySession.start()`.\n- Session state\n - Reads session settings for async, auto-background, interceptor, tool availability, and shell configuration.\n - Registers jobs with `session.asyncJobManager` for explicit/auto background runs.\n - Uses `session.getSessionId()` to isolate shell reuse and async session keys.\n - Uses `session.allocateOutputArtifact()` for spill files.\n- User-visible prompts / interactive UI\n - PTY mode opens a TUI overlay titled `Console` and forwards input to the PTY.\n - Background start messages direct the agent to the `job` tool (use `list: true` for a snapshot, or pass `poll: [id]` to wait).\n- Background work / cancellation\n - Async and auto-background jobs continue after the initial tool return.\n - Cancellation aborts the native run; PTY overlay dismissal also kills the PTY.\n\n## Limits & Caps\n- Default timeout: `300s` (`TOOL_TIMEOUTS.bash.default` in `packages/coding-agent/src/tools/tool-timeouts.ts`).\n- Timeout clamp: `1..3600s` (`TOOL_TIMEOUTS.bash.min/max`).\n- Auto-background default threshold: `60_000ms` (`DEFAULT_AUTO_BACKGROUND_THRESHOLD_MS` in `packages/coding-agent/src/tools/bash.ts`), further capped to `timeoutMs - 1000` by `#resolveAutoBackgroundWaitMs()`.\n- Hard kill grace beyond requested timeout in non-PTY executor: `5_000ms` (`HARD_TIMEOUT_GRACE_MS` in `packages/coding-agent/src/exec/bash-executor.ts`).\n- In-memory output tail cap: `50 * 1024` bytes (`DEFAULT_MAX_BYTES` in `packages/coding-agent/src/session/streaming-output.ts`). Once exceeded, the sink keeps only the tail window in memory.\n- Streaming callback throttle in `executeBash()`: `50ms` between `onChunk` calls when streaming is enabled.\n- TUI collapsed preview: `10` visual lines (`BASH_DEFAULT_PREVIEW_LINES`) when rendered inline in the agent UI; this is a renderer cap, not a tool output cap.\n\n## Errors\n- Input validation:\n - invalid env key -> `ToolError(\"Invalid bash env name: <key>\")`.\n - async requested while disabled -> `ToolError(\"Async bash execution is disabled...\")`.\n - missing async job manager -> `ToolError(\"Async job manager unavailable for this session.\")`.\n - missing/bad `cwd` -> `ToolError(\"Working directory does not exist: ...\")` or `ToolError(\"Working directory is not a directory: ...\")`.\n- Interceptor:\n - matched command -> `ToolError` with `Blocked: <rule.message>` and the original command.\n - invalid interceptor regexes are silently skipped by `compileRules()`.\n- Internal URL expansion:\n - unsupported scheme, unknown skill, path traversal, missing router support, or router resolution failures all throw `ToolError` from `packages/coding-agent/src/tools/bash-skill-urls.ts`.\n- Execution:\n - non-zero exit -> thrown `ToolError` containing captured output plus `Command exited with code <n>`.\n - missing exit code -> thrown `ToolError` with `Command failed: missing exit status`.\n - timeout -> thrown `ToolError`; PTY uses `Command timed out after <n> seconds`, non-PTY executor returns cancelled output that `BashTool` converts to an error.\n - user abort -> `ToolAbortError` when the caller signal is aborted.\n- Artifact allocation / artifact save failures are swallowed in `saveBashOriginalArtifact()` and `OutputSink.#createFileSink()`; execution continues without that artifact.\n\n## Notes\n- `strict = true` and `concurrency = \"exclusive\"` are set on `BashTool`; the tool does not run concurrently with another bash tool call in the same session.\n- `command` and `env` URL expansions shell-escape replacements; `cwd` expansion uses `noEscape: true` because it becomes a filesystem path argument, not shell text.\n- `checkBashInterception()` blocks only when the matching rule's `tool` name is present in `ctx.toolNames`; missing tools disable their corresponding rule.\n- Default interceptor rules come from `DEFAULT_BASH_INTERCEPTOR_RULES` in `packages/coding-agent/src/config/settings-schema.ts`:\n - `cat|head|tail|less|more` -> `read`\n - `grep|rg|ripgrep|ag|ack` -> `search`\n - `find|fd|locate` with name/type/glob flags -> `find`\n - `sed -i`, `perl -i`, `awk -i inplace` -> `edit`\n - `echo|printf|cat <<` with redirection -> `write`\n- PTY mode is ignored in non-UI contexts and when `PI_NO_PTY=1`; the tool silently falls back to non-PTY execution.\n- Non-PTY runs merge `NON_INTERACTIVE_ENV` with `env`; PTY runs also prepend `NON_INTERACTIVE_ENV` before custom env values.\n- When the shell minimizer rewrites output inside `executeBash()`, the visible output is replaced with minimized text and a `[raw output: artifact://<id>]` footer may be appended if `onMinimizedSave` persisted the original text.\n- The TUI renderer parses partial JSON to recover `env` assignments early in streaming previews; that behavior is display-only.\n- For executor internals that are not tool-specific — shell session reuse keys, snapshots, prefix handling, and native timeout behavior — see `docs/bash-tool-runtime.md`.\n",
|
|
71
71
|
"tools/browser.md": "# browser\n\n> Open, reuse, close, and script Puppeteer tabs against headless Chromium or CDP-attached apps.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/browser.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/browser.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/browser/tab-supervisor.ts` — global tab registry; worker lifecycle; run/close coordination.\n - `packages/coding-agent/src/tools/browser/tab-worker.ts` — executes `run` code; implements the `tab` helper API.\n - `packages/coding-agent/src/tools/browser/tab-worker-entry.ts` — worker-thread transport bootstrap.\n - `packages/coding-agent/src/tools/browser/registry.ts` — browser-handle registry keyed by browser kind.\n - `packages/coding-agent/src/tools/browser/launch.ts` — Puppeteer loading, Chromium resolution/download, headless launch, stealth injection.\n - `packages/coding-agent/src/tools/browser/attach.ts` — CDP attach/reuse, target picking, spawned-app process handling.\n - `packages/coding-agent/src/tools/browser/tab-protocol.ts` — worker init/run/result message schema.\n - `packages/coding-agent/src/tools/browser/readable.ts` — `tab.extract()` readability extraction.\n - `packages/coding-agent/src/tools/browser/render.ts` — TUI rendering for `open`/`close` status lines and `run` JS cells.\n - `packages/coding-agent/src/tools/puppeteer/00_stealth_tampering.txt` — mask patched functions/descriptors as native.\n - `packages/coding-agent/src/tools/puppeteer/01_stealth_activity.txt` — synthesize visibility/focus/scroll activity.\n - `packages/coding-agent/src/tools/puppeteer/02_stealth_hairline.txt` — fix Modernizr hairline detection.\n - `packages/coding-agent/src/tools/puppeteer/03_stealth_botd.txt` — spoof `navigator.webdriver`, `window.chrome`, and Chrome fingerprint surfaces.\n - `packages/coding-agent/src/tools/puppeteer/04_stealth_iframe.txt` — patch iframe `contentWindow`/`srcdoc` behavior.\n - `packages/coding-agent/src/tools/puppeteer/05_stealth_webgl.txt` — spoof WebGL vendor/renderer/precision.\n - `packages/coding-agent/src/tools/puppeteer/06_stealth_screen.txt` — normalize screen/viewport/device-pixel-ratio values.\n - `packages/coding-agent/src/tools/puppeteer/07_stealth_fonts.txt` — spoof local fonts and perturb canvas text rendering.\n - `packages/coding-agent/src/tools/puppeteer/08_stealth_audio.txt` — spoof audio latency/sample-rate and perturb offline rendering.\n - `packages/coding-agent/src/tools/puppeteer/09_stealth_locale.txt` — force locale/languages/timezone/date strings.\n - `packages/coding-agent/src/tools/puppeteer/10_stealth_plugins.txt` — synthesize `navigator.plugins`/`navigator.mimeTypes`.\n - `packages/coding-agent/src/tools/puppeteer/11_stealth_hardware.txt` — spoof `navigator.hardwareConcurrency`.\n - `packages/coding-agent/src/tools/puppeteer/12_stealth_codecs.txt` — spoof media codec support.\n - `packages/coding-agent/src/tools/puppeteer/13_stealth_worker.txt` — carry UA/platform spoofing into `Worker`/`SharedWorker`.\n\n## Inputs\n\n### Shared fields\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `action` | `\"open\" \\| \"close\" \\| \"run\"` | Yes | Dispatches to the open/close/run path. |\n| `name` | `string` | No | Tab id. Defaults to `\"main\"`. Tabs live in a process-global map, so the same name is reused across later calls and in-process subagents until closed. |\n| `timeout` | `number` | No | Tool wall-clock timeout in seconds. Defaults to `30`; clamped to the browser tool range before execution. |\n\n### `action: \"open\"`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `url` | `string` | No | Navigate after the tab is ready. Existing reusable tabs also navigate when `url` is supplied. |\n| `viewport` | `{ width: number; height: number; scale?: number }` | No | Requested viewport. For headless launch this becomes the initial viewport; for a page it is applied with `page.setViewport()`. `scale` maps to Puppeteer `deviceScaleFactor`. |\n| `wait_until` | `\"load\" \\| \"domcontentloaded\" \\| \"networkidle0\" \\| \"networkidle2\"` | No | Navigation wait condition. Defaults to `\"networkidle2\"` where omitted. |\n| `dialogs` | `\"accept\" \\| \"dismiss\"` | No | Installs a page `dialog` handler that auto-accepts or auto-dismisses dialogs. Omitted means no handler. |\n| `app` | `{ path?: string; cdp_url?: string; args?: string[]; target?: string }` | No | Selects browser kind. No `app` uses the session `browser.headless` setting. `app.path` is resolved against the session cwd and used as the executable path for spawn/attach reuse. `app.cdp_url` connects to an existing CDP endpoint. `args` are appended only when spawning `app.path`. `target` is only used for attached/spawned-app page selection. |\n\n### `action: \"close\"`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `all` | `boolean` | No | Close every known tab. Omitted closes only `name`. |\n| `kill` | `boolean` | No | When a tab release drops a spawned-app browser handle to refcount 0, also terminate its process tree. Has no effect on headless shutdown and only disconnects connected CDP browsers. |\n\n### `action: \"run\"`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `code` | `string` | Yes | Async-function body executed in a VM context with `page`, `browser`, `tab`, `display`, `assert`, `wait`, `console`, timers, `URL`, `TextEncoder`, `TextDecoder`, and `Buffer` in scope. |\n\n## Outputs\nThe tool returns one result per call; no streaming partial output is emitted from the browser implementation itself.\n\n- `open`: text content with `Opened` or `Reused`, browser description, URL, and optional title. `details` includes `action`, `name`, `browser`, `url`, `viewport`, and the same text in `details.result`.\n- `close`: text content with either `Closed ...` or `No tab named ...`. `details` includes `action`, `name`, and `details.result`.\n- `run`: ordered `content` array built as:\n 1. every `display(value)` call in execution order,\n 2. final return value, JSON-stringified unless already a string,\n 3. or `Ran code on tab \"...\"` if nothing else was produced.\n- `display(value)` coercion in `packages/coding-agent/src/tools/browser/tab-worker.ts`:\n - `{ type: \"image\", data: string, mimeType: string }` becomes image content,\n - `string` becomes text content,\n - other values become pretty JSON text when serializable, else `String(value)`.\n- `tab.screenshot()` also appends text plus an image content item unless `silent: true`; `details.screenshots` records persisted screenshot metadata `{ dest, mimeType, bytes, width, height }`.\n- `run` `details` includes `action`, `name`, current `browser`/`url` when the tab exists, optional `screenshots`, and `details.result` containing only the concatenated text outputs.\n\n## Flow\n1. `BrowserTool.execute()` (`packages/coding-agent/src/tools/browser.ts`) abort-checks, clamps `timeout` via `clampTimeout(\"browser\", ...)`, defaults `name` to `\"main\"`, and dispatches on `action`.\n2. `open` resolves browser kind with `resolveBrowserKind()`:\n - `app.cdp_url` → `{ kind: \"connected\" }` after trimming trailing slashes.\n - `app.path` → `{ kind: \"spawned\" }` after resolving against session cwd.\n - otherwise → `{ kind: \"headless\", headless: session.settings.get(\"browser.headless\") }`.\n3. `open` rejects reusing the same tab name across different browser kinds (`sameBrowserKind()`); callers must close first.\n4. `open` acquires a browser handle through `acquireBrowser()` (`packages/coding-agent/src/tools/browser/registry.ts`):\n - existing connected handle is reused by browser-kind key;\n - stale disconnected handles are disposed and recreated;\n - headless launches via `launchHeadlessBrowser()`;\n - `connected` waits for `${cdpUrl}/json/version`, then `puppeteer.connect()`;\n - `spawned` first tries `findReusableCdp()`, else kills same-path processes, allocates a free loopback port, spawns the executable with `--remote-debugging-port=<port>`, waits for CDP, then connects.\n5. `open` acquires a tab through `acquireTab()` (`packages/coding-agent/src/tools/browser/tab-supervisor.ts`):\n - same-name + same-browser + alive tab is reused unless `dialogs` changed;\n - same-name but different browser handle, dead state, or changed dialog policy forces release and recreation;\n - reusing with a new `url` navigates by issuing `await tab.goto(...)` through the worker.\n6. New tabs build a `WorkerInitPayload` in `buildInitPayload()`:\n - headless mode sends `url`, `waitUntil`, `viewport`, `dialogs`, and timeout;\n - attach mode resolves a page with `pickElectronTarget()`, gets its target id, and sends `targetId` plus `dialogs`.\n7. `acquireTab()` spawns a dedicated Bun `Worker` from `tab-worker-entry.ts`; if that fails it falls back to inline execution in the main thread (`spawnInlineWorker()`), preserving behavior but losing protection against synchronous infinite loops.\n8. `WorkerCore.#init()` (`packages/coding-agent/src/tools/browser/tab-worker.ts`) connects back to the browser websocket endpoint. Headless mode opens a new page, applies stealth patches, applies viewport, installs dialog handling if requested, and optionally navigates. Attach mode resolves the requested target page and optionally installs dialog handling.\n9. On success the worker sends `ready` with `{ url, title, viewport, targetId }`; the supervisor stores a `TabSession`, increments browser-handle refcount with `holdBrowser()`, and keeps the tab in a process-global `Map<string, TabSession>`.\n10. `run` requires non-empty `code`, looks up the tab with `getTab()`, then delegates to `runInTab()`.\n11. `runInTabWithSnapshot()` rejects dead tabs and concurrent runs (`Tab ... is busy`), captures session cwd plus optional `browser.screenshotDir`, registers an abort hook, sends a `run` message to the worker, and races the result against `timeoutMs + 750` ms. Timeouts force-kill the tab worker and, for headless tabs, close the orphaned page target.\n12. `WorkerCore.#run()` creates a VM context, exposes the raw Puppeteer `page`/`browser` plus a synthetic `tab` API, and executes `(async () => { ...code... })()` via `vm.runInContext()`.\n13. The `tab` helper API implemented in `#createTabApi()` is:\n - `tab.name: string`\n - `tab.page: Page`\n - `tab.signal?: AbortSignal`\n - `tab.url(): string`\n - `tab.title(): Promise<string>`\n - `tab.goto(url, { waitUntil? })`\n - `tab.observe({ includeAll?, viewportOnly? })`\n - `tab.screenshot({ selector?, fullPage?, save?, silent? })`\n - `tab.extract(format = \"markdown\")`\n - `tab.click(selector)`\n - `tab.type(selector, text)`\n - `tab.fill(selector, value)`\n - `tab.press(key, { selector? })`\n - `tab.scroll(deltaX, deltaY)`\n - `tab.drag(from, to)`\n - `tab.waitFor(selector)`\n - `tab.evaluate(fn, ...args)`\n - `tab.scrollIntoView(selector)`\n - `tab.select(selector, ...values)`\n - `tab.uploadFile(selector, ...filePaths)`\n - `tab.waitForUrl(pattern, { timeout? })`\n - `tab.waitForResponse(pattern, { timeout? })`\n - `tab.id(n)`\n14. Selector handling in `normalizeSelector()` accepts plain CSS and Puppeteer query handlers, and rewrites legacy Playwright-style prefixes `p-text/`, `p-xpath/`, `p-pierce/`, `p-aria/`; other `p-*` prefixes throw a `ToolError`.\n15. `tab.observe()` clears the element cache, takes a Puppeteer accessibility snapshot, filters to interactive nodes unless `includeAll`, optionally filters to viewport-visible nodes, assigns numeric ids, caches `ElementHandle`s, and returns URL/title/viewport/scroll metadata plus `elements`.\n16. `tab.id(n)` resolves the cached `ElementHandle`, verifies `el.isConnected`, and throws a stale-id error after cache invalidation if the DOM changed or the cache was cleared.\n17. `tab.goto()` clears the cached element ids before navigating. Any new `tab.observe()` also clears and rebuilds the cache.\n18. `tab.click()` uses a custom retry loop for `text/...` selectors to find an actionable visible match; other selectors use `page.locator(...).click()` with the run timeout.\n19. `tab.screenshot()` captures either the whole page or a selector PNG, downsizes a copy for model output, chooses a persistence path, writes the image to disk, records metadata, and optionally emits text + image display entries.\n20. `display()` calls accumulate in an array. After code finishes, the worker posts `{ displays, returnValue, screenshots }`; `BrowserTool.#run()` appends the return value as trailing text content when not `undefined`.\n21. `close` releases one tab or all tabs via `releaseTab()` / `releaseAllTabs()`. Each tab aborts pending runs, asks the worker to close, waits up to `750` ms for a `closed` ack, terminates the worker, decrements browser refcount, and disposes the browser handle when refcount reaches zero.\n\n## Modes / Variants\n- **Action dispatch**\n - `open` — acquire/reuse browser + tab.\n - `close` — release one tab or all tabs.\n - `run` — execute JS inside the tab worker.\n- **Browser kind**\n - **Headless**: launches local Chromium with Puppeteer, applies stealth patches, and creates a fresh page per tab.\n - **Spawned app (`app.path`)**: reuses an existing CDP-enabled process for that executable when possible; otherwise kills same-path processes, spawns the executable with remote debugging enabled, then attaches. No stealth patches are injected.\n - **Connected browser (`app.cdp_url`)**: attaches to an already-running CDP endpoint. No process ownership; close only disconnects.\n- **Target selection for attached/spawned browsers**\n - With `app.target`, `pickElectronTarget()` returns the first page whose URL or title contains the case-insensitive substring.\n - Without `app.target`, it skips titles/URLs matching `request handler|devtools|background page|background host|service worker` and otherwise falls back to the first page.\n- **Worker mode**\n - **Dedicated worker**: normal path; user code runs off the main thread and can be aborted even when it blocks synchronously.\n - **Inline fallback**: activated when Bun worker spawn fails; behavior matches, but synchronous infinite loops on user code cannot be interrupted.\n- **Dialog policy**\n - No `dialogs` field: no auto-handler.\n - `accept`/`dismiss`: page `dialog` events are handled automatically.\n - Changing dialog policy on an existing live tab forces tab recreation instead of mutating the worker in place.\n- **Screenshot persistence**\n - `save` provided: persist full-resolution PNG at the resolved cwd-relative or absolute path.\n - `browser.screenshotDir` session setting set: persist full-resolution PNG under that directory with a timestamped filename.\n - Neither set: persist the resized image to a temp-file path under the OS temp dir.\n\n## Side Effects\n- Filesystem\n - `loadPuppeteer()` writes `{}` to `<puppeteer-safe-dir>/package.json` before importing `puppeteer-core`.\n - First headless launch may download Chromium into the Puppeteer cache directory returned by `getPuppeteerDir()`.\n - `tab.screenshot()` creates parent directories and writes image files.\n - `tab.uploadFile()` resolves supplied paths against the session cwd.\n- Network\n - CDP attach paths poll `http://127.0.0.1:<port>/json/version` or the supplied `cdp_url` `/json/version`.\n - Headless/browser-attach sessions create CDP websocket connections.\n - Headless first-use Chromium download uses `@puppeteer/browsers`.\n - User `page` / `tab` operations perform normal browser network traffic.\n- Subprocesses / native bindings\n - Headless mode launches Chromium through Puppeteer.\n - `app.path` mode may spawn the target executable via `Bun.spawn()`.\n - `killExistingByPath()` / `gracefulKillTreeOnce()` use `@oh-my-pi/pi-natives` process inspection/termination.\n - Worker mode uses Bun `Worker`; fallback mode does not.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Browser handles are cached in a process-global `Map` keyed by browser kind in `packages/coding-agent/src/tools/browser/registry.ts`.\n - Tabs are cached in a process-global `Map` keyed by `name` in `packages/coding-agent/src/tools/browser/tab-supervisor.ts`.\n - `run` captures session cwd and optional `browser.screenshotDir` for screenshot/save path resolution.\n - `restartForModeChange()` drops only headless tabs.\n- User-visible prompts / interactive UI\n - None beyond normal tool output. Dialog auto-handling is invisible unless it fails and emits debug logs.\n- Background work / cancellation\n - `open`, `run`, CDP waits, and browser actions thread through abort signals.\n - A timed-out `run` aborts the worker execution path and can tear down the tab.\n\n## Limits & Caps\n- Tool timeout clamp: default `30` s, min `1` s, max `30` s (`TOOL_TIMEOUTS.browser` in `packages/coding-agent/src/tools/tool-timeouts.ts`).\n- Supervisor grace period around init/run/close: `750` ms (`GRACE_MS` in `packages/coding-agent/src/tools/browser/tab-supervisor.ts`).\n- Puppeteer protocol timeout for launch/connect operations: `60_000` ms (`BROWSER_PROTOCOL_TIMEOUT_MS` in `packages/coding-agent/src/tools/browser/launch.ts`).\n- Connected-browser CDP readiness wait: `5_000` ms before `puppeteer.connect()` (`packages/coding-agent/src/tools/browser/registry.ts`).\n- Spawned-app CDP readiness wait after spawn: `30_000` ms (`packages/coding-agent/src/tools/browser/registry.ts`).\n- CDP polling cadence: 150 ms in `waitForCdp()` (`packages/coding-agent/src/tools/browser/attach.ts`).\n- Headless default viewport: `1365x768` at `deviceScaleFactor: 1.25` (`DEFAULT_VIEWPORT` in `packages/coding-agent/src/tools/browser/launch.ts`).\n- Screenshot model-attachment resize cap: `maxWidth 1024`, `maxHeight 1024`, `maxBytes 150 * 1024`, `jpegQuality 70` (`packages/coding-agent/src/tools/browser/tab-worker.ts`).\n- `tab.waitForUrl()` polling interval: `200` ms (`packages/coding-agent/src/tools/browser/tab-worker.ts`).\n- Drag simulation uses `12` mouse-move steps (`packages/coding-agent/src/tools/browser/tab-worker.ts`).\n\n## Errors\n- `BrowserTool.execute()` converts DOM-style `AbortError` into `ToolAbortError`; other errors propagate.\n- `run` hard-fails on missing code: `Missing required parameter 'code' for action 'run'.`\n- `open` fails when reusing a name across browser kinds: `Tab \"...\" is bound to a different browser (...). Close it first.`\n- `runInTabWithSnapshot()` fails when the tab is absent/dead (`Tab \"...\" is not alive. Reopen it.`) or already running (`Tab \"...\" is busy`).\n- Worker init failures and run failures are serialized through `RunErrorPayload`; `ToolError` and abort state are reconstructed on the host side by `errorFromPayload()`.\n- Attached-target mismatches surface as:\n - `No page targets available on the attached browser`\n - `No page target matched \"...\". Available pages:\\n...`\n - `Target ... is no longer available on the attached browser`\n- Spawned-app path validation requires an absolute executable path, not an app bundle path.\n- Spawn/attach failures are wrapped into `ToolError`s such as `Timed out waiting for CDP endpoint ...`, `Failed to attach to ...`, or `Connected to ... but puppeteer.connect failed: ...`.\n- `tab` helper errors are user-visible `ToolError`s, including unsupported selector prefix, stale/unknown element id, invalid drag target, missing upload files, non-`<select>` for `tab.select()`, non-file-input for `tab.uploadFile()`, and screenshot selector misses.\n- On run timeout, the worker reports `Browser code execution timed out after <ms>ms`; the supervisor may escalate to `Browser code execution hung past grace; tab killed` if the worker does not respond after the grace window.\n\n## Notes\n- `loadPuppeteer()` and `loadPuppeteerInWorker()` temporarily redirect `cwd` to a safe Puppeteer directory before importing `puppeteer-core`, because Puppeteer probes the current working directory during module load.\n- Headless launch prefers a detected system Chrome/Chromium, then `PUPPETEER_EXECUTABLE_PATH`, and only then downloads Chromium.\n- Headless launch always passes `--no-sandbox`, `--disable-setuid-sandbox`, `--disable-blink-features=AutomationControlled`, and a `--window-size=...` matching the initial viewport. It also ignores Puppeteer default args `--disable-extensions`, `--disable-default-apps`, and `--disable-component-extensions-with-background-pages`.\n- Proxy-related env vars only affect headless launch: `PUPPETEER_PROXY`, `PUPPETEER_PROXY_BYPASS_LOOPBACK`, and `PUPPETEER_PROXY_IGNORE_CERT_ERRORS`.\n- Stealth patches are applied only in headless mode. Spawned or externally connected browsers are intentionally left untouched.\n- `applyStealthPatches()` also strips Puppeteer's `//# sourceURL=__puppeteer_evaluation_script__` suffix from CDP `Runtime.evaluate` / `Runtime.callFunctionOn` payloads.\n- `tab.extract()` reads `page.content()`, runs Readability first, then falls back to `main article`/`article`/`main`/`[role='main']`/`body`, and returns `null` if neither extraction path yields content.\n- `close(all: true, kill: false)` disconnects from spawned/connected browsers when the last tab closes but leaves spawned app processes running.\n- Headless orphan cleanup is best-effort: if a worker dies before closing its page, the supervisor searches browser targets by `targetId` and closes that page.\n- Console methods inside `run` do not appear in tool output; they are forwarded as debug/warn/error logs through the worker transport.",
|
|
72
|
-
"tools/calc.md": "# calc\n\n> Evaluates one or more arithmetic expressions and returns formatted numeric results.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/calculator.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/calculator.md`\n- Key collaborators:\n - `packages/coding-agent/src/tui.ts` — status lines and tree-list rendering\n - `packages/coding-agent/src/tools/render-utils.ts` — preview limits and formatting helpers\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `calculations` | `Calculation[]` | Yes | Batch of expressions to evaluate in order. |\n\n### `Calculation`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `expression` | `string` | Yes | Arithmetic expression string. |\n| `prefix` | `string` | Yes | Prepended verbatim to the rendered numeric result. |\n| `suffix` | `string` | Yes | Appended verbatim to the rendered numeric result. |\n\n## Outputs\n- Single-shot result.\n- `content[0].text` is the newline-joined `prefix + value + suffix` string for each calculation.\n- `details.results` is an array of `{ expression, value, output }`.\n- On renderer fallback, if `details` is missing but `content[0].text` exists, the TUI tries to pair each output line with the original expressions from call args.\n\n## Flow\n1. `execute()` wraps evaluation in `untilAborted(...)`.\n2. For each entry, `evaluateExpression(...)` tokenizes the expression, parses it with a recursive-descent parser, rejects non-finite outputs, and normalizes `-0` to `0`.\n3. `tokenizeExpression(...)` accepts whitespace, parentheses, operators, and number literals; any other character throws immediately.\n4. `ExpressionParser` applies precedence in this order: `+ -`, `* / %`, unary `+ -`, exponentiation `**`, parentheses/literals.\n5. Exponentiation is right-associative (`2 ** 3 ** 2` parses as `2 ** (3 ** 2)`).\n6. Each numeric result is formatted with `String(value)` and wrapped with the provided `prefix` and `suffix`.\n7. The tool returns text output plus structured `details`.\n\n## Side Effects\n- Background work / cancellation\n - Supports abort via `untilAborted(...)`.\n- Session state\n - None.\n- Filesystem / Network / Subprocesses\n - None.\n\n## Limits & Caps\n- Supported operators: `+`, `-`, `*`, `/`, `%`, `**` (`packages/coding-agent/src/tools/calculator.ts`).\n- Supported numeric literals:\n - decimal integers/floats, including leading-dot forms like `.5`\n - scientific notation like `1e10`, `2.5E-3`\n - hexadecimal `0x...`\n - binary `0b...`\n - octal `0o...`\n- Results must be finite; `Infinity` and `NaN` are rejected.\n- The renderer collapses long result lists using `PREVIEW_LIMITS.COLLAPSED_ITEMS` from `packages/coding-agent/src/tools/render-utils.ts`.\n\n## Errors\n- Invalid characters: e.g. `Invalid character \"x\" in expression`.\n- Malformed numbers: invalid prefixed literal, invalid exponent, invalid number.\n- Syntax errors: `Unexpected token in expression`, `Unexpected end of expression`, `Missing closing parenthesis`, `Expression is empty`.\n- Non-finite arithmetic: `Expression result is not a finite number`.\n- Any evaluation error aborts the whole batch; the tool does not return partial successes.\n\n## Notes\n- Despite the schema example showing `sqrt(16)`, the parser does not support functions, identifiers, units, or constants; only numeric literals, operators, and parentheses are accepted.\n- Precision is plain JavaScript `number` semantics throughout, including floating-point rounding behavior.\n- `/` and `%` use JavaScript numeric operators directly; there is no integer-only mode or unit handling.\n- Unary operators bind tighter than `*`/`/`/`%` but looser than exponentiation because unary parsing delegates to `#parsePower()`.\n",
|
|
73
72
|
"tools/checkpoint.md": "# checkpoint\n\n> Mark the current top-level conversation state so later `rewind` can collapse exploratory context into a report.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/checkpoint.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/checkpoint.md`\n- Key collaborators:\n - `packages/coding-agent/src/session/agent-session.ts` — captures the active checkpoint after tool success.\n - `packages/coding-agent/src/session/session-manager.ts` — persists the normal session entry stream; not the active checkpoint marker.\n - `packages/coding-agent/src/tools/index.ts` — registers the tool and gates it behind `checkpoint.enabled`.\n - `packages/coding-agent/src/config/settings-schema.ts` — defines the disabled-by-default feature flag.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `goal` | `string` | Yes | Investigation goal. Required by the schema and echoed in the tool result. |\n\n## Outputs\nThe tool returns a single text result plus structured details:\n\n- text body:\n - `Checkpoint created.`\n - `Goal: <goal>`\n - `Run your investigation, then call rewind with a concise report.`\n- `details`:\n - `goal: string`\n - `startedAt: string` — ISO timestamp created inside `CheckpointTool.execute()`\n\nNo checkpoint ID, artifact URI, job handle, file path, or restore token is returned.\n\n## Flow\n1. `CheckpointTool.createIf()` in `packages/coding-agent/src/tools/checkpoint.ts` returns `null` for subagents by checking `session.taskDepth`; only top-level sessions can see the tool.\n2. `CheckpointTool.execute()` rejects subagent calls again with `ToolError(\"Checkpoint not available in subagents.\")`.\n3. It rejects nested checkpoints with `ToolError(\"Checkpoint already active.\")` when `session.getCheckpointState?.()` is already set.\n4. It creates `startedAt = new Date().toISOString()` and returns a normal `toolResult()` payload. The tool itself does not persist anything.\n5. On the later `tool_execution_end` event, `AgentSession` in `packages/coding-agent/src/session/agent-session.ts` detects successful `checkpoint` execution and captures three in-memory fields:\n - `checkpointMessageCount` — current `agent.state.messages.length`, after the checkpoint tool result has already been appended\n - `checkpointEntryId` — `sessionManager.getEntries().at(-1)?.id ?? null`, i.e. the last persisted session entry ID at checkpoint time\n - `startedAt` — copied from tool details or regenerated\n6. `AgentSession` stores that object in its private `#checkpointState` field and clears `#pendingRewindReport`.\n\n## Side Effects\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Sets `AgentSession.#checkpointState` in memory.\n - Records the checkpoint boundary as a message count plus a session entry ID.\n - Enables the later yield guard: if a checkpoint is active and no rewind report is pending, `#enforceRewindBeforeYield()` injects a developer-role warning and schedules another turn.\n- User-visible prompts / interactive UI\n - The tool result tells the model to call `rewind` after the investigation.\n - If the agent tries to `yield` first, `AgentSession` injects:\n\n```text\n<system-warning>\nYou are in an active checkpoint. You MUST call rewind with your investigation findings before yielding. Do NOT yield without completing the checkpoint.\n</system-warning>\n```\n\n## Limits & Caps\n- Availability is gated by `checkpoint.enabled`, default `false`, in `packages/coding-agent/src/config/settings-schema.ts`.\n- The tool is registered as discoverable in `packages/coding-agent/src/tools/index.ts`.\n- Only one active checkpoint is allowed per top-level session.\n- Checkpoint state is not persisted as a dedicated session entry. If the process exits, a resumed session can reload the conversation history, but not the live `#checkpointState` guard.\n- Session persistence still applies to the ordinary checkpoint tool call message. Global session persistence truncation is `MAX_PERSIST_CHARS = 500_000` in `packages/coding-agent/src/session/session-manager.ts`.\n\n## Errors\n- `ToolError(\"Checkpoint not available in subagents.\")` — thrown for subagent sessions.\n- `ToolError(\"Checkpoint already active.\")` — thrown when a prior checkpoint has not been rewound or cleared.\n- The tool body has no local `try/catch`; unexpected exceptions propagate.\n\n## Notes\n- Despite the summary string `Create a git-based checkpoint to save and restore session state`, the implementation does not call git and does not snapshot filesystem state.\n- Captured state is conversation/session metadata only:\n - in-memory message count\n - session entry ID in the session tree\n - timestamp\n- Not captured:\n - working tree contents\n - staged changes\n - artifacts\n - blob-store contents\n - SQLite history rows from `packages/coding-agent/src/session/history-storage.ts`\n - auth or agent records from `packages/coding-agent/src/session/agent-storage.ts`\n- If the turn ends with `stopReason === \"aborted\"` while a checkpoint is active, `AgentSession` clears `#checkpointState` and `#pendingRewindReport` instead of preserving a half-finished checkpoint.\n",
|
|
74
73
|
"tools/debug.md": "# debug\n\n> Drive one DAP debug session; adjacent debug UI code reuses the same subsystem for logs, raw SSE capture, reports, profiling, and system diagnostics.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/debug.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/debug.md`\n- Key collaborators:\n - `packages/coding-agent/src/dap/session.ts` — session lifecycle, breakpoint/state cache\n - `packages/coding-agent/src/dap/client.ts` — adapter process/socket transport, DAP message loop\n - `packages/coding-agent/src/dap/config.ts` — adapter resolution and auto-selection\n - `packages/coding-agent/src/dap/defaults.json` — built-in adapter definitions\n - `packages/coding-agent/src/dap/types.ts` — request/response/capability shapes\n - `packages/coding-agent/src/tools/tool-timeouts.ts` — per-tool timeout clamp\n - `packages/coding-agent/src/debug/index.ts` — interactive debug selector menu\n - `packages/coding-agent/src/debug/log-viewer.ts` — recent-log TUI viewer\n - `packages/coding-agent/src/debug/raw-sse.ts` — raw SSE TUI viewer\n - `packages/coding-agent/src/debug/raw-sse-buffer.ts` — bounded SSE capture buffer\n - `packages/coding-agent/src/debug/profiler.ts` — CPU/heap profiling helpers\n - `packages/coding-agent/src/debug/report-bundle.ts` — `.tar.gz` report bundling, log source, cache cleanup\n - `packages/coding-agent/src/debug/system-info.ts` — system snapshot collection and env redaction\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `action` | `\"launch\" \\| \"attach\" \\| \"set_breakpoint\" \\| \"remove_breakpoint\" \\| \"set_instruction_breakpoint\" \\| \"remove_instruction_breakpoint\" \\| \"data_breakpoint_info\" \\| \"set_data_breakpoint\" \\| \"remove_data_breakpoint\" \\| \"continue\" \\| \"step_over\" \\| \"step_in\" \\| \"step_out\" \\| \"pause\" \\| \"evaluate\" \\| \"stack_trace\" \\| \"threads\" \\| \"scopes\" \\| \"variables\" \\| \"disassemble\" \\| \"read_memory\" \\| \"write_memory\" \\| \"modules\" \\| \"loaded_sources\" \\| \"custom_request\" \\| \"output\" \\| \"terminate\" \\| \"sessions\"` | Yes | Dispatch key for the tool switch in `packages/coding-agent/src/tools/debug.ts`. |\n| `program` | `string` | No | Launch target path. Required for `launch`. Resolved relative to `cwd` if provided, otherwise session cwd. |\n| `args` | `string[]` | No | Program argv for `launch`. |\n| `adapter` | `string` | No | Explicit adapter name. Otherwise `selectLaunchAdapter()` / `selectAttachAdapter()` auto-pick from `packages/coding-agent/src/dap/config.ts`. |\n| `cwd` | `string` | No | Launch/attach working directory. Defaults to session cwd. |\n| `file` | `string` | No | Source file path for source breakpoints. |\n| `line` | `number` | No | Source line for source breakpoints. |\n| `function` | `string` | No | Function breakpoint name. Mutually exclusive with `file`+`line` in breakpoint actions. |\n| `name` | `string` | No | Data breakpoint info target name. Required for `data_breakpoint_info`. |\n| `condition` | `string` | No | Conditional expression for source/function/instruction/data breakpoints. |\n| `hit_condition` | `string` | No | Hit-count condition for instruction/data breakpoints. |\n| `expression` | `string` | No | Expression or raw debugger command. Required for `evaluate`. |\n| `context` | `string` | No | Evaluate context. Defaults to `\"repl\"`. Passed through as DAP evaluate context. |\n| `frame_id` | `number` | No | Frame selector for `evaluate`, `scopes`, `data_breakpoint_info`. `scopes` and `evaluate` default to the current stopped frame when omitted. |\n| `scope_id` | `number` | No | Variables reference from a scope. Accepted by `variables`; also used as a fallback variables reference for `data_breakpoint_info`. |\n| `variable_ref` | `number` | No | Variables reference for `variables`; preferred over `scope_id` when both are present. |\n| `pid` | `number` | No | Local process id for `attach`. `attach` requires `pid` or `port`. |\n| `port` | `number` | No | Remote attach port. If no adapter is forced, attach prefers `debugpy` when `port` is present. |\n| `host` | `string` | No | Remote attach host for `attach`. |\n| `levels` | `number` | No | Max stack frames for `stack_trace`. |\n| `memory_reference` | `string` | No | Memory reference/address for `disassemble`, `read_memory`, `write_memory`. `disassemble` also accepts it via `instruction_reference` fallback logic in `resolveDisassemblyReference()`. |\n| `instruction_reference` | `string` | No | Instruction breakpoint reference; required for instruction breakpoint actions. |\n| `instruction_count` | `number` | No | Required for `disassemble`. |\n| `instruction_offset` | `number` | No | Instruction offset for `disassemble`. |\n| `count` | `number` | No | Byte count for `read_memory`. Required there. |\n| `data` | `string` | No | Base64 payload for `write_memory`. Required there. |\n| `data_id` | `string` | No | Data breakpoint id. Required for `set_data_breakpoint` / `remove_data_breakpoint`. |\n| `access_type` | `\"read\" \\| \"write\" \\| \"readWrite\"` | No | Access filter for `set_data_breakpoint`. |\n| `command` | `string` | No | Custom DAP request command. Required for `custom_request`. |\n| `arguments` | `Record<string, unknown>` | No | Custom DAP request body for `custom_request`. |\n| `offset` | `number` | No | Offset for instruction breakpoints, disassembly, memory read, memory write. |\n| `resolve_symbols` | `boolean` | No | `disassemble` symbol-resolution flag. |\n| `allow_partial` | `boolean` | No | `write_memory` partial-write allowance. |\n| `start_module` | `number` | No | Modules pagination start index for `modules`. |\n| `module_count` | `number` | No | Modules pagination count for `modules`. |\n| `timeout` | `number` | No | Per-request timeout in seconds. Default `30`, clamped to `5..300`. |\n\n### Action-specific requirements\n- `launch`: `program`\n- `attach`: `pid` or `port`\n- `set_breakpoint` / `remove_breakpoint`: `function`, or `file` + `line`\n- `set_instruction_breakpoint` / `remove_instruction_breakpoint`: `instruction_reference`\n- `data_breakpoint_info`: `name`\n- `set_data_breakpoint` / `remove_data_breakpoint`: `data_id`\n- `evaluate`: `expression`\n- `variables`: `variable_ref` or `scope_id`\n- `disassemble`: capability `supportsDisassembleRequest`, plus `instruction_count`\n- `read_memory`: capability `supportsReadMemoryRequest`, plus `memory_reference` and `count`\n- `write_memory`: capability `supportsWriteMemoryRequest`, plus `memory_reference` and `data`\n- `modules`: capability `supportsModulesRequest`\n- `loaded_sources`: capability `supportsLoadedSourcesRequest`\n- `custom_request`: `command`\n\n### Interactive selector values\n`packages/coding-agent/src/debug/index.ts` also exposes a fixed UI-only selector with values `open-artifacts`, `performance`, `work`, `dump`, `memory`, `logs`, `system`, `raw-sse`, `transcript`, `clear-cache`. These are not model-callable through `debugSchema`; they are local TUI menu routes.\n\n## Outputs\nThe agent tool returns a standard `toolResult()` payload from `packages/coding-agent/src/tools/debug.ts`:\n- `content`: one text block. Every action renders human-readable text; there is no structured JSON block in `content`.\n- `details.action`: echoed action.\n- `details.success`: always initialized `true`; failures surface by throwing before a result is returned.\n- `details.snapshot`: present for actions that operate on or create a session, using `DapSessionSummary` from `packages/coding-agent/src/dap/types.ts`.\n- Action-specific `details` fields:\n - `launch` / `attach`: `adapter`\n - breakpoint actions: `breakpoints`, `functionBreakpoints`, `instructionBreakpoints`, `dataBreakpoints`\n - `data_breakpoint_info`: `dataBreakpointInfo`\n - `continue` / `step_*`: `state`, `timedOut`\n - `threads`: `threads`\n - `stack_trace`: `stackFrames`\n - `scopes`: `scopes`\n - `variables`: `variables`\n - `evaluate`: `evaluation`\n - `disassemble`: `disassembly`\n - `read_memory`: `memoryAddress`, `memoryData`, `unreadableBytes`\n - `write_memory`: `bytesWritten`\n - `modules`: `modules`\n - `loaded_sources`: `sources`\n - `custom_request`: `customBody`\n - `output`: `output`\n - `sessions`: `sessions`\n\nStreaming/UI behavior:\n- The tool renderer merges call and result (`mergeCallAndResult: true`) and renders inline.\n- `debug.ts` itself does not emit progress updates through `_onUpdate`; result delivery is single-shot.\n- The interactive selector is UI-driven instead of model-driven. It swaps TUI components, appends status lines to the chat pane, opens files in external viewers, or writes archives/temp files.\n\nSide-channel artifacts outside the model tool result:\n- `createReportBundle()` writes `omp-report-<timestamp>.tar.gz` under the reports dir and returns the filesystem path to the UI handler.\n- `#handleWorkReport()` writes `/tmp/work-profile-<Date.now()>.svg` before opening it.\n- `RawSseViewerComponent` and `DebugLogViewerComponent` can copy captured text to the clipboard.\n\n## Flow\n1. Tool registration is conditional: `DebugTool.createIf()` in `packages/coding-agent/src/tools/debug.ts` returns `null` unless `session.settings.get(\"debug.enabled\")` is true. `packages/coding-agent/src/tools/index.ts` wires the factory and rechecks the same setting in tool filtering.\n2. `DebugTool.execute()` clamps `params.timeout` through `clampTimeout(\"debug\", params.timeout)` and composes the caller `AbortSignal` with `AbortSignal.timeout(...)`.\n3. `launch` and `attach` resolve cwd/program paths, select an adapter in `packages/coding-agent/src/dap/config.ts`, then delegate to `dapSessionManager.launch()` / `.attach()`.\n4. `DapSessionManager.launch()` / `.attach()` enforce the single-session rule with `#ensureLaunchSlot()`, spawn the adapter through `DapClient.spawn()`, register listeners, send `initialize`, cache capabilities, start listening for an initial stop event before sending `launch`/`attach`, then complete the `initialized` → `configurationDone` handshake in `#completeConfigurationHandshake()`.\n5. `DapClient.spawn()` starts the adapter detached with `NON_INTERACTIVE_ENV`. Most adapters use stdio; socket-mode adapters (`dlv`) use `#spawnSocketUnix()` on Linux or `#spawnSocketClientAddr()` on macOS/other.\n6. `#registerSession()` in `packages/coding-agent/src/dap/session.ts` installs reverse-request handlers:\n - `runInTerminal`: spawns the requested debuggee command detached via `ptree.spawn()` and returns `{ processId }`\n - `startDebugging`: logs the child-session request and returns `{}`; it does not create nested sessions\n - events: `output`, `initialized`, `stopped`, `continued`, `exited`, `terminated` update cached session state\n7. Operational actions (`set_breakpoint`, `evaluate`, `threads`, `read_memory`, `custom_request`, and similar) call `dapSessionManager` methods. Most flow through `#sendRequestWithConfig()`, which first sends `configurationDone` when required, then sends the DAP request, then updates `lastUsedAt`.\n8. Breakpoint actions maintain local cached breakpoint sets in `DapSessionManager` and remap adapter responses back onto those cached records.\n9. `continue` and the three step actions clear cached stop state, subscribe for `stopped`/`terminated`/`exited` before sending the DAP request, then `#awaitStopOutcome()` either returns the new stopped location or reports that the program is still running after timeout.\n10. `pause` sends DAP `pause`, waits for a stopped event if needed, and reuses cached stop state if the program was already stopped.\n11. `stack_trace`, `scopes`, `variables`, and `evaluate` default to the current stopped thread/frame when the caller omits ids and cached state is available.\n12. `output` reads the in-memory output ring from `DapSessionManager.getOutput()`. `terminate` sends `terminate` when supported, always attempts `disconnect`, marks the session terminated, and disposes the client.\n13. `sessions` reads the manager’s current map and formats all summaries. Although the manager stores a map, only one active session can exist because new launch/attach calls are blocked until the active one is terminated or cleaned up.\n14. The interactive selector in `packages/coding-agent/src/debug/index.ts` builds a `SelectList` of fixed values and dispatches each to a handler:\n - `performance`: `startCpuProfile()`, wait for Enter/Escape, stop profiling, read a 30-second work profile with `getWorkProfile(30)`, then bundle via `createReportBundle()`\n - `work`: read `getWorkProfile(30)`, write a temp SVG, open it externally\n - `dump`: create a report bundle immediately\n - `memory`: force GC, call `Bun.generateHeapSnapshot(\"v8\")`, then bundle\n - `logs`: build a `DebugLogSource` and mount `DebugLogViewerComponent`\n - `raw-sse`: resolve a `RawSseDebugBuffer` from the session and mount `RawSseViewerComponent`\n - `system`: call `collectSystemInfo()` and render `formatSystemInfo()` into the chat pane\n - `open-artifacts`: open the current session artifact directory if it exists\n - `transcript`: delegates to `ctx.handleDebugTranscriptCommand()`\n - `clear-cache`: show confirmation, then remove artifact directories older than 30 days with `clearArtifactCache()`\n\n## Modes / Variants\n- **Availability gate**\n - Tool hidden when `debug.enabled` is false.\n- **Adapter selection**\n - `launch`: explicit `adapter` wins; otherwise `selectLaunchAdapter()` ranks available adapters by extension match, root-marker match, then native-debugger preference (`gdb`, `lldb-dap`) for extensionless binaries.\n - `attach`: explicit `adapter` wins; otherwise remote `port` prefers `debugpy`, then native debuggers, then first available adapter.\n- **Transport**\n - stdio adapters: direct `stdin`/`stdout` framing.\n - socket adapters: Unix domain socket on Linux; TCP callback on macOS/other.\n- **DAP agent-tool actions**\n - `launch` — spawn adapter, initialize session, maybe stop on entry; returns formatted session snapshot and `details.adapter`.\n - `attach` — connect to a live process or remote port; same output shape as `launch`.\n - `set_breakpoint` — source or function breakpoint add/update; returns the current breakpoint list for that target.\n - `remove_breakpoint` — source or function breakpoint removal; returns the remaining breakpoint list.\n - `set_instruction_breakpoint` / `remove_instruction_breakpoint` — require `supportsInstructionBreakpoints`; return current instruction breakpoint list.\n - `data_breakpoint_info` — require `supportsDataBreakpoints`; asks the adapter for a `dataId`, access types, and description for `name`.\n - `set_data_breakpoint` / `remove_data_breakpoint` — require `supportsDataBreakpoints`; return the cached data-breakpoint list.\n - `continue` / `step_over` / `step_in` / `step_out` — return text describing whether execution stopped, terminated, or kept running, plus `details.state` and `details.timedOut`.\n - `pause` — interrupts a running target and returns a stopped snapshot.\n - `evaluate` — adapter expression evaluation; defaults context to `repl`.\n - `stack_trace` — fetches frames for the resolved thread.\n - `threads` — fetches current threads.\n - `scopes` — frame scopes for an explicit `frame_id` or the current stopped frame.\n - `variables` — variables for `variable_ref` or `scope_id`.\n - `disassemble` — require `supportsDisassembleRequest`; disassembles around a memory reference.\n - `read_memory` — require `supportsReadMemoryRequest`; returns address, base64 data, unreadable-byte count.\n - `write_memory` — require `supportsWriteMemoryRequest`; writes base64 data and reports bytes written.\n - `modules` — require `supportsModulesRequest`; optional pagination via `start_module` / `module_count`.\n - `loaded_sources` — require `supportsLoadedSourcesRequest`; returns loaded source descriptors.\n - `custom_request` — sends any DAP request name with arbitrary arguments.\n - `output` — dumps captured stdout/stderr/console text from the session cache.\n - `terminate` — disconnects and disposes the active session; returns `No debug session to terminate.` when none exists.\n - `sessions` — lists all cached session summaries.\n- **Interactive selector routes (UI-only)**\n - `logs` — loads today’s log tail and optional older daily log files into `DebugLogViewerComponent`; supports copy, range selection, pid filtering, load-older.\n - `raw-sse` — live view over the session’s `RawSseDebugBuffer`; supports tail-follow, scrolling, copy-all.\n - `performance` — CPU profile + 30-second work profile + report bundle.\n - `memory` — heap snapshot + report bundle.\n - `dump` — report bundle without profiler artifacts.\n - `work` — standalone work-profile flamegraph export/open.\n - `system` — formatted OS/arch/CPU/memory/version/cwd/shell/terminal dump.\n - `open-artifacts` / `transcript` / `clear-cache` — artifact directory open, transcript export, artifact-cache pruning.\n\n## Side Effects\n- Filesystem\n - Resolves program/file/cwd paths against the session cwd.\n - Report creation writes `.tar.gz` bundles and may read the session JSONL, artifact files, subagent session JSONLs, and log files.\n - Work-profile export writes `/tmp/work-profile-<timestamp>.svg`.\n - Log source reads daily log files from the logs dir.\n - Artifact-cache cleanup removes session artifact directories older than the cutoff.\n - `resolveRawSseDebugBuffer()` may attach a non-enumerable `rawSseDebugBuffer` property to the owner object.\n- Network\n - Socket-mode adapters bind/connect local sockets.\n - Remote attach may connect through the adapter to a remote debug port.\n- Subprocesses / native bindings\n - Spawns debugger adapters (`gdb`, `lldb-dap`, `python -m debugpy.adapter`, `dlv`, and others from `defaults.json`) detached.\n - Reverse DAP `runInTerminal` requests spawn the debuggee detached via `ptree.spawn()`.\n - `getWorkProfile(30)` comes from `@oh-my-pi/pi-natives`.\n - CPU profiling uses `node:inspector/promises`; heap snapshots use `Bun.generateHeapSnapshot(\"v8\")`; raw/log viewers sanitize text via `@oh-my-pi/pi-natives`.\n - `openPath()` launches the OS default file/browser handler for artifact dirs and SVGs.\n - Log/raw-SSE viewers can call `copyToClipboard()`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - `DapSessionManager` keeps session summaries, breakpoints, threads, stack frames, stop location, output capture, capabilities, and last-used timestamps in memory.\n - Active-session id is global to the singleton `dapSessionManager`.\n - `RawSseDebugBuffer` stores recent SSE events per owner/session.\n - The tool is `exclusive`; concurrent debug tool calls are blocked by the scheduler.\n- User-visible prompts / interactive UI\n - Debug selector shows confirmation before cache deletion.\n - Performance profiling temporarily hijacks editor Enter/Escape handlers until profiling stops.\n - Log/raw-SSE viewers replace the editor pane with custom components.\n- Background work / cancellation\n - Every DAP request accepts an `AbortSignal`; timeouts and caller cancellation abort the active request, not the whole session lifetime.\n - `DapSessionManager` runs a background cleanup loop every 30 seconds.\n - Raw SSE viewers subscribe to buffer updates until closed.\n\n## Limits & Caps\n- Tool timeout clamp: `default=30`, `min=5`, `max=300` in `packages/coding-agent/src/tools/tool-timeouts.ts`.\n- Per-request DAP default timeout: `DEFAULT_REQUEST_TIMEOUT_MS = 30_000` in `packages/coding-agent/src/dap/client.ts`.\n- Single active session: enforced by `#ensureLaunchSlot()` in `packages/coding-agent/src/dap/session.ts`.\n- Idle session cleanup: `IDLE_TIMEOUT_MS = 10 * 60 * 1000`, checked every `CLEANUP_INTERVAL_MS = 30 * 1000`.\n- Adapter liveness heartbeat: `HEARTBEAT_INTERVAL_MS = 5 * 1000`.\n- Output capture cap: `MAX_OUTPUT_BYTES = 128 * 1024`; older text is trimmed in ~1 KiB slices and `outputTruncated` is recorded.\n- Initial stop capture timeout after launch/attach: `STOP_CAPTURE_TIMEOUT_MS = 5_000`.\n- Socket-mode adapter readiness timeout: `10_000` ms in `waitForCondition()` and TCP connect timeout logic in `packages/coding-agent/src/dap/client.ts`.\n- Raw SSE buffer caps in `packages/coding-agent/src/debug/raw-sse-buffer.ts`:\n - `MAX_RAW_SSE_EVENTS = 1_000`\n - `MAX_RAW_SSE_CHARS = 512_000`\n - `MAX_RAW_SSE_EVENT_CHARS = 64_000` per event, with `: omp-debug-truncated ...` marker appended on trim\n- Log viewer window in `packages/coding-agent/src/debug/log-viewer.ts`:\n - `INITIAL_LOG_CHUNK = 50`\n - `LOAD_OLDER_CHUNK = 50`\n- Report/log ingestion caps in `packages/coding-agent/src/debug/report-bundle.ts`:\n - `MAX_LOG_LINES = 5000` for interactive log reading\n - `MAX_LOG_BYTES = 2 * 1024 * 1024` tail-read ceiling\n - report bundles include only the last `1000` log lines\n - subagent session inclusion is capped at the most recent `10` JSONL files\n- Interactive profiling windows in `packages/coding-agent/src/debug/index.ts`: both performance and work reports request `getWorkProfile(30)`.\n- Artifact cache pruning default: `30` days in `clearArtifactCache()` and the selector confirmation text.\n\n## Errors\n- Parameter validation in `packages/coding-agent/src/tools/debug.ts` throws `ToolError` with explicit messages such as:\n - `program is required for launch`\n - `attach requires pid or port`\n - `set_breakpoint requires file+line or function`\n - `variables requires variable_ref or scope_id`\n - `memory_reference is required for read_memory`\n - `count is required for read_memory`\n - `data is required for write_memory`\n - `command is required for custom_request`\n- Adapter selection failure throws `No debugger adapter available. Installed adapters: ...`.\n- Capability-gated actions throw from `requireCapability(...)`, e.g. `Active adapter does not support memory reads.`\n- No-session and state errors come from `DapSessionManager`, e.g. `No active debug session. Launch or attach first.`, `No active stack frame. Run stack_trace first or supply frame_id.`, `Debugger reported no threads.`\n- Launching a second live session throws `Debug session <id> is still active. Terminate it before launching another.`\n- DAP transport/request failures surface as thrown errors from `DapClient`:\n - `DAP request <command> timed out after <ms>ms`\n - `DAP event <event> timed out after <ms>ms`\n - `DAP adapter <name> is not running`\n - `DAP adapter exited (code N): <stderr>` or `DAP adapter exited unexpectedly (code N)`\n - adapter response `message` when a DAP request fails\n- `continue` / `step_*` are intentionally non-fatal when the target stays running past the timeout: they return `details.timedOut = true` and `state: \"running\"` instead of throwing.\n- `terminate` suppresses adapter errors while sending `terminate`/`disconnect`; it still disposes the client and returns the last summary when possible.\n- Interactive selector handlers report UI errors instead of throwing:\n - profiler start/stop, report bundling, log reading, system-info collection, cache clearing, and artifact opening use `ctx.showError(...)` / `ctx.showWarning(...)`\n - empty logs and empty artifact caches are warnings/status messages, not failures\n - copy failures in log/raw-SSE viewers become status/error text in the UI\n- Report-bundle helpers are intentionally best-effort for many file reads: missing session files, missing artifact dirs, unreadable artifact files, missing log dirs, inaccessible cache dirs, and missing subagent files are skipped silently.\n- `collectSystemInfo()` is best-effort for CPU probing; failure there falls back to `Unknown CPU`.\n\n## Notes\n- `packages/coding-agent/src/prompts/tools/debug.md` tells the model only one active session is supported; that is not advisory, it is enforced in code.\n- `configurationDone` is sent automatically both during launch/attach handshake and lazily before later requests if the adapter required it and the initial handshake did not complete.\n- `startDebugging` reverse requests are acknowledged but not implemented; child debug sessions are not spawned.\n- `output` exposes the merged `output` event stream only; the tool does not distinguish stdout, stderr, and console categories.\n- Session summaries expose `needsConfigurationDone`; this is derived from adapter capabilities and whether `configurationDone` has been sent.\n- Source breakpoint file paths are normalized with `path.resolve()` before caching and sending to the adapter.\n- `evaluate` defaults to `repl`, so the tool can forward raw debugger commands when the adapter supports them.\n- `disassemble` resolves its target from `memory_reference` first, then `instruction_reference`; it throws if neither is present.\n- `RawSseDebugBuffer.recordEvent()` increments `totalEvents` before bounded retention. A snapshot can therefore show fewer retained records than total observed events.\n- Raw SSE buffer listener failures are swallowed so viewer bugs do not break capture.\n- `createDebugLogSource()` walks daily log files newest-first, but `loadOlderLogs()` reverses each requested slice before concatenation so older chunks prepend in chronological order.\n- `clearArtifactCache()` deletes directories by directory mtime, not per-file age.\n- `addDirectoryToArchive()` reads artifact files as text with `Bun.file(...).text()`. Binary artifact contents are not preserved byte-for-byte in the report bundle.\n- The tool renderer truncates displayed output for the TUI preview, but the underlying text result still contains the full returned string.\n",
|
|
75
|
-
"tools/edit.md": "# edit\n\n> Applies source edits; default mode is the hashline patch language consumed from a single `input` string.\n\n## Source\n- Entry: `packages/coding-agent/src/edit/index.ts`\n- Model-facing prompt: `packages/hashline/src/prompt.md`\n- Key collaborators:\n - `packages/coding-agent/src/utils/edit-mode.ts` — selects active edit mode\n - `packages/hashline/src/grammar.lark` — hashline grammar\n - `packages/hashline/src/format.ts` — sigils and header constants (`¶`, `#`, `@@`, `+`, `&`, `,`)\n - `packages/hashline/src/input.ts` — parses `¶PATH#TAG` sections\n - `packages/hashline/src/tokenizer.ts` / `packages/hashline/src/parser.ts` — tokenizes and parses ops\n - `packages/hashline/src/apply.ts` — applies parsed edits to file text\n - `packages/hashline/src/mismatch.ts` — stale-anchor mismatch formatting (distinguishes recognized-but-drifted from never-recorded hashes)\n - `packages/hashline/src/recovery.ts` — snapshot-based stale-anchor recovery\n - `packages/hashline/src/snapshots.ts` — mints and resolves per-path opaque snapshot tags\n\n## Inputs\n\n### Hashline mode (default)\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `input` | `string` | Yes | One or more file sections. Anchored sections start with `¶PATH#TAG`; hashless `¶PATH` is allowed only for new-file creation or BOF/EOF-only inserts. Optional `*** Begin Patch` / `*** End Patch` envelope is ignored if present. |\n\nPatch language inside `input`:\n\n- **File header**: `¶PATH#TAG` (or `¶PATH` for new-file / virtual-only hunks). `TAG` is three uppercase-hex chars minted by the session snapshot store.\n- **Hunk header**: bare `A B` selects original lines A..B. Two numbers are REQUIRED — single-line ranges are written `A A` (`5 5`), not `5`. The range separator is normally whitespace; the parser also silently accepts `A-B`, `A..B`, and `A…B` (unicode ellipsis). Virtual variants `BOF` and `EOF` target positions before line 1 / after the last line.\n- **Body rows** (one per line, immediately under the hunk header):\n - `+TEXT` — add the literal line `TEXT` verbatim, including all leading whitespace.\n - `+` alone — add one blank line.\n - `&A..B` — re-emit original file lines A..B. Use this to keep some of the lines you selected. `&A` is accepted as `&A..A`.\n- **Semantics**:\n - The new content of the selected range is just the body rows top-to-bottom.\n - **Empty body deletes the range entirely.**\n - `BOF` / `EOF` with empty body is a no-op (nothing to insert).\n\nAnchors come from `read`/`search` output. `read` emits a `¶PATH#TAG` header from the session snapshot store and lines as `LINE:TEXT`; copy the header into the edit section and copy only the line number into hunk headers.\n\n### Tolerated input shapes (lenient parsing)\n\nBecause models reproduce nearby shapes (`read` output, `apply_patch` envelopes, unified-diff hunks), the parser is liberal about a handful of harmless variants:\n\n- `A` (bare single number) — REJECTED. The parser throws `single-number hunk header \"A\" is no longer accepted`. Spell single-line ranges as `A A`.\n- `A-B`, `A..B`, `A…B` — accepted as `A B` (any of hyphen, double-dot, or unicode ellipsis works as a silent separator).\n- `&A` — accepted as `&A..A`.\n- Bare body rows with no `+`/`&` prefix are auto-prepended with `+` and a `BARE_BODY_AUTO_PIPED_WARNING` is appended, BUT only when every row in that block is uniformly bare. Mixed `+`/raw blocks still throw.\n- `+&A..B` rows (model mistakenly prefixed a repeat with `+`) are silently rerouted as `&A..B` repeats with `PLUS_PREFIXED_REPEAT_WARNING`.\n- Identical-range hunks in the same patch are coalesced last-wins with `REPLACE_PAIR_COALESCED_WARNING`.\n- An overlapping bare hunk followed by a concrete hunk is treated as a stale \"before then after\" pair; the bare hunk is dropped with `REPLACE_PAIR_COALESCED_OVERLAP_WARNING`.\n- `*** Begin Patch` / `*** End Patch` envelopes are silently consumed. `*** Abort` terminates parsing silently — ops parsed before the marker still apply, no warning surfaced.\n- `*** Update File:` / `*** Add File:` / `*** Delete File:` / `*** Move to:` apply_patch sentinels throw an `apply_patch sentinel … is not valid in hashline` error.\n- `@@`-bracketed hunk headers (whether the apply_patch `@@ context @@` form or the unified-diff `@@ -N,M +N,M @@` shape) are rejected with an explicit \"drop the `@@ ... @@` brackets\" message — hashline hunks are bare `A B` lines.\n\n## Outputs\n- Single-shot tool result; hashline mode does not use a `resolve` preview/apply handshake.\n- `content` contains one text block per call. For a successful single-file edit it is either:\n - `<path>:` plus a compact diff preview from `packages/hashline/src/diff-preview.ts`, or\n - `Updated <path>` / `Created <path>` when no compact preview text is emitted.\n- Parse, apply, or recovery warnings are appended as:\n\n```text\nWarnings:\n...\n```\n\n- `details` is `EditToolDetails` from `packages/coding-agent/src/edit/renderer.ts`:\n - `diff`: unified diff string\n - `firstChangedLine`: first changed post-edit line\n - `diagnostics`: LSP/format result if available\n - `op`: `\"create\"` or `\"update\"` for hashline mode\n - `meta`: output metadata\n - `perFileResults`: present for multi-section input\n- Multi-section input returns one aggregated result with combined text and per-file details.\n\n## Worked examples\n\nReference file (the exact shape `read` returns):\n\n```text\n¶a.ts#0A3\n1:const X = \"a\";\n2:const Y = X;\n3:\n4:console.log(X);\n5:console.log(Y);\n6:export { X, Y };\n```\n\nReplace line 1 with two lines:\n\n```text\n¶a.ts#0A3\n1\n+const X = \"b\";\n+export const Y = X;\n```\n\nInsert BELOW line 5 (keep line 5, add after):\n\n```text\n¶a.ts#0A3\n5\n&5\n+console.log(X + Y);\n```\n\nInsert ABOVE line 5 (add before, keep line 5):\n\n```text\n¶a.ts#0A3\n5\n+console.log(X + Y);\n&5\n```\n\nDelete lines 4..5 entirely:\n\n```text\n¶a.ts#0A3\n4 5\n```\n\nInsert at start and end of file:\n\n```text\n¶a.ts#0A3\nBOF\n+// header\nEOF\n+// trailer\n```\n\nMulti-file:\n\n```text\n¶src/a.ts#0A3\n4\n+const enabled = true;\n¶src/b.ts#1F7\n20\n```\n\n## Limits & Caps\n- File snapshot tags are exactly three uppercase-hex chars minted by the per-session snapshot store.\n- The visible mismatch report shows 2 lines of context on each side (`MISMATCH_CONTEXT`) in `packages/hashline/src/messages.ts`.\n- Stale-anchor recovery uses `fuzzFactor: 0` in `packages/hashline/src/recovery.ts`.\n- `HL_FILE_PREFIX` is `¶`, `HL_PAYLOAD_REPLACE` is `+`, `HL_PAYLOAD_REPEAT` is `&`, `HL_RANGE_SEP` is `..` (repeat-row bodies only), and `HL_FILE_HASH_SEP` is `#` (`packages/hashline/src/format.ts`). Hunk headers carry no sigil; the range is just two whitespace-separated line numbers.\n\n## Errors\n- Missing section header:\n - `input must begin with \"¶PATH#HASH\" on the first non-blank line for anchored edits; got: ...`\n- Missing tag for anchored edit:\n - `Missing hashline snapshot tag for anchored edit to <path>; use ¶<path>#tag from your latest read/search output.`\n- Stray payload line:\n - `line N: payload line has no preceding hunk header. Use an \\`A B\\` (or \\`BOF\\` / \\`EOF\\`) line above the body. Got \"...\".`\n- Raw body row with no `+` / `&` prefix in a mixed-prefix block:\n - `line N: payload row in a hashline hunk must start with + or &A..B. Got \"...\".`\n- Range out of order:\n - `line N: range A..B ends before it starts.`\n- Overlapping hunks on the same anchor:\n - `line N: anchor line X is already targeted by another hunk on line Y. Issue ONE hunk per range; payload is only the final desired content, never a before/after pair.`\n- apply_patch / unified-diff contamination:\n - `line N: apply_patch sentinel \"*** …\" is not valid in hashline. File sections start with \\`¶path#HASH\\` (no \\`Update File:\\` / \\`Add File:\\` keyword). Hunks are bare \\`A B\\` lines with \\`+TEXT\\` / \\`&A..B\\` body rows.`\n - `line N: unified-diff hunk header (\\`@@ -N,M +N,M @@\\`) is not valid in hashline. Hashline hunks are bare \\`A B\\` lines (or \\`BOF\\` / \\`EOF\\` keywords).`\n - `line N: \\`@@\\`-bracketed hunk header \"@@ …\" is not valid in hashline. Drop the \\`@@ ... @@\\` brackets and write the range directly: \\`5 7\\` (\\`BOF\\` / \\`EOF\\` for virtual positions).`\n - `line N: single-number hunk header \"N\" is no longer accepted. Spell single-line ranges as \\`N N\\` (two numbers); hashline hunks are bare \\`A B\\` lines (or \\`BOF\\` / \\`EOF\\`).`\n- Out-of-range anchor:\n - `Line N does not exist (file has M lines)`\n- Stale snapshot tag: the `Patcher` first attempts snapshot-based recovery (3-way-merge of the model’s edits onto the current file via `packages/hashline/src/recovery.ts`, `fuzzFactor: 0`). When recovery cannot prove a valid result it throws `MismatchError`, which distinguishes two cases:\n - **Hash recognized but file content drifted** (an in-session edit advanced the hash, or an external write changed the file): \"file changed between read and edit\" / \"Section is bound to #X, but the current file hashes to #Y\". Copy the post-edit hash from the prior edit response, or re-read.\n - **Hash never recorded for this path** (likely fabricated or carried over from a prior session): \"hash #X is not from this session\". Re-read; never invent the tag.\n In both cases the error includes the current file hash plus 2 lines of context around each anchor (`*LINE:TEXT` / ` LINE:TEXT`).\n- No-op edit:\n - `Edits to <path> parsed and applied cleanly, but produced no change: your body row(s) are byte-identical to the file at the targeted lines. The bug is somewhere else — re-read the file before issuing another edit. Do NOT widen the payload or add lines; verify the anchor first.`\n- Recovery failure is silent internally: if cache-based merge cannot prove a valid result, the mismatch error is surfaced unchanged.\n\n## Warnings\n- `Detected two identical-range hashline hunks; kept only the second hunk. …` (`REPLACE_PAIR_COALESCED_WARNING`)\n- `Detected an overlapping bare hashline hunk immediately followed by a concrete hunk; dropped the earlier bare hunk. …` (`REPLACE_PAIR_COALESCED_OVERLAP_WARNING`)\n- `Auto-prefixed bare body row(s) with +. Always start payload rows with +TEXT (literal) or &A..B (repeat) …` (`BARE_BODY_AUTO_PIPED_WARNING`)\n- `A body row started with `+&A..B`. `+` (literal text) and `&A..B` (repeat) are sibling row kinds …` (`PLUS_PREFIXED_REPEAT_WARNING`)\n- Recovery banners: `RECOVERY_EXTERNAL_WARNING`, `RECOVERY_SESSION_CHAIN_WARNING`, `RECOVERY_SESSION_REPLAY_WARNING` (`packages/hashline/src/messages.ts`).\n",
|
|
76
|
-
"tools/eval.md": "# eval\n\n> Execute Python or JavaScript code in persistent cell-based runtimes.\n\n> **Notice:** Do not shell out to `python -c`/`python -e`, `bun -e`, or `node -e` via the `bash` tool for ad-hoc code execution. Use this tool instead — it gives you persistent state across cells, structured `display()` output, image/JSON capture, and proper cancellation/timeout handling that one-shot `-e`/`-c` invocations cannot provide.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/eval.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/eval.md`\n- Key collaborators:\n - `packages/coding-agent/src/eval/backend.ts` — backend execution contract\n - `packages/coding-agent/src/eval/js/index.ts` — JS backend adapter\n - `packages/coding-agent/src/eval/js/executor.ts` — JS execution + output sink\n - `packages/coding-agent/src/eval/js/context-manager.ts` — persistent VM contexts, prelude, tool bridge\n - `packages/coding-agent/src/eval/js/prelude.txt` — JS global helpers\n - `packages/coding-agent/src/eval/py/index.ts` — Python backend adapter\n - `packages/coding-agent/src/eval/py/executor.ts` — kernel session retention, reset, cleanup\n - `packages/coding-agent/src/eval/py/kernel.ts` — Jupyter gateway/kernel protocol, display capture\n - `packages/coding-agent/src/eval/py/prelude.py` — Python helper functions and status events\n - `packages/coding-agent/src/session/streaming-output.ts` — truncation, artifacts, streamed chunks\n - `docs/python-repl.md` — Python kernel/gateway internals\n\n## Inputs\n\nTool parameters are a JSON object with a single `cells` field — an ordered array of cell objects. Each cell is a structured record; there is no `*** Cell` header parsing, no language sniffing, and no implicit single-cell fallback. Cells run in array order; state persists within each language across cells and across tool calls.\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `cells` | `EvalCellInput[]` | Yes | Cells executed in order. At least one cell is required (`.min(1)`). |\n\nEach `EvalCellInput` (from `evalCellSchema` in `packages/coding-agent/src/tools/eval.ts`):\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `language` | `\"py\" \\| \"js\"` | Yes | Backend selector. `\"py\"` maps to the IPython/Jupyter kernel (`python` backend); `\"js\"` maps to the persistent JavaScript VM. |\n| `code` | `string` | Yes | Cell body, verbatim. JSON-encoded — embed newlines, quotes, and indentation directly; no fences, no headers. |\n| `title` | `string` | No | Short label rendered in the transcript (e.g. `\"imports\"`, `\"load config\"`). |\n| `timeout` | `integer` | No | Per-cell timeout in seconds, clamped to `1..600`. Defaults to 30 when omitted. |\n| `reset` | `boolean` | No | Wipe this cell's language kernel before running. Reset is per-language: a `py` cell's reset does not touch the JS VM and vice versa. Defaults to `false`. |\n\nMinimal example matching the live schema:\n\n```json\n{\n \"cells\": [\n { \"language\": \"py\", \"title\": \"imports\", \"timeout\": 10, \"code\": \"import json\\nfrom pathlib import Path\" },\n { \"language\": \"py\", \"title\": \"load config\", \"code\": \"data = json.loads(read('package.json'))\\ndisplay(data)\" },\n { \"language\": \"js\", \"title\": \"summary\", \"reset\": true, \"code\": \"const data = JSON.parse(await read('package.json'));\\ndisplay(data);\\nreturn data.name;\" }\n ]\n}\n```\n\n## Outputs\n\nFinal result from `EvalTool.execute()` is single-shot, but `onUpdate` streams partial text and `details` while cells run.\n\nReturned shape:\n\n- `content`: one text block containing combined cell output, or `(no text output)` / `(no output)` when only rich outputs exist.\n- `details` (`EvalToolDetails` from `packages/coding-agent/src/eval/types.ts`):\n - `cells`: per-cell code, status (`pending`/`running`/`complete`/`error`), output, duration, exit code, status events, markdown flag\n - `language`: first backend used\n - `languages`: distinct backends used, in first-use order\n - `jsonOutputs`: structured values emitted via `display(...)`\n - `images`: image payloads emitted by Python rich display or JS `display({ type: \"image\", ... })`\n - `statusEvents`: aggregated helper/tool status events\n - `notice`: backend fallback notice (currently unused; reserved for future per-cell notices)\n - `meta`: truncation metadata\n - `isError`: set on cell failure or cancellation\n\nRenderer behavior in `packages/coding-agent/src/tools/eval.ts`:\n\n- call preview renders each cell's `code` with syntax highlighting based on its declared `language`\n- result view renders each cell separately, including status, duration, and output\n- markdown outputs are rendered with the Markdown component instead of plain text\n- `jsonOutputs` render as a tree, collapsed or expanded depending on UI state\n- timeout / truncation notices render as dim metadata lines\n- images are carried in `details.images`; generic tool UI image handling renders them outside the text block\n\nSide-channel artifacts:\n\n- `session.allocateOutputArtifact?.(\"eval\")` may allocate an `artifact://...` backing store for spilled output.\n- Truncated output metadata points at that artifact when available.\n\n## Flow\n\n1. `EvalTool.execute()` in `packages/coding-agent/src/tools/eval.ts` receives `params.cells` already validated by the Zod schema — no string parsing step.\n2. For each cell, `execute()` maps `cell.language` to an `EvalLanguage` (`\"py\"` → `\"python\"`, `\"js\"` → `\"js\"`) and calls `resolveBackend(session, language)`:\n - `python` is gated on `eval.py !== false` and `pythonBackend.isAvailable(session)`.\n - `js` is gated on `eval.js !== false`.\n - A disabled or unavailable requested backend throws `ToolError`; there is no auto-fallback or sniffing.\n3. The tool allocates an `OutputSink`, a `TailBuffer`, per-cell result objects, and a `sessionAbortController`. `session.trackEvalExecution?.(...)` can wrap the whole run for external cancellation tracking.\n4. It resolves the executor session id from `session.getEvalSessionId?.()`, falling back to `defaultEvalSessionId(session)`. Subagents inherit the parent's id so both sides share the same JS VM and Python kernel for each backend.\n5. Cells execute sequentially within one eval tool call. For each cell, `execute()`:\n - clamps `(cell.timeout ?? 30) * 1000` ms through `clampTimeout(\"eval\", ...)`\n - builds a combined abort signal from the tool signal, the timeout, and the session abort controller\n - marks the cell `running` and emits an update\n - calls the backend’s `execute()` with `cwd`, `sessionId`, `sessionFile`, `kernelOwnerId`, `deadlineMs`, `reset` (defaults to `false`), artifact info, and chunk callback\n6. JS cells dispatch through `packages/coding-agent/src/eval/js/index.ts` into `executeJs()`; Python cells dispatch through `packages/coding-agent/src/eval/py/index.ts` into `executePython()`.\n7. Backend text chunks stream into the shared `OutputSink`; rich outputs are accumulated separately as JSON, images, markdown markers, and status events.\n8. After each cell:\n - text output is trimmed and stored on that cell result\n - multi-cell runs prefix text with `[i/n]` and the optional title\n - cancellations return early with `isError: true` and a cell-specific abort message\n - non-zero exit codes return early with `isError: true` and a message naming the failed cell\n - later cells are skipped after the first error, but earlier cell state persists in the underlying runtime\n9. On success, the tool joins all cell outputs, synthesizes `(no text output)` or `(no output)` when needed, and attaches truncation metadata from `summarizeFinal()`.\n10. The renderer uses `details.cells`, `details.jsonOutputs`, and `details.statusEvents` to build notebook-style output. `mergeCallAndResult = true` and `inline = true`, so call and result render together in the transcript.\n\n## Modes / Variants\n\n### Backend selection\n\nBackend choice is **explicit per cell** — there is no auto-detection.\n\n- `language: \"py\"` → Python (IPython/Jupyter) backend\n- `language: \"js\"` → JavaScript VM backend\n\nIf the requested backend is disabled or unavailable, the tool throws `ToolError` for that cell. The caller chooses; the tool does not silently substitute.\n\n### JavaScript runtime\n\nImplemented in `packages/coding-agent/src/eval/js/context-manager.ts` and `packages/coding-agent/src/eval/js/prelude.txt`.\n\n- Persistent worker-backed VM sessions keyed by `js:${sessionId}`\n- `reset: true` calls `resetVmContext(sessionKey)` before the cell executes; reset is destructive for all live runs on that JS session\n- Top-level `await` and bare `return` are supported by wrapping code in an async IIFE when `wrapCode()` sees `await` or `return`\n- Top-level static `import ... from ...` and dynamic `import(...)` calls are routed through `rewriteImports()`, which sends them via `__omp_import__` so the specifier resolves against the session cwd\n- Module cache is busted for **local** imports between cells so edits to source files are picked up without restarting the runtime. `__omp_import__` deletes `require.cache[absPath]` before re-importing whenever the original specifier is a filesystem path: relative (`./x`, `../x`, `.`, `..`), POSIX-absolute (`/...`), home-prefixed (`~/...`), or Windows drive-letter (`C:\\...` / `C:/...`). Bare specifiers (`react`, `lodash/x`) and URL/scheme specifiers (`node:fs`, `file://...`, `https://...`) are left in cache so package identity stays stable across cells. The cache-bust only fires when the resolved target is an absolute path — unresolved bare-package fallbacks (`resolveImportSpecifier()` returning the original specifier) skip it.\n- The prelude installs globals:\n - `display`, `print`\n - `read`, `write`, `append`, `sort`, `uniq`, `counter`, `diff`, `tree`, `env`, `output`\n - `tool.<name>(args)` proxy for arbitrary session tool calls\n- JS helpers are async because they cross the VM/tool boundary\n- `display(value)` behavior:\n - plain objects/arrays become JSON outputs\n - `{ type: \"image\", data, mimeType }` becomes an image output\n - scalars become text\n- The VM exposes a restricted `process` subset plus `Buffer`, `fetch`, `Blob`, `File`, `Headers`, `Request`, `Response`, `fs`, `require`, and browser-style globals\n- Concurrent runs on the same VM are not queued end-to-end. Synchronous JS still runs on the single event loop; awaited regions can interleave with sibling runs.\n\n### Python runtime\n\nImplemented in `packages/coding-agent/src/eval/py/executor.ts`, `packages/coding-agent/src/eval/py/kernel.ts`, and `packages/coding-agent/src/eval/py/prelude.py`. See `docs/python-repl.md` for gateway and kernel details.\n\n- Default mode is retained `session` kernels keyed by `python:${sessionId}`\n- Optional `python.kernelMode = \"per-call\"` creates a fresh kernel for each cell and shuts it down afterward\n- `reset: true` disposes the retained kernel for that session before the cell runs; later Python cells in the same tool call reuse the fresh kernel\n- Startup path:\n - availability check\n - create/connect kernel\n - initialize cwd / env / `sys.path`\n - execute `PYTHON_PRELUDE`\n- Python cells run in the runner's persistent asyncio event loop, so top-level `await` works; the prompt warns not to use `asyncio.run(...)`\n- The Python prelude defines helpers with the same surface as JS where practical, including `tool.<name>(args)` through a per-run loopback bridge\n- Synchronous statement blocks run in the default executor with ContextVar state copied in; the GIL still serializes bytecode execution, but awaited regions can interleave with sibling cells\n- Kernel `display_data` / `execute_result` messages map to:\n - `application/x-omp-status` → status event\n - `image/png` → image output\n - `application/json` → JSON output\n - `text/markdown` → markdown output\n - `text/plain` → text output\n - `text/html` → HTML converted to markdown with `htmlToBasicMarkdown()`\n- Interactive stdin is rejected: `input_request` sends an empty reply, marks `stdinRequested`, and the executor returns exit code `1`\n\n### Multi-language call behavior\n\nA single tool call can mix Python and JS cells. Persistence is per language runtime:\n\n- `reset: true` on a Python cell does not touch JS state\n- `reset: true` on a JS cell does not touch Python state\n- each backend keeps its own retained session keyed from the same session-derived ID\n\n## Side Effects\n\n- Filesystem\n - JS/Python prelude helpers can read, write, append, diff, and traverse files under the session cwd or absolute paths.\n - Output may spill to an artifact file via `OutputSink`.\n- Network\n - Python backend speaks NDJSON to a local `python3` subprocess over stdin/stdout (no network).\n - JS runtime exposes `fetch` and `tool.<name>()`; those tools may perform additional network I/O.\n- Subprocesses / native bindings\n - Python availability check runs `<python> -c ...`.\n - Python backend spawns one `python -u runner.py` subprocess per kernel; cancellation sends `SIGINT`. Details in `docs/python-repl.md`.\n- Session state\n - `session.assertEvalExecutionAllowed?.()` can block execution.\n - `session.trackEvalExecution?.(...)` can register cancellable eval work.\n - `session.getSessionFile?.()`, `session.getEvalSessionId?.()`, and `session.getEvalKernelOwnerId?.()` influence VM/kernel reuse and artifact lookup.\n - JS VM contexts persist across eval calls until reset/disposal.\n - Python retained kernels persist until reset, owner cleanup, or process exit.\n- User-visible prompts / interactive UI\n - none; stdin requests are rejected programmatically\n- Background work / cancellation\n - Python retained kernels have heartbeat and idle cleanup timers.\n - Cancellation hard-kills/resets the shared executor for that backend: JS terminates the worker, Python sends SIGINT and may escalate to subprocess shutdown.\n\n## Limits & Caps\n\n- Per-cell timeout default: 30s (applied when `timeout` is omitted in `EvalTool.execute()`; clamped through `TOOL_TIMEOUTS.eval.default` in `packages/coding-agent/src/tools/tool-timeouts.ts`)\n- Schema-level `timeout` range: integer `1..600` seconds (enforced by Zod on the cell schema)\n- Timeout clamp at runtime: 1s minimum, 600s maximum (`TOOL_TIMEOUTS.eval` in `packages/coding-agent/src/tools/tool-timeouts.ts`)\n- Transcript code/output preview: 10 lines by default (`EVAL_DEFAULT_PREVIEW_LINES` in `packages/coding-agent/src/tools/eval.ts`)\n- Output truncation window: 50KB default (`DEFAULT_MAX_BYTES` in `packages/coding-agent/src/session/streaming-output.ts`)\n- Output line cap inside truncation helpers: 3000 lines (`DEFAULT_MAX_LINES` in `packages/coding-agent/src/session/streaming-output.ts`)\n- Streaming tail buffer for live updates: `DEFAULT_MAX_BYTES * 2` = 100KB (`packages/coding-agent/src/tools/eval.ts`)\n- Python retained kernel idle timeout: 5 minutes (`IDLE_TIMEOUT_MS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python retained kernel cap: 4 sessions (`MAX_KERNEL_SESSIONS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python retained kernel cleanup sweep: every 30s (`CLEANUP_INTERVAL_MS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python owner-cleanup shutdown wait: 2000ms (`OWNER_CLEANUP_KERNEL_SHUTDOWN_TIMEOUT_MS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python heartbeat interval: 5s (`ensureKernelHeartbeat()` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python external gateway availability check timeout: 5s (`AbortSignal.timeout(5000)` in `packages/coding-agent/src/eval/py/kernel.ts`)\n- Python auto-restart budget: one restart per retained session before hard failure (`restartCount > 1` in `packages/coding-agent/src/eval/py/executor.ts`)\n\n## Errors\n\n- Zod validation rejects malformed `cells` arrays before `execute()` runs (missing `language`/`code`, out-of-range `timeout`, empty `cells`).\n- Missing session without proxy executor throws `ToolError(\"Eval tool requires a session when not using proxy executor\")`.\n- Disabled/unavailable backends throw `ToolError` from `resolveBackend()`:\n - `eval.py = false` and a `py` cell is requested\n - `eval.js = false` and a `js` cell is requested\n - Python kernel unavailable and a `py` cell is requested\n- JS runtime exceptions are converted into text output plus `exitCode: 1`; cancellations return `cancelled: true` and may append `Command timed out`.\n- Python execution errors from the kernel become text output and `exitCode: 1`; later cells are skipped.\n- Python stdin requests are treated as errors with the message `Kernel requested stdin; interactive input is not supported.`\n- Cancellation is returned, not thrown, once backend execution has started. The tool formats it as a cell failure and sets `details.isError = true`.\n- If output truncates, the tool still succeeds; truncation is surfaced through `details.meta` and artifact-backed full output when available.\n\n## Shared executor trade-offs\n\n- Parent agents and subagents share eval state bidirectionally when a subagent inherits the parent's executor id. Mutations in either direction are visible to the other participant.\n- Async regions of concurrent runs can interleave. Synchronous JS still blocks the VM event loop; synchronous Python still contends on the GIL.\n- Cancelling one run is destructive to the shared backend executor. This is intentional: JS worker termination and Python SIGINT/subprocess shutdown are the only reliable way to interrupt arbitrary user code.\n- `reset: true` is destructive for every live run on that backend session id. New starts on that backend are rejected while reset is in flight.\n\n## Notes\n\n- Backend selection is now strictly explicit per cell: `language` must be `\"py\"` or `\"js\"`. The previous `*** Cell` header parser, the `eval.lark` constrained grammar, and the sniffer-based fallback have all been removed.\n- `EvalTool.customFormat` no longer exists. Tool calls flow through the standard JSON schema; there is no Lark-constrained sampling path.\n- `tool.<name>()` exists in both JS and Python. Python calls route through a per-run loopback bridge keyed by the current cell id.\n- JS helper paths reject protocol URIs (`://`) in `resolvePath()`; the JS prelude is filesystem-only unless the code calls `tool.read(...)` or another tool explicitly.\n- Python helper `output(...)` depends on `PI_SESSION_FILE`; it fails outside a session-backed run.\n- `display()` can produce text and structured outputs from the same value; the renderer prefers markdown over `text/plain` when both exist.\n- JS static imports are rewritten only at top level. Nested imports stay invalid and surface normal JS syntax/runtime errors.\n- `EvalTool` is `concurrency = \"exclusive\"` within one agent session, but parent and subagent sessions can run eval concurrently when they share an inherited executor id.\n- The tool description shown to the model is templated by backend availability (`getEvalToolDescription()`); if Python is unavailable, the prompt omits Python-specific instructions.\n",
|
|
74
|
+
"tools/edit.md": "# edit\n\n> Applies source edits; default mode is the hashline patch language consumed from a single `input` string.\n\n## Source\n- Entry: `packages/coding-agent/src/edit/index.ts`\n- Model-facing prompt: `packages/hashline/src/prompt.md`\n- Key collaborators:\n - `packages/coding-agent/src/utils/edit-mode.ts` — selects active edit mode\n - `packages/hashline/src/grammar.lark` — canonical constrained-decoding grammar\n - `packages/hashline/src/format.ts` — sigils and header constants (`¶`, `#`, `+`, `replace`, `delete`, `insert`)\n - `packages/hashline/src/input.ts` — parses `¶PATH#TAG` sections\n - `packages/hashline/src/tokenizer.ts` / `packages/hashline/src/parser.ts` — tokenizes and parses ops\n - `packages/hashline/src/apply.ts` — applies parsed edits to file text\n - `packages/hashline/src/mismatch.ts` — stale-anchor mismatch formatting\n - `packages/hashline/src/recovery.ts` — snapshot-based stale-anchor recovery\n - `packages/hashline/src/snapshots.ts` — mints and resolves per-path opaque snapshot tags\n\n## Inputs\n\n### Hashline mode (default)\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `input` | `string` | Yes | One or more file sections. Anchored sections start with `¶PATH#TAG`; hashless `¶PATH` is allowed only for new-file creation or purely `insert head:` / `insert tail:` inserts. Optional `*** Begin Patch` / `*** End Patch` envelope is ignored if present. |\n\nPatch language inside `input`:\n\n- **File header**: `¶PATH#TAG` (or `¶PATH` for new-file / head/tail-only inserts). `TAG` is three uppercase-hex chars minted by the session snapshot store.\n- **Operations**:\n - `replace N..M:` — replace original lines N..M with the body rows below.\n - `delete N..M` — delete original lines N..M. No body.\n - `insert before N:` — insert body rows immediately before line N.\n - `insert after N:` — insert body rows immediately after line N.\n - `insert head:` — insert body rows at the start of the file.\n - `insert tail:` — insert body rows at the end of the file.\n- **Body rows**:\n - Only body-bearing headers end in `:`.\n - Every body row is `+TEXT`; `+` alone adds a blank line.\n - `delete` never has body rows.\n - There is no repeat row kind. To keep a line, leave it out of every range; split edits into multiple hunks when needed.\n - `-` rows are invalid. Literal text beginning with `-` or `+` must be written as `+-text` / `++text`.\n\nAnchors come from `read`/`search` output. `read` emits a `¶PATH#TAG` header from the session snapshot store and lines as `LINE:TEXT`; copy the header into the edit section and copy only the line number into hunk headers.\n\n### Tolerated input shapes (lenient parsing)\n\nThe canonical grammar is strict, but the hand parser accepts a few non-dangerous variants:\n\n- `replace N:` — accepted as `replace N..N:`.\n- `delete N` — accepted as single-line delete.\n- Missing trailing colon on `replace` or `insert` — accepted.\n- `replace N-M:`, `replace N…M:`, and `replace N M:` — accepted as `replace N..M:`.\n- Bare body rows with no `+` prefix are auto-prepended with `+` and a `BARE_BODY_AUTO_PIPED_WARNING` is appended.\n- `*** Begin Patch` / `*** End Patch` envelopes are silently consumed. `*** Abort` terminates parsing silently — ops parsed before the marker still apply, no warning surfaced.\n- `*** Update File:` / `*** Add File:` / `*** Delete File:` / `*** Move to:` apply_patch sentinels throw an `apply_patch sentinel … is not valid in hashline` error.\n- `@@`-bracketed hunk headers are rejected with guidance to write a verb header.\n- Bare `N` and bare `N M` / `N..M` headers are rejected with guidance to write `replace` or `delete`.\n- `delete N..M:` and any body rows under `delete` are rejected.\n- Empty `replace` / `insert` hunks are rejected.\n- `-` body rows are rejected with `MINUS_ROW_REJECTED`.\n\n## Outputs\n- Single-shot tool result; hashline mode does not use a `resolve` preview/apply handshake.\n- `content` contains one text block per call. For a successful single-file edit it is either:\n - `<path>:` plus a compact diff preview from `packages/hashline/src/diff-preview.ts`, or\n - `Updated <path>` / `Created <path>` when no compact preview text is emitted.\n- Parse, apply, or recovery warnings are appended as:\n\n```text\nWarnings:\n...\n```\n\n- `details` is `EditToolDetails` from `packages/coding-agent/src/edit/renderer.ts`:\n - `diff`: unified diff string\n - `firstChangedLine`: first changed post-edit line\n - `diagnostics`: LSP/format result if available\n - `op`: `\"create\"` or `\"update\"` for hashline mode\n - `meta`: output metadata\n - `perFileResults`: present for multi-section input\n- Multi-section input returns one aggregated result with combined text and per-file details.\n\n## Worked examples\n\nReference file (the exact shape `read` returns):\n\n```text\n¶a.ts#0A3\n1:const X = \"a\";\n2:const Y = X;\n3:\n4:console.log(X);\n5:console.log(Y);\n6:export { X, Y };\n```\n\nReplace line 1 with two lines:\n\n```text\n¶a.ts#0A3\nreplace 1..1:\n+const X = \"b\";\n+export const Y = X;\n```\n\nInsert below line 5:\n\n```text\n¶a.ts#0A3\ninsert after 5:\n+console.log(X + Y);\n```\n\nInsert above line 5:\n\n```text\n¶a.ts#0A3\ninsert before 5:\n+console.log(X + Y);\n```\n\nDelete lines 4..5 entirely:\n\n```text\n¶a.ts#0A3\ndelete 4..5\n```\n\nInsert at start and end of file:\n\n```text\n¶a.ts#0A3\ninsert head:\n+// header\ninsert tail:\n+// trailer\n```\n\nMulti-file:\n\n```text\n¶src/a.ts#0A3\nreplace 4..4:\n+const enabled = true;\n¶src/b.ts#1F7\ndelete 20\n```\n\n## Limits & Caps\n- File snapshot tags are exactly three uppercase-hex chars minted by the per-session snapshot store.\n- The visible mismatch report shows 2 lines of context on each side (`MISMATCH_CONTEXT`) in `packages/hashline/src/messages.ts`.\n- Stale-anchor recovery uses `fuzzFactor: 0` in `packages/hashline/src/recovery.ts`.\n- `HL_FILE_PREFIX` is `¶`, `HL_PAYLOAD_REPLACE` is `+`, `HL_RANGE_SEP` is `..`, `HL_FILE_HASH_SEP` is `#`, and hunk keyword constants are `replace` / `delete` / `insert` (`packages/hashline/src/format.ts`).\n\n## Errors\n- Missing section header:\n - `input must begin with \"¶PATH#HASH\" on the first non-blank line for anchored edits; got: ...`\n- Missing tag for anchored edit:\n - `Missing hashline snapshot tag for anchored edit to <path>; use ¶<path>#tag from your latest read/search output.`\n- Stray payload line:\n - `line N: payload line has no preceding hunk header. Use \\`replace N..M:\\`, \\`delete N..M\\`, or \\`insert before|after|head|tail:\\` above the body. Got \"...\".`\n- Minus row:\n - ``line N: `-` rows are not valid; hashline ranges already name the lines being changed. To insert a literal line starting with `-`, write `+-…`.``\n- Empty body-bearing hunk:\n - `line N: \\`replace N..M:\\` needs at least one \\`+TEXT\\` body row. To delete lines, use \\`delete N..M\\`.`\n - `line N: \\`insert\\` needs at least one \\`+TEXT\\` body row.`\n- Delete with body:\n - `line N: \\`delete N..M\\` does not take body rows. Remove the body, or use \\`replace N..M:\\`.`\n- Range out of order:\n - `line N: range A..B ends before it starts.`\n- Overlapping hunks on the same anchor:\n - `line N: anchor line X is already targeted by another hunk on line Y. Issue ONE hunk per range; payload is only the final desired content, never a before/after pair.`\n- apply_patch / unified-diff contamination:\n - `line N: apply_patch sentinel \"*** …\" is not valid in hashline. File sections start with \\`¶path#HASH\\` (no \\`Update File:\\` / \\`Add File:\\` keyword). Use \\`replace N..M:\\`, \\`delete N..M\\`, or \\`insert before|after|head|tail:\\` ops.`\n - `line N: unified-diff hunk header (\\`@@ -N,M +N,M @@\\`) is not valid in hashline. Use \\`replace N..M:\\`, \\`delete N..M\\`, or \\`insert before|after|head|tail:\\` ops.`\n - `line N: \\`@@\\`-bracketed hunk header \"@@ …\" is not valid in hashline. Drop the \\`@@ ... @@\\` brackets and write a verb header such as \\`replace N..M:\\`.`\n - `line N: hunk headers need a verb. Use \\`replace N..N:\\` to replace, or \\`delete N\\` to delete.`\n - `line N: bare range hunk header \"N M\" is not valid. Hunk headers need a verb: write \\`replace N..M:\\` or \\`delete N..M\\`.`\n- Out-of-range anchor:\n - `Line N does not exist (file has M lines)`\n- Stale snapshot tag: the `Patcher` first attempts snapshot-based recovery. When recovery cannot prove a valid result it throws `MismatchError`, which distinguishes recognized-but-drifted hashes from never-recorded hashes. The error includes the current file hash plus context around each anchor.\n- No-op edit:\n - `Edits to <path> parsed and applied cleanly, but produced no change: your body row(s) are byte-identical to the file at the targeted lines. The bug is somewhere else — re-read the file before issuing another edit. Do NOT widen the payload or add lines; verify the anchor first.`\n- Recovery failure is silent internally: if cache-based merge cannot prove a valid result, the mismatch error is surfaced unchanged.\n\n## Warnings\n- `Auto-prefixed bare body row(s) with +. Body rows must be +TEXT literal lines …` (`BARE_BODY_AUTO_PIPED_WARNING`)\n- Recovery banners: `RECOVERY_EXTERNAL_WARNING`, `RECOVERY_SESSION_CHAIN_WARNING`, `RECOVERY_SESSION_REPLAY_WARNING` (`packages/hashline/src/messages.ts`).\n",
|
|
75
|
+
"tools/eval.md": "# eval\n\n> Execute Python or JavaScript code in persistent cell-based runtimes.\n\n> **Notice:** Do not shell out to `python -c`/`python -e`, `bun -e`, or `node -e` via the `bash` tool for ad-hoc code execution. Use this tool instead — it gives you persistent state across cells, structured `display()` output, image/JSON capture, and proper cancellation/timeout handling that one-shot `-e`/`-c` invocations cannot provide.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/eval.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/eval.md`\n- Key collaborators:\n - `packages/coding-agent/src/eval/backend.ts` — backend execution contract\n - `packages/coding-agent/src/eval/js/index.ts` — JS backend adapter\n - `packages/coding-agent/src/eval/js/executor.ts` — JS execution + output sink\n - `packages/coding-agent/src/eval/js/context-manager.ts` — persistent VM contexts, prelude, tool bridge\n - `packages/coding-agent/src/eval/js/prelude.txt` — JS global helpers\n - `packages/coding-agent/src/eval/py/index.ts` — Python backend adapter\n - `packages/coding-agent/src/eval/py/executor.ts` — kernel session retention, reset, cleanup\n - `packages/coding-agent/src/eval/py/kernel.ts` — Jupyter gateway/kernel protocol, display capture\n - `packages/coding-agent/src/eval/py/prelude.py` — Python helper functions and status events\n - `packages/coding-agent/src/session/streaming-output.ts` — truncation, artifacts, streamed chunks\n - `docs/python-repl.md` — Python kernel/gateway internals\n\n## Inputs\n\nTool parameters are a JSON object with a single `cells` field — an ordered array of cell objects. Each cell is a structured record; there is no `*** Cell` header parsing, no language sniffing, and no implicit single-cell fallback. Cells run in array order; state persists within each language across cells and across tool calls.\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `cells` | `EvalCellInput[]` | Yes | Cells executed in order. At least one cell is required (`.min(1)`). |\n\nEach `EvalCellInput` (from `evalCellSchema` in `packages/coding-agent/src/tools/eval.ts`):\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `language` | `\"py\" \\| \"js\"` | Yes | Backend selector. `\"py\"` maps to the IPython/Jupyter kernel (`python` backend); `\"js\"` maps to the persistent JavaScript VM. |\n| `code` | `string` | Yes | Cell body, verbatim. JSON-encoded — embed newlines, quotes, and indentation directly; no fences, no headers. |\n| `title` | `string` | No | Short label rendered in the transcript (e.g. `\"imports\"`, `\"load config\"`). |\n| `timeout` | `integer` | No | Per-cell timeout in seconds, clamped to `1..600`. Defaults to 30 when omitted. |\n| `reset` | `boolean` | No | Wipe this cell's language kernel before running. Reset is per-language: a `py` cell's reset does not touch the JS VM and vice versa. Defaults to `false`. |\n\nMinimal example matching the live schema:\n\n```json\n{\n \"cells\": [\n { \"language\": \"py\", \"title\": \"imports\", \"timeout\": 10, \"code\": \"import json\\nfrom pathlib import Path\" },\n { \"language\": \"py\", \"title\": \"load config\", \"code\": \"data = json.loads(read('package.json'))\\ndisplay(data)\" },\n { \"language\": \"js\", \"title\": \"summary\", \"reset\": true, \"code\": \"const data = JSON.parse(await read('package.json'));\\ndisplay(data);\\nreturn data.name;\" }\n ]\n}\n```\n\n## Outputs\n\nFinal result from `EvalTool.execute()` is single-shot, but `onUpdate` streams partial text and `details` while cells run.\n\nReturned shape:\n\n- `content`: one text block containing combined cell output, or `(no text output)` / `(no output)` when only rich outputs exist.\n- `details` (`EvalToolDetails` from `packages/coding-agent/src/eval/types.ts`):\n - `cells`: per-cell code, status (`pending`/`running`/`complete`/`error`), output, duration, exit code, status events, markdown flag\n - `language`: first backend used\n - `languages`: distinct backends used, in first-use order\n - `jsonOutputs`: structured values emitted via `display(...)`\n - `images`: image payloads emitted by Python rich display or JS `display({ type: \"image\", ... })`\n - `statusEvents`: aggregated helper/tool status events\n - `notice`: backend fallback notice (currently unused; reserved for future per-cell notices)\n - `meta`: truncation metadata\n - `isError`: set on cell failure or cancellation\n\nRenderer behavior in `packages/coding-agent/src/tools/eval.ts`:\n\n- call preview renders each cell's `code` with syntax highlighting based on its declared `language`\n- result view renders each cell separately, including status, duration, and output\n- markdown outputs are rendered with the Markdown component instead of plain text\n- `jsonOutputs` render as a tree, collapsed or expanded depending on UI state\n- timeout / truncation notices render as dim metadata lines\n- images are carried in `details.images`; generic tool UI image handling renders them outside the text block\n\nSide-channel artifacts:\n\n- `session.allocateOutputArtifact?.(\"eval\")` may allocate an `artifact://...` backing store for spilled output.\n- Truncated output metadata points at that artifact when available.\n\n## Flow\n\n1. `EvalTool.execute()` in `packages/coding-agent/src/tools/eval.ts` receives `params.cells` already validated by the Zod schema — no string parsing step.\n2. For each cell, `execute()` maps `cell.language` to an `EvalLanguage` (`\"py\"` → `\"python\"`, `\"js\"` → `\"js\"`) and calls `resolveBackend(session, language)`:\n - `python` is gated on `eval.py !== false` and `pythonBackend.isAvailable(session)`.\n - `js` is gated on `eval.js !== false`.\n - A disabled or unavailable requested backend throws `ToolError`; there is no auto-fallback or sniffing.\n3. The tool allocates an `OutputSink`, a `TailBuffer`, per-cell result objects, and a `sessionAbortController`. `session.trackEvalExecution?.(...)` can wrap the whole run for external cancellation tracking.\n4. It resolves the executor session id from `session.getEvalSessionId?.()`, falling back to `defaultEvalSessionId(session)`. Subagents inherit the parent's id so both sides share the same JS VM and Python kernel for each backend.\n5. Cells execute sequentially within one eval tool call. For each cell, `execute()`:\n - clamps `(cell.timeout ?? 30) * 1000` ms through `clampTimeout(\"eval\", ...)`\n - builds a combined abort signal from the tool signal, the timeout, and the session abort controller\n - marks the cell `running` and emits an update\n - calls the backend’s `execute()` with `cwd`, `sessionId`, `sessionFile`, `kernelOwnerId`, `deadlineMs`, `reset` (defaults to `false`), artifact info, and chunk callback\n6. JS cells dispatch through `packages/coding-agent/src/eval/js/index.ts` into `executeJs()`; Python cells dispatch through `packages/coding-agent/src/eval/py/index.ts` into `executePython()`.\n7. Backend text chunks stream into the shared `OutputSink`; rich outputs are accumulated separately as JSON, images, markdown markers, and status events.\n8. After each cell:\n - text output is trimmed and stored on that cell result\n - multi-cell runs prefix text with `[i/n]` and the optional title\n - cancellations return early with `isError: true` and a cell-specific abort message\n - non-zero exit codes return early with `isError: true` and a message naming the failed cell\n - later cells are skipped after the first error, but earlier cell state persists in the underlying runtime\n9. On success, the tool joins all cell outputs, synthesizes `(no text output)` or `(no output)` when needed, and attaches truncation metadata from `summarizeFinal()`.\n10. The renderer uses `details.cells`, `details.jsonOutputs`, and `details.statusEvents` to build notebook-style output. `mergeCallAndResult = true` and `inline = true`, so call and result render together in the transcript.\n\n## Modes / Variants\n\n### Backend selection\n\nBackend choice is **explicit per cell** — there is no auto-detection.\n\n- `language: \"py\"` → Python (IPython/Jupyter) backend\n- `language: \"js\"` → JavaScript VM backend\n\nIf the requested backend is disabled or unavailable, the tool throws `ToolError` for that cell. The caller chooses; the tool does not silently substitute.\n\n### JavaScript runtime\n\nImplemented in `packages/coding-agent/src/eval/js/context-manager.ts` and `packages/coding-agent/src/eval/js/prelude.txt`.\n\n- Persistent worker-backed VM sessions keyed by `js:${sessionId}`\n- `reset: true` calls `resetVmContext(sessionKey)` before the cell executes; reset is destructive for all live runs on that JS session\n- Top-level `await` and bare `return` are supported by wrapping code in an async IIFE when `wrapCode()` sees `await` or `return`\n- Top-level static `import ... from ...` and dynamic `import(...)` calls are routed through `rewriteImports()`, which sends them via `__omp_import__` so the specifier resolves against the session cwd\n- Module cache is busted for **local** imports between cells so edits to source files are picked up without restarting the runtime. `__omp_import__` deletes `require.cache[absPath]` before re-importing whenever the original specifier is a filesystem path: relative (`./x`, `../x`, `.`, `..`), POSIX-absolute (`/...`), home-prefixed (`~/...`), or Windows drive-letter (`C:\\...` / `C:/...`). Bare specifiers (`react`, `lodash/x`) and URL/scheme specifiers (`node:fs`, `file://...`, `https://...`) are left in cache so package identity stays stable across cells. The cache-bust only fires when the resolved target is an absolute path — unresolved bare-package fallbacks (`resolveImportSpecifier()` returning the original specifier) skip it.\n- The prelude installs globals:\n - `display`, `print`\n - `read`, `write`, `append`, `sort`, `uniq`, `counter`, `diff`, `tree`, `env`, `output`\n - `tool.<name>(args)` proxy for arbitrary session tool calls\n - `llm(prompt, opts)` for oneshot, stateless LLM calls (see _Oneshot LLM helper_ below)\n- JS helpers are async because they cross the VM/tool boundary\n- `display(value)` behavior:\n - plain objects/arrays become JSON outputs\n - `{ type: \"image\", data, mimeType }` becomes an image output\n - scalars become text\n- The VM exposes a restricted `process` subset plus `Buffer`, `fetch`, `Blob`, `File`, `Headers`, `Request`, `Response`, `fs`, `require`, and browser-style globals\n- Concurrent runs on the same VM are not queued end-to-end. Synchronous JS still runs on the single event loop; awaited regions can interleave with sibling runs.\n\n### Python runtime\n\nImplemented in `packages/coding-agent/src/eval/py/executor.ts`, `packages/coding-agent/src/eval/py/kernel.ts`, and `packages/coding-agent/src/eval/py/prelude.py`. See `docs/python-repl.md` for gateway and kernel details.\n\n- Default mode is retained `session` kernels keyed by `python:${sessionId}`\n- Optional `python.kernelMode = \"per-call\"` creates a fresh kernel for each cell and shuts it down afterward\n- `reset: true` disposes the retained kernel for that session before the cell runs; later Python cells in the same tool call reuse the fresh kernel\n- Startup path:\n - availability check\n - create/connect kernel\n - initialize cwd / env / `sys.path`\n - execute `PYTHON_PRELUDE`\n- Python cells run in the runner's persistent asyncio event loop, so top-level `await` works; the prompt warns not to use `asyncio.run(...)`\n- The Python prelude defines helpers with the same surface as JS where practical, including `tool.<name>(args)` through a per-run loopback bridge\n- Synchronous statement blocks run in the default executor with ContextVar state copied in; the GIL still serializes bytecode execution, but awaited regions can interleave with sibling cells\n- Kernel `display_data` / `execute_result` messages map to:\n - `application/x-omp-status` → status event\n - `image/png` → image output\n - `application/json` → JSON output\n - `text/markdown` → markdown output\n - `text/plain` → text output\n - `text/html` → HTML converted to markdown with `htmlToBasicMarkdown()`\n- Interactive stdin is rejected: `input_request` sends an empty reply, marks `stdinRequested`, and the executor returns exit code `1`\n\n### Oneshot LLM helper (`llm`)\n\nBoth runtimes expose `llm()` — a single stateless completion against a model tier. It is intentionally minimal: no conversation history, no agent-visible tools, pure text in / text (or object) out. Implemented host-side in `packages/coding-agent/src/eval/llm-bridge.ts` and routed through the existing tool bridge under the reserved name `__llm__`.\n\n- Signatures:\n - JS: `await llm(prompt, { model?, system?, schema? })`\n - Python: `llm(prompt, *, model=\"default\", system=None, schema=None)`\n- `model` selects a tier (default `\"default\"`):\n - `\"smol\"` → `pi/smol` role (fast / cheap)\n - `\"default\"` → the session's active model, falling back to the `pi/default` role\n - `\"slow\"` → `pi/slow` role; requests high reasoning effort only on reasoning-capable models\n- `system` (optional) supplies a system prompt.\n- `schema` (optional) is a plain JSON-Schema object. When present, the model is forced to call a single synthetic `respond` tool with that schema (loose, non-strict), and the helper returns the parsed object. When absent, the helper returns the completion string.\n- Errors surface as exceptions: unresolved tier, missing API key, an `error`/`aborted` stop reason, or empty output each raise.\n\n### Multi-language call behavior\n\nA single tool call can mix Python and JS cells. Persistence is per language runtime:\n\n- `reset: true` on a Python cell does not touch JS state\n- `reset: true` on a JS cell does not touch Python state\n- each backend keeps its own retained session keyed from the same session-derived ID\n\n## Side Effects\n\n- Filesystem\n - JS/Python prelude helpers can read, write, append, diff, and traverse files under the session cwd or absolute paths.\n - Output may spill to an artifact file via `OutputSink`.\n- Network\n - Python backend speaks NDJSON to a local `python3` subprocess over stdin/stdout (no network).\n - JS runtime exposes `fetch` and `tool.<name>()`; those tools may perform additional network I/O.\n- Subprocesses / native bindings\n - Python availability check runs `<python> -c ...`.\n - Python backend spawns one `python -u runner.py` subprocess per kernel; cancellation sends `SIGINT`. Details in `docs/python-repl.md`.\n- Session state\n - `session.assertEvalExecutionAllowed?.()` can block execution.\n - `session.trackEvalExecution?.(...)` can register cancellable eval work.\n - `session.getSessionFile?.()`, `session.getEvalSessionId?.()`, and `session.getEvalKernelOwnerId?.()` influence VM/kernel reuse and artifact lookup.\n - JS VM contexts persist across eval calls until reset/disposal.\n - Python retained kernels persist until reset, owner cleanup, or process exit.\n- User-visible prompts / interactive UI\n - none; stdin requests are rejected programmatically\n- Background work / cancellation\n - Python retained kernels have heartbeat and idle cleanup timers.\n - Cancellation hard-kills/resets the shared executor for that backend: JS terminates the worker, Python sends SIGINT and may escalate to subprocess shutdown.\n\n## Limits & Caps\n\n- Per-cell timeout default: 30s (applied when `timeout` is omitted in `EvalTool.execute()`; clamped through `TOOL_TIMEOUTS.eval.default` in `packages/coding-agent/src/tools/tool-timeouts.ts`)\n- Schema-level `timeout` range: integer `1..600` seconds (enforced by Zod on the cell schema)\n- Timeout clamp at runtime: 1s minimum, 600s maximum (`TOOL_TIMEOUTS.eval` in `packages/coding-agent/src/tools/tool-timeouts.ts`)\n- Transcript code/output preview: 10 lines by default (`EVAL_DEFAULT_PREVIEW_LINES` in `packages/coding-agent/src/tools/eval.ts`)\n- Output truncation window: 50KB default (`DEFAULT_MAX_BYTES` in `packages/coding-agent/src/session/streaming-output.ts`)\n- Output line cap inside truncation helpers: 3000 lines (`DEFAULT_MAX_LINES` in `packages/coding-agent/src/session/streaming-output.ts`)\n- Streaming tail buffer for live updates: `DEFAULT_MAX_BYTES * 2` = 100KB (`packages/coding-agent/src/tools/eval.ts`)\n- Python retained kernel idle timeout: 5 minutes (`IDLE_TIMEOUT_MS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python retained kernel cap: 4 sessions (`MAX_KERNEL_SESSIONS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python retained kernel cleanup sweep: every 30s (`CLEANUP_INTERVAL_MS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python owner-cleanup shutdown wait: 2000ms (`OWNER_CLEANUP_KERNEL_SHUTDOWN_TIMEOUT_MS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python heartbeat interval: 5s (`ensureKernelHeartbeat()` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python external gateway availability check timeout: 5s (`AbortSignal.timeout(5000)` in `packages/coding-agent/src/eval/py/kernel.ts`)\n- Python auto-restart budget: one restart per retained session before hard failure (`restartCount > 1` in `packages/coding-agent/src/eval/py/executor.ts`)\n\n## Errors\n\n- Zod validation rejects malformed `cells` arrays before `execute()` runs (missing `language`/`code`, out-of-range `timeout`, empty `cells`).\n- Missing session without proxy executor throws `ToolError(\"Eval tool requires a session when not using proxy executor\")`.\n- Disabled/unavailable backends throw `ToolError` from `resolveBackend()`:\n - `eval.py = false` and a `py` cell is requested\n - `eval.js = false` and a `js` cell is requested\n - Python kernel unavailable and a `py` cell is requested\n- JS runtime exceptions are converted into text output plus `exitCode: 1`; cancellations return `cancelled: true` and may append `Command timed out`.\n- Python execution errors from the kernel become text output and `exitCode: 1`; later cells are skipped.\n- Python stdin requests are treated as errors with the message `Kernel requested stdin; interactive input is not supported.`\n- Cancellation is returned, not thrown, once backend execution has started. The tool formats it as a cell failure and sets `details.isError = true`.\n- If output truncates, the tool still succeeds; truncation is surfaced through `details.meta` and artifact-backed full output when available.\n\n## Shared executor trade-offs\n\n- Parent agents and subagents share eval state bidirectionally when a subagent inherits the parent's executor id. Mutations in either direction are visible to the other participant.\n- Async regions of concurrent runs can interleave. Synchronous JS still blocks the VM event loop; synchronous Python still contends on the GIL.\n- Cancelling one run is destructive to the shared backend executor. This is intentional: JS worker termination and Python SIGINT/subprocess shutdown are the only reliable way to interrupt arbitrary user code.\n- `reset: true` is destructive for every live run on that backend session id. New starts on that backend are rejected while reset is in flight.\n\n## Notes\n\n- Backend selection is now strictly explicit per cell: `language` must be `\"py\"` or `\"js\"`. The previous `*** Cell` header parser, the `eval.lark` constrained grammar, and the sniffer-based fallback have all been removed.\n- `EvalTool.customFormat` no longer exists. Tool calls flow through the standard JSON schema; there is no Lark-constrained sampling path.\n- `tool.<name>()` exists in both JS and Python. Python calls route through a per-run loopback bridge keyed by the current cell id.\n- JS helper paths reject protocol URIs (`://`) in `resolvePath()`; the JS prelude is filesystem-only unless the code calls `tool.read(...)` or another tool explicitly.\n- Python helper `output(...)` depends on `PI_SESSION_FILE`; it fails outside a session-backed run.\n- `display()` can produce text and structured outputs from the same value; the renderer prefers markdown over `text/plain` when both exist.\n- JS static imports are rewritten only at top level. Nested imports stay invalid and surface normal JS syntax/runtime errors.\n- `EvalTool` is `concurrency = \"exclusive\"` within one agent session, but parent and subagent sessions can run eval concurrently when they share an inherited executor id.\n- The tool description shown to the model is templated by backend availability (`getEvalToolDescription()`); if Python is unavailable, the prompt omits Python-specific instructions.\n",
|
|
77
76
|
"tools/find.md": "# find\n\n> Find filesystem paths by glob; use `search` when you need content matches instead of path matches.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/find.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/find.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/path-utils.ts` — normalize inputs; split base path vs glob.\n - `packages/coding-agent/src/tools/list-limit.ts` — apply result-count caps.\n - `packages/coding-agent/src/session/streaming-output.ts` — truncate text output at byte cap.\n - `packages/coding-agent/src/tools/tool-result.ts` — build `content` and `details.meta`.\n - `packages/coding-agent/src/tools/output-meta.ts` — encode limit / truncation metadata.\n - `packages/coding-agent/src/tools/tool-errors.ts` — map user-facing tool errors.\n - `packages/coding-agent/src/tools/index.ts` — register the built-in local implementation.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `paths` | `string[]` | Yes | One or more globs, files, or directories. Empty strings are rejected. Multiple entries may be merged into one brace-union search when their base paths can be resolved together. |\n| `hidden` | `boolean` | No | Whether hidden files are included. Defaults to `true` (`hidden ?? true`). |\n| `limit` | `number` | No | Max returned paths. Defaults to `1000`. Must be a finite positive number; non-integers are floored. |\n\n## Outputs\nThe tool returns a single text block plus structured `details`.\n\n- Success text: newline-delimited paths, one per line, relative to the session cwd when possible; absolute when outside cwd. Exact file inputs return that file path as one line.\n- Empty result text: `No files found matching pattern`.\n- Multi-path partial miss: appends `Skipped missing paths: ...` after the result block, or after the empty-result line.\n- `details` may include:\n - `scopePath`: display form of the searched root or merged roots.\n - `fileCount`: number of paths returned after result limiting.\n - `files`: returned paths as an array.\n - `truncated`: whether result count or byte truncation occurred.\n - `resultLimitReached`: reached result limit.\n - `missingPaths`: skipped missing inputs in multi-path calls.\n - `truncation` / `meta.limits`: structured truncation and limit metadata for renderers.\n- Streaming: when the runtime supplies `onUpdate`, the local implementation emits incremental newline-delimited text snapshots during globbing, throttled to 200 ms.\n\n## Flow\n1. `FindTool.execute()` normalizes each `paths` entry with `normalizePathLikeInput()` and `/\\\\/g -> \"/\"` (`packages/coding-agent/src/tools/find.ts`). Empty normalized entries fail with `` `paths` must contain non-empty globs or paths ``.\n2. For multi-path local calls, `partitionExistingPaths(..., parseFindPattern)` (`packages/coding-agent/src/tools/path-utils.ts`) stats each base path. Missing entries are skipped; if all are missing, the tool throws `Path not found: ...`. Single missing paths still hard-fail.\n3. The tool tries `resolveExplicitFindPatterns()` to merge multiple inputs into one search rooted at a common base path. If that does not apply, it parses one input with `parseFindPattern()`.\n4. `parseFindPattern()` determines `(basePath, globPattern, hasGlob)`:\n - no glob chars (`*`, `?`, `[`, `{`) => search that path with implicit `**/*`.\n - glob in the first segment => search from `.` and, unless the pattern already starts with `**/`, prefix it with `**/`.\n - glob later in the path => split at the first glob-bearing segment.\n5. `resolveToCwd()` converts the base path to an absolute path under the session cwd. A resolved `/` is rejected with `Searching from root directory '/' is not allowed`.\n6. `limit` is defaulted to `DEFAULT_LIMIT` (`1000`) and validated as a positive finite integer. `hidden` defaults to `true`. The tool also creates a 5 s timeout via `AbortSignal.timeout(GLOB_TIMEOUT_MS)`.\n7. Execution then branches:\n - **Custom operations branch**: if `FindToolOptions.operations.glob` exists, the tool checks existence with `operations.exists()`, short-circuits exact-file inputs via `operations.stat()` when available, then calls `operations.glob(globPattern, searchPath, { ignore: [\"**/node_modules/**\", \"**/.git/**\"], limit })`.\n - **Built-in local branch**: the tool stats `searchPath`. Exact-file inputs return immediately. Directory inputs call `natives.glob()` with `fileType: File`, `hidden`, `maxResults: limit`, `sortByMtime: true`, `gitignore: true`, and the combined abort signal.\n8. In the local branch, optional `onMatch` callbacks convert each match to a cwd-relative display path and emit throttled progress updates.\n9. After native glob returns, JS sorts `result.matches` by `mtime` descending (`(b.mtime ?? 0) - (a.mtime ?? 0)`) before formatting paths.\n10. `buildResult()` applies `applyListLimit()` to cap the array again at `limit`, joins paths with `\\n`, then runs `truncateHead()` with `maxLines: Number.MAX_SAFE_INTEGER`. In practice this leaves the 50 KB byte cap in place while disabling the default 3000-line cap.\n11. `toolResult()` packages text plus `details`, and records result-limit / truncation metadata for renderers.\n\n## Modes / Variants\n- **Exact file path**: if the parsed input has no glob and the resolved path stats as a file, output is that one path.\n- **Directory path**: if the parsed input has no glob and stats as a directory, the tool searches it with implicit `**/*`.\n- **Single glob path**: one input parsed by `parseFindPattern()`.\n- **Merged multi-path search**: multiple inputs resolved by `resolveExplicitFindPatterns()` into one brace-union glob rooted at a common base path.\n- **Partial multi-path search with missing inputs**: local multi-path calls skip missing base paths and surface them as `missingPaths` / `Skipped missing paths: ...`.\n- **Custom delegated search**: uses injected `FindOperations` instead of local fs + native glob.\n\n## Side Effects\n- Filesystem\n - Stats the resolved base path, and in local multi-path mode stats every candidate base path up front.\n - Does not write files.\n- Subprocesses / native bindings\n - Built-in local mode calls the native `@oh-my-pi/pi-natives` glob implementation.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Emits structured progress updates when `onUpdate` is provided.\n - Adds truncation / limit metadata to the tool result.\n- Background work / cancellation\n - Local globbing is cancellable through the caller abort signal plus an internal 5 s timeout.\n\n## Limits & Caps\n- Default result limit: `1000` (`DEFAULT_LIMIT` in `packages/coding-agent/src/tools/find.ts`).\n- Local glob timeout: `5000` ms (`GLOB_TIMEOUT_MS` in `packages/coding-agent/src/tools/find.ts`).\n- Output byte cap: `50 * 1024` bytes (`DEFAULT_MAX_BYTES` in `packages/coding-agent/src/session/streaming-output.ts`).\n- Default generic line cap in `truncateHead()` is `3000`, but `find` overrides `maxLines` to `Number.MAX_SAFE_INTEGER`, so byte size — not line count — is the practical output truncation cap.\n- Streaming update throttle: `200` ms between `onUpdate` emissions.\n- Sort order: most recent `mtime` first in the built-in local branch and promised in the prompt. The tool re-sorts in JS even though native glob receives `sortByMtime: true` so native code can still stop early at `maxResults`.\n\n## Errors\n- User-facing `ToolError`s from `FindTool.execute()` include:\n - `` `paths` must contain non-empty globs or paths ``\n - `Path not found: ...`\n - `Searching from root directory '/' is not allowed`\n - `Limit must be a positive number`\n - `Path is not a directory: ...`\n - `find timed out after 5s`\n- If the caller aborts, the local branch converts `AbortError` into `ToolAbortError`.\n- Non-`ENOENT` stat failures and other unexpected errors are rethrown.\n- Empty matches are not errors; they return the no-files text result.\n\n## Notes\n- Reach for `find` for filename / path discovery. Reach for `search` when the selection criterion is file contents or regex matches; `search` takes a `pattern` and returns anchored content matches, while `find` only returns matching paths (`packages/coding-agent/src/prompts/tools/find.md`, `packages/coding-agent/src/prompts/tools/search.md`).\n- Bare top-level globs are made recursive. `*.ts` is parsed as base `.` plus glob `**/*.ts`; `src/*.ts` stays rooted at `src` with a non-recursive `*.ts` segment; `src/**/*.ts` preserves explicit recursion.\n- `.gitignore` is always enabled in the built-in local branch (`gitignore: true`). There is no model-facing flag to disable it.\n- `hidden` defaults to `true`; hidden-file exclusion is opt-out, not opt-in.\n- Multi-path missing-input tolerance only applies in the built-in local branch. The custom-operations branch hard-fails the first missing `searchPath` it checks.\n- The custom `FindOperations.glob()` hook receives `ignore` and `limit`, but not the `hidden` flag or an explicit `.gitignore` toggle. A remote delegate must account for that itself if it wants parity with the local branch.\n- Built-in local globbing asks the native layer for `fileType: File`, so recursive directory searches yield files, not directories. Directory outputs are only possible through exact-path passthrough or custom delegates that return them.\n",
|
|
78
77
|
"tools/github.md": "# github\n\n> Dispatch GitHub CLI operations for repositories, issues, pull requests, search, and Actions run watching.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/gh.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/github.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/gh-format.ts` — shorten commit SHAs for summaries.\n - `packages/coding-agent/src/tools/gh-renderer.ts` — TUI rendering, especially `run_watch` live/result views.\n - `packages/coding-agent/src/utils/git.ts` — `gh`/`git` process wrappers, repo locking, branch config writes.\n - `packages/utils/src/dirs.ts` — base directory for dedicated PR worktrees.\n - `packages/coding-agent/src/sdk.ts` — session artifact allocation hook.\n - `packages/coding-agent/src/session/artifacts.ts` — artifact filename format `<id>.<toolType>.log`.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `op` | `\"repo_view\" \\| \"pr_create\" \\| \"pr_checkout\" \\| \"pr_push\" \\| \"search_issues\" \\| \"search_prs\" \\| \"search_code\" \\| \"search_commits\" \\| \"search_repos\" \\| \"run_watch\"` | Yes | Dispatch selector. `GithubTool.execute()` switches only on this field. |\n| `repo` | `string` | No | `owner/repo` override. Ignored when the identifier argument is already a full GitHub URL. For `search_issues`/`search_prs`/`search_code`/`search_commits`, defaults to the current checkout's `owner/repo` when omitted (skipped when the query already contains a `repo:`/`org:`/`user:`/`owner:` qualifier or when current-repo resolution fails). Required in practice when `gh` cannot infer repo context from the current checkout. |\n| `branch` | `string` | No | Used by `repo_view`, `pr_push`, and `run_watch`. `run_watch` falls back to current git branch when `run` is omitted; `pr_push` falls back to current branch. |\n| `pr` | `string \\| string[]` | No | Used by `pr_checkout`. Each item may be a PR number, branch name, or GitHub PR URL. Array form enables batching. Omitted means current branch PR. |\n| `force` | `boolean` | No | Used only by `pr_checkout`. Defaults to `false`; allows resetting an existing `pr-<number>` local branch to the PR head commit. |\n| `forceWithLease` | `boolean` | No | Used only by `pr_push`; passed through to git push. |\n| `title` | `string` | No | Used only by `pr_create`. Required unless `fill` is `true`. |\n| `body` | `string` | No | Used only by `pr_create`. Mutually exclusive with `fill`. Empty/omitted body becomes `--body \"\"` to suppress the interactive editor. Non-empty body is written to a temp file and passed as `--body-file`. |\n| `base` | `string` | No | Used only by `pr_create`; passed as `--base`. |\n| `head` | `string` | No | Used only by `pr_create`; passed as `--head`. |\n| `draft` | `boolean` | No | Used only by `pr_create`. Defaults to `false`. |\n| `fill` | `boolean` | No | Used only by `pr_create`. Defaults to `false`. Mutually exclusive with `title` and `body`. |\n| `reviewer` | `string[]` | No | Used only by `pr_create`; each entry becomes `--reviewer`. |\n| `assignee` | `string[]` | No | Used only by `pr_create`; each entry becomes `--assignee`. |\n| `label` | `string[]` | No | Used only by `pr_create`; each entry becomes `--label`. |\n| `query` | `string` | No | Used by all `search_*` ops. Required there. |\n| `limit` | `number` | No | Used by all `search_*` ops. Defaults to `10`, floored, clamped to `50`, and must be `> 0`. |\n| `run` | `string` | No | Used only by `run_watch`. Must be a numeric run ID or full GitHub Actions run URL. |\n| `tail` | `number` | No | Used only by `run_watch`. Defaults to `15`, floored, clamped to `200`, and must be `> 0`. |\n\n## Outputs\nThe tool returns a single text result built by `buildTextResult()` in `packages/coding-agent/src/tools/gh.ts`.\n\n- `content`: one text block. Multi-item ops join sections with blank lines and `---` separators.\n- `sourceUrl`: set for single repo/PR/run results when a canonical URL is known.\n- `details`: optional structured metadata used by the TUI renderer.\n - Common fields: `artifactId`, `repo`, `branch`, `worktreePath`, `remote`, `remoteBranch`, `headSha`, `runId`, `runIds`, `status`, `conclusion`, `failedJobs`.\n - `pr_checkout` adds `checkouts: GhPrCheckoutSummary[]`.\n - `run_watch` adds `watch: GhRunWatchViewDetails`, which drives the custom live/result renderer in `packages/coding-agent/src/tools/gh-renderer.ts`.\n- Artifact trailer: when `artifactId` is present, the text body gets an appended line like `Full failed-job logs: artifact://<id>`.\n - `run_watch` allocates artifacts with `session.allocateOutputArtifact(\"github\")`; persistent sessions therefore save failed-log bodies as `<artifact-dir>/<id>.github.log`.\n\n`run_watch` is the only streaming op. It emits `onUpdate` snapshots while polling, then returns one final text result.\n\n## Flow\n1. `GithubTool.createIf()` exposes the tool only when `git.github.available()` finds `gh` on `PATH`.\n2. `GithubTool.execute()` wraps dispatch in `untilAborted()` and switches on `params.op`.\n3. Each op normalizes optional strings, arrays, booleans, and numeric caps locally in `packages/coding-agent/src/tools/gh.ts`.\n4. CLI execution goes through `git.github.run/json/text()` in `packages/coding-agent/src/utils/git.ts`:\n - spawns `gh ...` with `Bun.spawn()`;\n - trims stdout/stderr unless `trimOutput: false`;\n - maps common auth/repo-context failures into tool-facing `ToolError` messages;\n - `json()` rejects empty or invalid JSON.\n5. Read-style ops (`repo_view`, `search_*`) fetch JSON and format Markdown-like text summaries. Single-issue and single-PR views were moved out of the tool and now resolve through the `issue://` / `pr://` internal URL schemes, which share the same SQLite cache.\n7. PR diffs moved out of the tool. `pr://<N>/diff` lists changed files, `pr://<N>/diff/<i>` slices a single file, and `pr://<N>/diff/all` returns the full unified diff — see `docs/tools/read.md`. All three variants share one `gh pr diff` invocation through the `pr-diff` cache row.\n8. `pr_checkout` resolves PR metadata first, then enters `git.withRepoLock()` before any git mutation so parallel checkout calls for the same primary repo do not race on shared `.git` state.\n9. `pr_push` reads PR head metadata back from git branch config, derives a refspec, then pushes with `git.push()`.\n10. `pr_create` shells out once, then best-effort re-reads the created PR for a richer summary.\n11. `run_watch` chooses either run mode (`run` supplied) or commit mode (`run` omitted), polls GitHub Actions APIs every 3 seconds, emits streaming updates, and may save a full failed-log artifact before returning.\n12. Final text goes through `toolResult().text(...)`; if `session.allocateOutputArtifact()` returns a slot, failed-log text is persisted with `Bun.write()`.\n\n## Modes / Variants\n\n### `repo_view`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `branch` |\n| `gh` command | `gh repo view [<repo>] [--branch <branch>] --json <GH_REPO_FIELDS>` |\n| Batching | None |\n| Output | `# <owner/repo>` header, description, URL, default branch, requested branch, visibility, permission, primary language, stars, forks, archive/fork flags, updated timestamp, homepage, topics. `sourceUrl = data.url`. |\n\nIf `repo` is omitted, `gh` repository resolution is used.\n\nSingle-issue and single-PR reads live in the `issue://<N>` / `pr://<N>` URL schemes (see `docs/tools/read.md`). They share `~/.omp/cache/github-cache.db` (override via `OMP_GITHUB_CACHE_DB`) and the `github.cache.softTtlSec` / `github.cache.hardTtlSec` / `github.cache.enabled` settings. The cache retains rendered Markdown plus the raw JSON payload returned by `gh`, including private bodies, comments, reviews, and review comments when comments are enabled; rows are scoped by the local GitHub credential fingerprint. Root and repo-scoped reads (`issue://`, `pr://owner/repo`) issue a live `gh issue list` / `gh pr list` for browsing; query params `state`, `limit`, `author`, `label` pass through to `gh` (`issue://` accepts `state=open|closed|all`; `pr://` also accepts `merged`). PR diffs ride the same cache under `pr://<N>/diff[/…]`: the listing, full diff, and per-file slices all share one `pr-diff` row keyed by repo and PR number.\n\n### `pr_create`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` plus either `fill=true` or `title` |\n| Optional fields | `repo`, `title`, `body`, `base`, `head`, `draft`, `fill`, `reviewer[]`, `assignee[]`, `label[]` |\n| `gh` command | `gh pr create ...` with flags assembled from provided fields |\n| Batching | None |\n| Output | `# Created Pull Request ...` summary with URL, state, draft flag, base/head, author, created time, labels, optional body. `sourceUrl` is the created PR URL. |\n\nBranches:\n- `fill && (title || body !== undefined)` throws.\n- Non-empty `body` is written under a temp dir `gh-pr-body-*` in `os.tmpdir()`, passed as `--body-file`, then removed in `finally`.\n- After creation, the tool parses the returned URL and best-effort runs `gh pr view <number> --repo <repo> --json <GH_PR_FIELDS_NO_COMMENTS>`; failures there are swallowed.\n\n### `pr_checkout`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `pr`, `force` |\n| `gh` command | For each requested PR: `gh pr view [<pr>] [--repo <repo>] --json <GH_PR_CHECKOUT_FIELDS>`; cross-repo PRs may also call `gh repo view <headRepository> --json <GH_REPO_CLONE_FIELDS>`. |\n| Batching | Yes. `pr` may be `string[]`; each PR is resolved in parallel, but git mutations are serialized per primary repo by `git.withRepoLock()`. |\n| Output | Single PR: checkout/worktree summary plus `details.repo`, `details.branch`, `details.worktreePath`, `details.remote`, `details.remoteBranch`, `details.checkouts`. Batched: `# <n> Pull Request Worktrees (...)` plus one section per PR and aggregated `details.checkouts`. |\n\nWorktree and metadata behavior:\n- Local branch name is always `pr-<number>`.\n- Worktree path is `path.join(getWorktreesDir(), encodeRepoPathForFilesystem(primaryRepoRoot), localBranch)`, where `getWorktreesDir()` is `~/.omp/wt`; effective path is `~/.omp/wt/<encoded-primary-repo-root>/pr-<number>`.\n- Existing worktree detection is by branch ref `refs/heads/pr-<number>` from `git.worktree.list()`.\n- New worktree creation calls `git.worktree.add(repoRoot, finalWorktreePath, localBranch, { signal })` after verifying the path is neither already registered nor already present on disk.\n- For same-repo PRs, remote is `origin`. For cross-repo PRs, the tool resolves a clone URL for the head repo, reuses an existing remote with the same URL when possible, or creates `fork-<owner>` / `fork-<owner>-<n>`.\n- The branch push metadata is persisted with `git config` under the repository's shared `.git/config` as:\n - `branch.pr-<number>.remote`\n - `branch.pr-<number>.merge`\n - `branch.pr-<number>.pushRemote`\n - `branch.pr-<number>.ompPrHeadRef`\n - `branch.pr-<number>.ompPrUrl`\n - `branch.pr-<number>.ompPrIsCrossRepository`\n - `branch.pr-<number>.ompPrMaintainerCanModify`\n- If `refs/heads/pr-<number>` already exists at a different commit, checkout fails unless `force=true`, in which case `git branch --force` resets it to the fetched PR head.\n- If a matching worktree already exists, the tool reuses it and reports `reused: true`.\n\n### `pr_push`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `branch`, `forceWithLease` |\n| `gh` command | None. This path uses git, not `gh`. |\n| Batching | None |\n| Output | `# Pushed Pull Request Branch` summary with local branch, remote, remote branch, remote URL, PR URL, and force-with-lease flag. `sourceUrl = prUrl` when known. |\n\nPush target resolution reads the `branch.<name>.ompPrHeadRef`, `pushRemote`/`remote`, `ompPrUrl`, `ompPrMaintainerCanModify`, and `ompPrIsCrossRepository` git-config keys written by `pr_checkout`. If the current checked-out branch matches the target branch, the source ref is `HEAD`; otherwise it pushes `refs/heads/<branch>`. The refspec is `HEAD:refs/heads/<headRef>` or `refs/heads/<branch>:refs/heads/<headRef>`.\n\n### `search_issues`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `repo`, `limit` |\n| `gh` command | `gh api -X GET /search/issues -f q=\"<query> [repo:<repo>] is:issue\" -F per_page=<limit>` |\n| Batching | None |\n| Output | `# GitHub issues search`, echoed query, optional repo, result count, then one bullet per issue with repo/state/author/labels/timestamps/URL. |\n\n`repo` defaults to the current checkout's `owner/repo` via `resolveSearchRepoScope()` when omitted. The default is suppressed when the query already contains a leading `repo:`/`org:`/`user:`/`owner:` qualifier or when `gh repo view` fails to resolve the current checkout (e.g. outside a github remote).\n\n### `search_prs`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `repo`, `limit` |\n| `gh` command | `gh api -X GET /search/issues -f q=\"<query> [repo:<repo>] is:pr\" -F per_page=<limit>` |\n| Batching | None |\n| Output | Same shape as `search_issues`, labeled as pull requests. |\n\n`repo` defaults to the current checkout's `owner/repo` as in `search_issues`.\n\n### `search_code`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `repo`, `limit` |\n| `gh` command | `gh api -X GET /search/code -f q=\"<query> [repo:<repo>]\" -F per_page=<limit> -H \"Accept: application/vnd.github.text-match+json\"` |\n| Batching | None |\n| Output | `# GitHub code search`, result count, then one bullet per match with path, repo, short commit SHA, URL, and first normalized text-match fragment line when present. |\n\n`repo` defaults to the current checkout's `owner/repo` as in `search_issues`.\n\n### `search_commits`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `repo`, `limit` |\n| `gh` command | `gh api -X GET /search/commits -f q=\"<query> [repo:<repo>]\" -F per_page=<limit>` |\n| Batching | None |\n| Output | `# GitHub commits search`, result count, then one bullet per commit: short SHA + first commit-message line, repo, author, date, URL. |\n\n`repo` defaults to the current checkout's `owner/repo` as in `search_issues`.\n\n### `search_repos`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `limit` |\n| `gh` command | `gh api -X GET /search/repositories -f q=\"<query>\" -F per_page=<limit>` |\n| Batching | None |\n| Output | `# GitHub repositories search`, result count, then one bullet per repo with first description line, language, stars, forks, open issues, visibility, archive/fork flags, updated time, URL. |\n\n`repo` is intentionally not used for this op.\n\n### `run_watch`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `branch`, `run`, `tail` |\n| `gh` command | Repo resolution: `gh repo view --json nameWithOwner -q .nameWithOwner` when `repo` and run URL repo are both absent. Single-run mode uses `gh api --method GET /repos/<repo>/actions/runs/<runId>` and `gh api --method GET /repos/<repo>/actions/runs/<runId>/jobs`. Commit mode uses `gh api --method GET /repos/<repo>/branches/<branch>`, `gh api --method GET /repos/<repo>/actions/runs`, `gh api --method GET /repos/<repo>/actions/runs/<runId>/jobs`, and `gh api /repos/<repo>/actions/jobs/<jobId>/logs` for failed jobs. |\n| Batching | Implicit batching only in commit mode: all workflow runs for one commit are tracked together. |\n| Output | Streaming watch snapshots via `onUpdate`, then a final text report. On failure, appends `Full failed-job logs: artifact://<id>` and sets `details.artifactId`. |\n\nWatch flow:\n- `run` parsing accepts either a decimal run ID or a full run URL. URL repo must match explicit `repo` when both are given.\n- Poll interval is fixed at 3 seconds (`RUN_WATCH_INTERVAL_DEFAULT`).\n- Failure grace period is fixed at 5 seconds (`RUN_WATCH_GRACE_DEFAULT`). When any failed job appears before completion, the tool emits a note, waits once, re-fetches state, then collects logs so concurrent failures are included.\n- Failed-job logs are fetched with `gh api /repos/<repo>/actions/jobs/<jobId>/logs` via `git.github.run()`, not `json()`. Non-zero exit leaves `available: false` instead of failing the whole watch.\n- Inline result includes only the last `tail` lines per failed job. The saved artifact contains full logs (`mode: \"full\"`).\n- In commit mode, success is intentionally double-checked: once all known runs are successful, the tool waits one more poll interval and succeeds only if the set of run IDs is unchanged. This avoids returning before late workflow runs appear for the same commit.\n- `details.watch` drives a specialized renderer in `packages/coding-agent/src/tools/gh-renderer.ts`; non-watch results fall back to generic text rendering.\n\n## Side Effects\n- Filesystem\n - `pr_create` may create a temp dir under `os.tmpdir()` named `gh-pr-body-*`, write `body.md`, then remove the dir in `finally`.\n - `pr_checkout` may create directories under `~/.omp/wt/<encoded-primary-repo-root>/` and add git worktrees there.\n - `run_watch` may write a session artifact with full failed-job logs.\n- Network\n - Every op shells out to `gh`, which then talks to GitHub APIs except `pr_push`.\n - `pr_push` uses git network transport to the configured remote.\n- Subprocesses / native bindings\n - All `gh` calls use `Bun.spawn([\"gh\", ...args])`.\n - `pr_checkout` and `pr_push` also invoke git helpers from `packages/coding-agent/src/utils/git.ts`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - `run_watch` consumes `session.allocateOutputArtifact()` when failed-job logs are persisted.\n - Returned `details` objects carry run/checkouts metadata for the renderer/UI.\n- User-visible prompts / interactive UI\n - `gh` interactive editor fallback is suppressed for `pr_create` by forcing either `--body-file` or `--body \"\"`.\n - `gh-renderer` provides compact headers for all ops and a custom live watch view for `run_watch`.\n- Background work / cancellation\n - `run_watch` loops until success/failure and uses `scheduler.wait()` between polls.\n - `GithubTool.execute()` is wrapped in `untilAborted()`; `git.github.run()` forwards the abort signal into `Bun.spawn()`.\n\n## Limits & Caps\n- Search result default: `10` (`SEARCH_LIMIT_DEFAULT` in `packages/coding-agent/src/tools/gh.ts`).\n- Search result max: `50` (`SEARCH_LIMIT_MAX`).\n- PR file preview inside the `pr://` view: first `50` files only (`FILE_PREVIEW_LIMIT` in `gh.ts`).\n- Run-watch poll interval: `3s` (`RUN_WATCH_INTERVAL_DEFAULT`).\n- Run-watch failure grace period: `5s` (`RUN_WATCH_GRACE_DEFAULT`).\n- Run-watch failed-log tail default: `15` lines (`RUN_WATCH_TAIL_DEFAULT`).\n- Run-watch failed-log tail max: `200` lines (`RUN_WATCH_TAIL_MAX`).\n- PR review comments page size: `100` (`REVIEW_COMMENTS_PAGE_SIZE`).\n- Actions jobs page size: `100` (`RUN_JOBS_PAGE_SIZE`).\n- Search and tail numeric inputs are floored with `Math.floor()`, clamped to the max, and rejected when non-finite or `<= 0`.\n- `pr_checkout` batch fan-out is unbounded in tool code; all requested PRs are launched with `Promise.all()`.\n\n## Errors\n- Tool creation is skipped entirely when `gh` is not installed.\n- `git.github.run()` throws `ToolError(\"GitHub CLI (gh) is not installed...\")` if `gh` is missing at execution time.\n- `git.github.text/json()` map common failures to model-facing messages:\n - not authenticated → `GitHub CLI is not authenticated. Run \\`gh auth login\\`.`\n - missing repo context without explicit `repo` → `GitHub repository context is unavailable. Pass \\`repo\\` explicitly or run the tool inside a GitHub checkout.`\n - otherwise stderr/stdout text, or fallback `GitHub CLI command failed: gh ...`\n- `json()` also throws on empty stdout or invalid JSON.\n- Local validation errors throw `ToolError`, including:\n - missing required per-op fields (`query`, `title unless fill=true`)\n - invalid numeric `limit` / `tail`\n - invalid `run` format\n - `fill` combined with `title` or `body`\n - missing git repo / branch / HEAD context for checkout, push, or watch\n - `pr_push` on a branch without `ompPrHeadRef` metadata\n - conflicting existing worktree path or branch without `force`\n- `run_watch` treats failed-job log fetches specially: missing log content does not fail the watch; it marks that log `available: false` and prints `Log tail unavailable.` / `Full log unavailable.`.\n- `pr_create` swallows only the post-create best-effort `gh pr view` refresh; the create step itself still fails normally.\n\n## Notes\n- `appendRepoFlag()` intentionally skips `--repo` when the identifier argument is already a full GitHub URL; that lets `gh` derive repo/number from the URL.\n- `normalizePrIdentifierList()` accepts `reviewer`, `assignee`, and `label` arrays too; the helper name is broader than its callers.\n- `pr_push` depends on `pr_checkout` having run first for that local branch; there is no alternate metadata source.\n- `pr_checkout` stores push metadata in branch config, not in the worktree directory. Reusing the same `pr-<number>` branch reuses those config keys.\n- Worktree write serialization is keyed by the primary repo root, not the current worktree path, because git worktrees share `.git/config`, `packed-refs`, commit-graph, and worktree metadata files.\n- `search_repos` is the only search op that never forwards `repo`; repository scoping must be expressed in the query itself.\n- `run_watch` success on commit mode means “all observed runs succeeded and no additional runs appeared one poll later”, not merely “latest poll looked green”.\n- The TUI renderer collapses failed log previews unless the result view is expanded; the underlying text result still contains the same tailed lines plus any artifact reference.\n",
|
|
79
78
|
"tools/inspect_image.md": "# inspect_image\n\n> Send a local image file to a vision-capable model and return text analysis.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/inspect-image.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/inspect-image.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/inspect-image-renderer.ts` — TUI call/result rendering.\n - `packages/coding-agent/src/utils/image-loading.ts` — path resolution, type detection, size gate, optional resize.\n - `packages/coding-agent/src/utils/image-resize.ts` — downscale and recompress oversized images.\n - `packages/coding-agent/src/tools/path-utils.ts` — resolve input path relative to session cwd.\n - `packages/utils/src/mime.ts` — detect supported image formats from file bytes.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `path` | `string` | Yes | Image path passed to `loadImageInput`; resolved relative to `session.cwd` by `resolveReadPath(...)`. |\n| `question` | `string` | Yes | User prompt sent as a text content block alongside the image. |\n\n## Outputs\nThe tool returns a single `AgentToolResult`:\n\n- `content`: one text block, `[{ type: \"text\", text }]`, where `text` is the concatenated assistant text content from the model response.\n- `details`:\n - `model`: `<provider>/<id>` of the selected model.\n - `imagePath`: resolved filesystem path returned by `loadImageInput(...)`.\n - `mimeType`: MIME type actually sent to the model after optional resize/re-encode.\n\nModel-visible output is single-shot, not streamed by this tool.\n\nTUI rendering adds presentation-only truncation from `packages/coding-agent/src/tools/inspect-image-renderer.ts`:\n\n- call preview truncates `question` to 100 columns,\n- result view shows 4 lines collapsed or 16 lines expanded,\n- each rendered output line is truncated to 120 columns,\n- footer metadata shows `model · mimeType` when present.\n\n## Flow\n1. `InspectImageTool.execute(...)` rejects immediately if `images.blockImages` is enabled in session settings.\n2. It reads `session.modelRegistry`; missing registry, empty registry, missing API key, or unresolved model each raise `ToolError` from `packages/coding-agent/src/tools/inspect-image.ts`.\n3. Model selection tries, in order, `pi/vision`, `pi/default`, the active model string from the session, then `availableModels[0]`. `expandRoleAlias(...)` and `resolveModelFromString(...)` handle each lookup.\n4. The chosen model must advertise `input.includes(\"image\")`; otherwise execution fails before reading the file.\n5. `loadImageInput(...)` in `packages/coding-agent/src/utils/image-loading.ts` resolves the path with `resolveReadPath(...)`, detects MIME type with `readImageMetadata(...)`, and rejects files larger than `MAX_IMAGE_INPUT_BYTES` (`20 * 1024 * 1024`, 20 MiB) using `ImageInputTooLargeError`.\n6. `readImageMetadata(...)` in `packages/utils/src/mime.ts` inspects file headers only. Supported detected MIME types are `image/png`, `image/jpeg`, `image/gif`, and `image/webp`.\n7. If `images.autoResize` is true, `loadImageInput(...)` calls `resizeImage(...)`. Resize failures are swallowed there and the original bytes are kept.\n8. If MIME detection returned no supported image type, `execute(...)` throws `ToolError(\"inspect_image only supports PNG, JPEG, GIF, and WEBP files detected by file content.\")`.\n9. The tool calls `completeSimple(...)` with one user message containing two content parts in order:\n - `{ type: \"image\", data: imageInput.data, mimeType: imageInput.mimeType }`\n - `{ type: \"text\", text: params.question }`\n10. `systemPrompt` is a one-element array rendered from `packages/coding-agent/src/prompts/tools/inspect-image-system.md`.\n11. If the model response stop reason is `error` or `aborted`, the tool maps that to `ToolError`.\n12. `extractResponseText(...)` concatenates only `text` content blocks from the assistant message, trims the result, and fails if nothing remains.\n13. Success returns the text plus `details`; `inspectImageToolRenderer` formats the result for the TUI.\n\n## Modes / Variants\n- **Original image path**: `images.autoResize` disabled. The original file bytes are base64-encoded and sent with the detected MIME type.\n- **Auto-resized path**: `images.autoResize` enabled. `resizeImage(...)` may downscale and re-encode the image before upload.\n- **Unsupported image path**: file exists but header sniffing does not identify PNG/JPEG/GIF/WEBP. The tool returns a `ToolError` before any model call.\n- **Oversize image path**: file size exceeds 20 MiB before upload. The tool returns a `ToolError` before any model call.\n\n## Side Effects\n- Filesystem\n - Resolves and reads the target image from disk.\n - Stats the file once with `Bun.file(...).stat()` and reads it fully with `fs.readFile(...)`.\n- Network\n - Sends the final base64 image payload plus question text to the selected model through `completeSimple(...)`.\n- Session state\n - Reads session settings, active model preferences, cwd, and model registry.\n- Background work / cancellation\n - Passes the caller `AbortSignal` into `completeSimple(...)`.\n - Image preprocessing is local and not cancellation-aware in these helpers.\n\n## Limits & Caps\n- Supported detected input formats: `image/png`, `image/jpeg`, `image/gif`, `image/webp` (`SUPPORTED_IMAGE_MIME_TYPES` in `packages/utils/src/mime.ts`).\n- Metadata sniff cap: `DEFAULT_IMAGE_METADATA_HEADER_BYTES = 256 * 1024` bytes. Format detection only reads up to 256 KiB from the file header.\n- Upload input cap: `MAX_IMAGE_INPUT_BYTES = 20 * 1024 * 1024` bytes (20 MiB) in `packages/coding-agent/src/utils/image-loading.ts`.\n- Auto-resize defaults in `packages/coding-agent/src/utils/image-resize.ts`:\n - `maxWidth: 1568`\n - `maxHeight: 1568`\n - `maxBytes: 500 * 1024` bytes (500 KiB target)\n - `jpegQuality: 75`\n- Resize fast path: if the original image is already within `1568x1568` and within `maxBytes / 4` (125 KiB by default), `resizeImage(...)` returns the original bytes unchanged.\n- Resize quality ladder: after the first encode pass, lossy retries use qualities `[70, 60, 50, 40]`.\n- Resize dimension ladder: if quality reduction still misses the byte target, retries scale dimensions by `[1.0, 0.75, 0.5, 0.35, 0.25]` and stop if either dimension would fall below `100` pixels.\n- First resize pass encodes PNG, JPEG, and WebP, then keeps the smallest encoded buffer. Fallback passes encode JPEG and WebP only, again keeping the smaller output.\n- Renderer caps:\n - `INSPECT_QUESTION_PREVIEW_WIDTH = 100`\n - `INSPECT_OUTPUT_COLLAPSED_LINES = 4`\n - `INSPECT_OUTPUT_EXPANDED_LINES = 16`\n - `INSPECT_OUTPUT_LINE_WIDTH = 120`\n\n## Errors\n- Settings gate:\n - `Image submission is disabled by settings (images.blockImages=true). Disable it to use inspect_image.`\n- Model resolution / capability:\n - `Model registry is unavailable for inspect_image.`\n - `No models available for inspect_image.`\n - `Unable to resolve a model for inspect_image.`\n - `Resolved model <provider>/<id> does not support image input. Configure a vision-capable model for modelRoles.vision.`\n - `No API key available for <provider>/<id>. Configure credentials for this provider or choose another vision-capable model.`\n- Input file:\n - `Image file too large: <size> exceeds <limit> limit.` from `ImageInputTooLargeError`, remapped to `ToolError`.\n - `inspect_image only supports PNG, JPEG, GIF, and WEBP files detected by file content.` when header sniffing fails.\n- Model call:\n - `inspect_image request failed.` if the response stop reason is `error` without a provider message.\n - Provider `errorMessage` is passed through when present.\n - `inspect_image request aborted.` on aborted responses.\n - `inspect_image model returned no text output.` when the assistant message contains no text blocks after filtering.\n\nFailures surface as thrown `ToolError`s from `execute(...)`; the normal success return shape is not used for error reporting.\n\n## Notes\n- The model-facing prompt path on disk is `packages/coding-agent/src/prompts/tools/inspect-image.md`; the assignment's underscore form does not exist.\n- Format support is based on file content, not filename extension. Renaming a non-image file to `.png` does not make it valid.\n- `resolveReadPath(...)` tries macOS-specific path variants: shell-unescaped spaces, AM/PM narrow no-break-space filenames, NFD normalization, and curly-quote variants.\n- `loadImageInput(...)` also computes `textNote`, `dimensionNote`, and final `bytes`, but `inspect_image` does not include those in tool output.\n- Auto-resize can change the MIME type sent to the model. A JPEG or GIF input may be uploaded as PNG, JPEG, or WebP depending on which encoder output is smallest.\n- If `resizeImage(...)` throws or cannot decode the image, `loadImageInput(...)` silently keeps the original base64 payload instead of failing.\n",
|