@oh-my-pi/pi-coding-agent 16.0.0 → 16.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +140 -133
- package/dist/cli.js +250 -218
- package/dist/types/config/model-resolver.d.ts +14 -0
- package/dist/types/config/settings-schema.d.ts +22 -0
- package/dist/types/discovery/helpers.d.ts +7 -0
- package/dist/types/eval/__tests__/prelude-agent.test.d.ts +1 -0
- package/dist/types/exec/non-interactive-env.d.ts +2 -0
- package/dist/types/extensibility/plugins/runtime-config.d.ts +3 -0
- package/dist/types/modes/types.d.ts +5 -0
- package/dist/types/session/agent-session.d.ts +11 -1
- package/dist/types/session/messages.d.ts +3 -0
- package/dist/types/session/session-manager.d.ts +4 -1
- package/dist/types/task/index.d.ts +21 -0
- package/dist/types/tools/github-cache.d.ts +5 -4
- package/dist/types/tools/job.d.ts +1 -0
- package/dist/types/utils/markit.d.ts +8 -0
- package/dist/types/web/search/index.d.ts +2 -2
- package/dist/types/web/search/provider.d.ts +2 -0
- package/package.json +12 -12
- package/src/advisor/__tests__/advisor.test.ts +44 -0
- package/src/cli/args.ts +2 -0
- package/src/collab/host.ts +1 -1
- package/src/config/model-resolver.ts +35 -1
- package/src/config/settings-schema.ts +23 -1
- package/src/discovery/claude-plugins.ts +3 -42
- package/src/discovery/github.ts +189 -6
- package/src/discovery/helpers.ts +11 -0
- package/src/eval/__tests__/prelude-agent.test.ts +73 -0
- package/src/eval/js/shared/prelude.txt +12 -3
- package/src/eval/py/prelude.py +26 -2
- package/src/exec/bash-executor.ts +2 -2
- package/src/exec/non-interactive-env.ts +71 -0
- package/src/extensibility/custom-commands/bundled/review/index.ts +289 -80
- package/src/extensibility/extensions/runner.ts +17 -1
- package/src/extensibility/plugins/loader.ts +157 -23
- package/src/extensibility/plugins/manager.ts +44 -36
- package/src/extensibility/plugins/marketplace/fetcher.ts +32 -34
- package/src/extensibility/plugins/runtime-config.ts +9 -0
- package/src/internal-urls/docs-index.generated.ts +9 -9
- package/src/internal-urls/issue-pr-protocol.ts +8 -4
- package/src/main.ts +5 -1
- package/src/modes/acp/acp-agent.ts +3 -3
- package/src/modes/components/settings-defs.ts +7 -0
- package/src/modes/components/tips.txt +1 -1
- package/src/modes/controllers/extension-ui-controller.ts +4 -3
- package/src/modes/controllers/input-controller.ts +1 -0
- package/src/modes/controllers/selector-controller.ts +7 -0
- package/src/modes/interactive-mode.ts +47 -0
- package/src/modes/rpc/rpc-mode.ts +3 -3
- package/src/modes/runtime-init.ts +2 -1
- package/src/modes/types.ts +5 -0
- package/src/prompts/agents/designer.md +8 -0
- package/src/prompts/review-request.md +1 -1
- package/src/prompts/system/subagent-system-prompt.md +4 -1
- package/src/prompts/tools/eval.md +13 -3
- package/src/prompts/tools/irc.md +1 -1
- package/src/sdk.ts +9 -1
- package/src/session/agent-session.ts +260 -50
- package/src/session/messages.ts +1 -1
- package/src/session/session-manager.ts +3 -1
- package/src/slash-commands/builtin-registry.ts +5 -2
- package/src/system-prompt.ts +7 -1
- package/src/task/executor.ts +105 -8
- package/src/task/index.ts +70 -9
- package/src/tools/github-cache.ts +32 -7
- package/src/tools/job.ts +14 -1
- package/src/utils/lang-from-path.ts +5 -0
- package/src/utils/markit.ts +24 -1
- package/src/web/search/index.ts +2 -2
- package/src/web/search/provider.ts +14 -2
|
@@ -5,7 +5,7 @@ export const EMBEDDED_DOC_FILENAMES: readonly string[] = ["ERRATA-GPT5-HARMONY.m
|
|
|
5
5
|
export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
|
|
6
6
|
"ERRATA-GPT5-HARMONY.md": "# ERRATA — GPT-5 Harmony-Header Leakage\n\nHistorical research note, not a current runtime contract. The statistics below\ncome from the named local stats database snapshot, not from checked-in tests or\nruntime code.\n\n## 1. The problem\n\nOpenAI frames tool calls in the Harmony chat protocol:\n\n```\n<|start|>assistant<|channel|>commentary to=functions.<NAME><|message|>{ARGS}<|call|>\n```\n\n`<|channel|>commentary to=functions.NAME` is the **routing header** —\ncontrol tokens consumed by the runtime to dispatch the call. These\ntokens never appear as content under normal operation; the runtime\nstrips them.\n\nThe defect: gpt-5 models occasionally emit, **as ordinary content\ninside `{ARGS}`**, the **plain-text shadow** of these routing tokens —\nthe same characters without the `<|…|>` brackets — and continue\nproducing more pseudo-routing structure (channel name, body marker,\nmultilingual spam, fake tool-result framing). The contamination lives\ninside the visible tool argument and is dispatched to the tool as if it\nwere intended content.\n\n**Critical detail.** The actual `<|start|>` / `<|channel|>` /\n`<|message|>` / `<|call|>` special tokens almost never appear in tool\nargs. What leaks is the bracket-less spelling — `analysis to=functions.X\ncode …` — because OpenAI applies a logit mask suppressing the\ncontrol-token IDs inside the args region. The mass that would have gone\nto those special tokens redistributes onto the un-bracketed plain-text\nrepresentation the model also learned. This makes the leak structurally\ninvisible to the routing parser and lands it in the tool input verbatim.\n\nManifestation in tool args (real corpus example):\n\n```\n~ add_function(iso, ctx, ns, \"installSystemChangeObserver\",\n os_install_system_change_observer);】【\"】【analysis to=functions.edit\n code above เงินไทยฟรีuser to=functions.edit code …\n```\n\nThe leading code is real and intended. Everything after the first\nnon-Latin token through the next clean structural boundary is corruption.\n\n---\n\n## 2. Observed statistics & failure modes\n\nSource: `~/.omp/stats.db` (`ss_tool_calls`, `ss_assistant_msgs`), through\n2026-05-10. 1.05M tool calls scanned.\n\n### 2.1 Rate\n\n| Model | Leaks in tool args | Calls | per million |\n| ------------- | -----------------: | ------: | ----------: |\n| gpt-5.4 | 37 | 226,957 | 163 |\n| gpt-5.3-codex | 17 | 112,243 | 151 |\n| gpt-5.5 | 2 | 80,750 | 25 |\n| gpt-5.2-codex | 0 | — | — |\n\nPlus 15 hits in assistant visible text / thinking blobs.\n\n### 2.2 Tool distribution\n\n| Tool | Hits |\n| ------------------------------ | -----: |\n| `edit` | 38 |\n| `eval` | 11 |\n| `report_tool_issue` | 3 |\n| `grep`/`read`/`search`/`yield` | 1 each |\n\nConcentrated in tools with free-form (non-JSON-schema) argument formats.\n\n### 2.3 Leak shape (deterministic)\n\n```\nLEAK ::= JUNK_PREFIX MARKER CHANNEL_BODY (LEAK)?\nMARKER ::= \"to=functions.\" TOOL_NAME\nCHANNEL_BODY ::= \" code \" (SPAM | reasoning_prose | fake_tool_output)*\nJUNK_PREFIX ::= (GLITCH_TOKEN | CHANNEL_WORD | NON_LATIN_RUN | \"}\" | \"】【\")+\n```\n\n**Cascading is common.** Of 96 marker occurrences across 71 contaminated\nrecords, 39 contain ≥2 markers and 7 contain ≥3 — the model emits\nmultiple fake `to=functions.X code …` blocks back-to-back, often with\nfake `code_output\\nCell N:\\n…` framing between them. Once the\nplain-text scaffolding is in the residual stream, the prefix now _looks\nlike_ a fresh tool envelope start, so the macro prior over continuations\nkeeps voting for more scaffolding. Self-amplifying.\n\n### 2.4 Glitch tokens\n\nSingle-token identifiers in `o200k_base` whose embeddings appear to be\nnear-init from underrepresentation in post-training. ASCII residue\nimmediately before the marker in the natural corpus:\n\n| Surface string | Single-token | Token ID | Hits in corpus |\n| ----------------- | :----------: | -------: | ------------------------------: |\n| `Japgolly` | ✅ | 199,745 | 1 |\n| `Jsii` | ✅ | 114,318 | (subtoken of `Jsii_commentary`) |\n| `Jsii_commentary` | — (3 toks) | — | 2 |\n| `changedFiles` | — (2 toks) | — | 8 |\n| `RTLU` | — (2 toks) | — | 3 |\n\n`Japgolly` is in the last 0.13% of the vocabulary — the same family of\nGitHub-corpus residue that produced `SolidGoldMagikarp` in the 2023\nGPT-2 vocabulary (Rumbelow & Watkins). `SolidGoldMagikarp` itself\ntokenizes to 5 tokens in `o200k_base` — that specific token was retired,\nbut the class wasn't.\n\nFor the multi-token entries, the corpus-level signature is the surface\nstring; the underlying glitch trigger is a sub-token (e.g. `Jsii` inside\n`Jsii_commentary`). The detector list (`G` signal) keys on the surface\nstrings.\n\nStable across unrelated sessions. Treated as a high-precision detector\nsignal.\n\n### 2.5 Channel-word leakage\n\n`analysis` (5), `assistant` (5), `commentary` (3), `user` (1) appear\ndirectly preceding `to=`. Always bare words; never `<|channel|>analysis`\nor any other bracketed form. Consistent with §1 — the brackets are\nmasked, the words are not.\n\n### 2.6 Non-Latin spam residue\n\n96 marker hits, by script: CJK 40, Cyrillic 12, Telugu/Kannada/Malayalam\n18, Thai 8, Georgian 7, Armenian 7, Arabic 1. Recurring fragments are\nChinese gambling SEO (`大发时时彩`, `天天中彩票`), Georgian/Abkhaz junk,\nand Thai casino spam — well-known low-quality crawl residue.\n\nThis is the same script distribution observed in the controlled\nreproduction (§7.3), independent of the prompt's natural language.\n\n### 2.7 Failure-mode breakdown for the `edit` tool\n\nThe `edit` tool exists in two variants in the corpus:\n\n| Variant | Calls | Recovery |\n| ------------------------------------ | ----: | ---------------------------------------------------------------------------------------------------------------------------------------------------- |\n| Patch-DSL (`§PATH`/anchor/`«»≔` ops) | 27 | **Recoverable** by op-truncation (§3.3) |\n| JSON-schema (`{path,edits:[…]}`) | 11 | **Not recoverable** — contamination is escaped _inside_ JSON strings, parser accepts it cleanly, content would be written verbatim into source files |\n\nFor Patch-DSL leaks specifically:\n\n- 20/27 cases: contamination on the last input line; nothing follows.\n- 7/27 cases: contamination mid-input; what follows is one of: a\n duplicate replay of an earlier file/anchor, intended content for a\n _different_ tool call (the model started its next call inline), or\n pure hallucination. Post-contamination content is never trustworthy.\n\n### 2.8 Mechanism (confirmed)\n\n**Prior collapse from null-embedding glitch tokens, into a\ncontrol-token-masked basin whose mass redistributes onto the\nplain-text shadow of the Harmony protocol.**\n\nStep by step:\n\n1. The model is mid-`{ARGS}` of a Harmony tool call. The runtime applies\n a logit mask suppressing structural control tokens (`<|channel|>`,\n `<|message|>`, `<|call|>`, `<|start|>`, `<|end|>`) inside the args\n region. Without this mask, normal generation would constantly\n hallucinate envelope-closes; with it, those token IDs have logit\n `-∞` in args.\n2. A glitch token `g` is sampled. By construction `g` was in the BPE\n merge corpus but barely in LM/RL training, so its **input embedding\n `e_g` ≈ near-init noise of small norm**.\n3. At position t+1, the residual update `h_{t+1} ≈ LN(h_t + e_g + Attn +\nMLP)` is dominated by the prefix-derived terms; the just-emitted-token\n signal is effectively absent. Generation diversity normally comes\n from `e_x` steering the residual into different sub-regions —\n stripped here.\n4. The next-token distribution therefore collapses onto the **conditional\n prior over continuations of the prefix, with local conditioning\n removed**. In a tool-calling rollout context, that prior is sharply\n peaked on Harmony scaffolding (control tokens + routing tokens) —\n that's what RL trained.\n5. The mask zeros the control-token IDs. Mass redistributes onto the\n **next-best continuation**: the un-bracketed surface-form spelling of\n the same protocol (`analysis`, `commentary`, ` to=functions.X`,\n `code`). This spelling is unmasked because those characters are\n ordinary tokens.\n6. Once a few tokens of plain-text scaffolding land in the residual\n stream, the prefix now resembles a fresh envelope start. The macro\n prior keeps voting for more scaffolding. Cascading (§2.3) follows.\n7. Multilingual spam after the marker is the same prior-collapse\n continuation, drawn from the training neighborhood of the glitch\n token (often ESL/auto-generated multilingual web junk — exactly the\n crawl residue in §2.6).\n\n**Two corollaries the corpus data demanded but only the experiment\nexplained:**\n\n- **The brackets never appear** (§1, §2.5). The mask is what makes the\n leak land in plain text instead of as a real envelope-close.\n- **Counterintuitive grammar dependency** (§7.4). The leak is _worse_ in\n formats closest to OpenAI's training distribution. Off-distribution\n custom grammars dampen the macro-prior basin; the official\n `*** Begin Patch` format is the strongest collapse target.\n\nThe 2023 SolidGoldMagikarp paper documented mechanism (1)+(2)+(4). The\nnew piece is (5): when constrained decoding masks the natural collapse\ntarget, the mass laundered through the un-masked plain-text shadow\nbecomes a structurally-invisible exfiltration channel.\n",
|
|
7
7
|
"adding-a-provider.md": "# Adding a provider\n\nA provider is described in two halves:\n\n- **Catalog half** (`packages/catalog`): one entry in the `CATALOG_PROVIDERS`\n table (`packages/catalog/src/provider-models/descriptors.ts`) carrying the\n `id`, `defaultModel`, runtime model-discovery factory, and catalog-generation\n wiring. `KnownProvider`, `PROVIDER_DESCRIPTORS`, and\n `DEFAULT_MODEL_PER_PROVIDER` are derived from this table.\n- **Auth half** (`packages/ai`): one declarative `ProviderDefinition` in the\n registry carrying env-key fallbacks and login/refresh flows. The\n `OAuthProvider` union, the env-key map, the `/login` provider list, the\n `refreshOAuthToken` / `AuthStorage.login` dispatch, and the coding-agent\n callback maps are derived from the registry.\n\n**Scope.** This is for a provider that reuses an existing wire API\n(`openai-completions`, `anthropic-messages`, `google-generative-ai`, …) — the\ncommon case for gateways and API-key providers, since stream dispatch keys on\n`model.api`, not `model.provider`. Adding a *new wire protocol* (a new\n`KnownApi`) is a separate task that also touches `stream.ts` dispatch,\n`api-registry.ts`, and the catalog `types.ts`.\n\n## Shape\n\nFor the common case, a provider is **one catalog entry + one def file + one registry line**:\n\n1. **Add an entry to `CATALOG_PROVIDERS`** in\n `packages/catalog/src/provider-models/descriptors.ts` with the `id`,\n `defaultModel`, the plain API-key env var(s) as `envVars`, and (usually) a\n `createModelManagerOptions` factory. For a\n simple OpenAI-compatible gateway, build the factory in\n `packages/catalog/src/provider-models/openai-compat.ts` or inline with the\n exported `createSimpleOpenAICompletionsOptions(providerId, baseUrl, config)`.\n2. **Create `packages/ai/src/registry/<id>.ts`** exporting one\n `export const <camelId>Provider = { … } as const satisfies ProviderDefinition;`\n with the auth fields (`login`, …). Plain env-var names live in the catalog\n entry's `envVars`; set `envKeys` only for computed resolvers (Foundry/ADC/\n Bedrock-style probes).\n3. **Add it to the `ALL` array** in `packages/ai/src/registry/registry.ts`\n (one import + one array entry). `ALL` order is the `/login` list order for\n loginable providers.\n\nThat is the full change for:\n- env-key-only providers,\n- providers with a simple inline API-key login flow,\n- most OpenAI-compatible gateways.\n\nFor a **non-trivial provider-local OAuth flow**, put the implementation in\n`packages/ai/src/registry/oauth/<vendor>.ts` and lazy-import it from the def\nfile. The shared OAuth flow infrastructure it builds on lives in the same\n`registry/oauth/` directory.\n\nDescriptors, the default-model map, env-key map, login list, and refresh\ndispatch all update automatically; the `KnownProvider` union gains the new id\nfrom the catalog table and `OAuthProvider` from the registry.\n\n## Field reference\n\n**Catalog table entry** (`ProviderCatalogEntry`, see\n`packages/catalog/src/provider-models/descriptor-types.ts` for JSDoc):\n\n| Field | Effect |\n|---|---|\n| `id` | Required. Member of `KnownProvider`. |\n| `defaultModel` | Required. Preferred model when no explicit selection is made. |\n| `envVars` | Env var name(s), in order, for the runtime API-key fallback (`getEnvApiKey`). |\n| `createModelManagerOptions` | Runtime model-discovery factory. Present (and not `specialModelManager`) ⇒ appears in `PROVIDER_DESCRIPTORS`. |\n| `allowUnauthenticated` | Runtime creates a model manager even without a key. |\n| `dynamicModelsAuthoritative` | Successful discovery replaces bundled models. |\n| `catalogDiscovery` | `{ label, envVars?, oauthProvider?, allowUnauthenticated? }` for offline catalog generation (`generate-models.ts`). `envVars` here overrides the entry-level list when generation uses different credentials (e.g. `cursor`). |\n| `specialModelManager` | Bespoke runtime factory (`google-antigravity` / `google-gemini-cli` / `openai-codex`); excluded from `PROVIDER_DESCRIPTORS`. |\n\n**Registry definition** (`ProviderDefinition`, see\n`packages/ai/src/registry/types.ts`):\n\n| Field | Effect |\n|---|---|\n| `id`, `name` | Required. `name` shows in the `/login` list. |\n| `envKeys` | Computed env fallback for `getEnvApiKey`, overriding the catalog entry's `envVars`: a var name string or a `() => string \\| undefined` resolver. Omit when `envVars` covers it. |\n| `login` | Interactive login. Present ⇒ member of `OAuthProvider`, shown in `/login`, dispatchable via `AuthStorage.login`. Returns an api-key `string` or `OAuthCredentials`. |\n| `refreshToken` | OAuth refresher; omit for static-token providers (the dispatch returns credentials unchanged). |\n| `storeCredentialsAs` | Store credentials under a different provider id (e.g. `openai-codex-device` ⇒ `openai-codex`). |\n| `callbackPort` | Present ⇒ entry in the auth-broker `CALLBACK_PORTS` map. |\n| `pasteCodeFlow` | OAuth flow needs a pasted code/redirect URL ⇒ member of `PASTE_CODE_LOGIN_PROVIDERS`. |\n\n## Conventions\n\n- Use `... as const satisfies ProviderDefinition` so the literal `id` is preserved\n for the union derivation.\n- `login` / `refreshToken` for simple API-key or validation-based flows can live\n directly in the provider def file (export the named login function there so\n tests can import it directly).\n- `login` / `refreshToken` for heavy provider-local OAuth flows MUST reach the\n adjacent `registry/oauth/*` module via a dynamic-import\n thunk (`const { loginX } = await import(\"./oauth/x\"); return loginX(cb);`),\n keeping those flows out of the eager startup graph.\n- All OAuth code lives under `registry/oauth/`: the shared flow infra\n (`callback-server`, `pkce`, `google-oauth-shared`, `types`, the runtime API\n `index`) plus every provider flow, including the `github-copilot` / `kimi` /\n `openai-codex` helpers reused by the streaming and usage layers. The non-OAuth\n API-key helpers (`api-key-login`, `api-key-validation`) sit beside the def\n files in `registry/`, since they back simple paste-an-API-key logins.\n- For a simple OpenAI-compatible gateway, build the manager inline with the\n exported `createSimpleOpenAICompletionsOptions(providerId, baseUrl, config)` —\n no edits to `openai-compat.ts` required.\n- A `ProviderDefinition` may also be registered at runtime by an extension via\n `registerOAuthProvider` (the `AuthStorage.login` dispatcher handles built-ins\n and extensions through the same path).\n",
|
|
8
|
-
"advisor-watchdog.md": "# Advisor and WATCHDOG.md\n\nThe advisor is an optional second model attached to a session. It reviews the primary agent's transcript after each turn, can inspect the workspace with read-only tools, and injects concise advice back into the primary session.\n\nThe advisor is not a second executor. It cannot edit files, run commands, approve actions, or change session state directly.\n\n## Implementation files\n\n- [`src/advisor/runtime.ts`](../packages/coding-agent/src/advisor/runtime.ts)\n- [`src/advisor/advise-tool.ts`](../packages/coding-agent/src/advisor/advise-tool.ts)\n- [`src/advisor/watchdog.ts`](../packages/coding-agent/src/advisor/watchdog.ts)\n- [`src/prompts/advisor/system.md`](../packages/coding-agent/src/prompts/advisor/system.md)\n- [`src/prompts/advisor/advise-tool.md`](../packages/coding-agent/src/prompts/advisor/advise-tool.md)\n- [`src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts)\n- [`src/slash-commands/builtin-registry.ts`](../packages/coding-agent/src/slash-commands/builtin-registry.ts)\n- [`src/config/settings-schema.ts`](../packages/coding-agent/src/config/settings-schema.ts)\n\n---\n\n## Enabling the advisor\n\nThe advisor requires both:\n\n1. `advisor.enabled: true`\n2. a model assigned to the `advisor` model role\n\nExample:\n\n```yaml\nmodelRoles:\n advisor: anthropic/claude-sonnet-4-5:medium\n\nadvisor:\n enabled: true\n```\n\nThe advisor role uses normal model-role resolution, including provider-prefixed ids, canonical ids, and optional thinking suffixes.\n\nSlash commands:\n\n| Command | Effect |\n|---|---|\n| `/advisor` | Toggle the persisted `advisor.enabled` setting. |\n| `/advisor on` | Enable the setting and start the runtime when an advisor model is assigned. |\n| `/advisor off` | Disable the setting and stop the runtime. |\n| `/advisor status` | Show active model, context usage, token usage, and cost. |\n| `/advisor dump` | Print the advisor's compact transcript. |\n| `/advisor dump raw` | Print the advisor's full dump with system prompt, tools, thinking, and calls. |\n\nIf `advisor.enabled` is true but no `modelRoles.advisor` value resolves to an available model, status reports that the setting is enabled but no advisor model is assigned.\n\n## What the advisor sees\n\nAt each primary turn end, `AdvisorRuntime` receives only the new transcript delta since the last advisor update. Deltas are rendered with `formatSessionHistoryMarkdown(..., { includeThinking: true })`, so the advisor can review assistant reasoning as well as user-visible text, tool calls, and tool results.\n\nAdvisor messages already injected into the primary transcript are filtered out before the next delta is rendered. This prevents the advisor from recursively reviewing its own advice.\n\nWhen the primary transcript is rewritten, the advisor runtime is reset:\n\n- compaction\n- session switch/resume\n- branch/fork style history replacement\n- context-maintenance re-prime when the advisor's own context cannot fit\n\nReset clears the advisor's private in-memory transcript and rewinds its cursor. The next advisor update replays the current bounded primary transcript instead of continuing from stale pre-rewrite context.\n\nWhen the advisor is enabled mid-session, the cursor seeds to the current primary transcript length. That avoids replaying the whole old conversation on the first enabled turn.\n\n## Tools and isolation\n\nThe advisor receives a hard-isolated read-only tool set:\n\n- `read`\n- `search`\n- `find`\n- `advise`\n\nThe read/search/find tools are built against a distinct `ToolSession` whose session id is suffixed with `-advisor`. The advisor therefore does not share the primary agent's file snapshots, seen-lines tracking, conflict state, summary cache, or edit/yield capabilities.\n\nThe `advise` tool accepts one note and an optional severity:\n\n| Severity | Delivery | Intended use |\n|---|---|---|\n| omitted / `nit` | Non-interrupting aside, batched into the primary transcript at the next step boundary. | Cleanup, simplification, low-risk edge cases. |\n| `concern` | Interrupting steering message. | Material risk, likely wrong direction, missing constraint, hallucinated API. |\n| `blocker` | Interrupting steering message. | Continuing would clearly waste work or produce broken output. |\n\nInterrupting advice is sent through the steering channel and can abort in-flight tools at the next steering boundary. Non-interrupting notes are batched into one custom `advisor` transcript card with this prefix:\n\n```text\nAdvisor (a senior reviewer watching your work — weigh it, don't blindly obey):\n```\n\n## Bounded catch-up with `advisor.syncBacklog`\n\n`advisor.syncBacklog` is not lockstep turn execution. It is a bounded catch-up delay for the primary agent when the advisor falls behind.\n\nAllowed values:\n\n- `off` — never wait for advisor catch-up\n- `1`\n- `3`\n- `5`\n\nOn primary turn end:\n\n1. the primary turn delta is queued for the advisor\n2. the advisor drain loop starts or continues in the background\n3. if `advisor.syncBacklog` is not `off`, the primary agent waits only while advisor backlog is at or above the configured threshold\n4. the wait is capped at 30 seconds\n5. if the advisor catches up below the threshold, the primary continues immediately\n6. if the cap expires, the primary continues anyway\n\nPractical interpretation:\n\n- `off` favors maximum primary throughput.\n- `1` is the closest mode to synchronous review: after each queued advisor delta, the primary waits up to 30 seconds for backlog to return to zero.\n- `3` and `5` allow more advisor lag before the primary pauses.\n\nAdvisor failures do not permanently stall the primary. A failed advisor prompt is retried; after three consecutive advisor failures, the runtime logs a warning, drops the backlog, and lets the session continue.\n\n## WATCHDOG.md\n\n`WATCHDOG.md` is advisor-only guidance. It is appended to the advisor system prompt; it is not injected into the primary agent's normal context and does not behave like `AGENTS.md`, `RULES.md`, or other context files.\n\nUse it for review priorities: risks the advisor should watch for, project-specific traps, dangerous APIs, architectural boundaries, and quality bars that are useful to a reviewer but too noisy for the main executor.\n\nExample:\n\n```markdown\n# Watchdog notes\n\nEspecially watch for:\n\n- Changes that bypass the durable queue in `src/jobs/`.\n- UI renderer paths that display unsanitized tool output.\n- New worker spawns that do not re-enter the CLI host.\n```\n\n### Discovery locations\n\n`discoverWatchdogFiles(cwd, agentDir)` loads every readable candidate from these locations:\n\n1. user level: `<active agent dir>/WATCHDOG.md` (`~/.omp/agent/WATCHDOG.md` by default; relocated by `PI_CODING_AGENT_DIR`)\n2. project levels while walking from `cwd` upward to the git repository root, or to the home directory when no repo root is found:\n - `<dir>/WATCHDOG.md`\n - `<dir>/.omp/WATCHDOG.md`\n\nUnlike native context files, watchdog discovery does not stop at the nearest project file. Multiple project watchdog files can load together.\n\nCandidates in hidden owner directories are ignored unless the file is inside an `.omp` directory. This keeps unrelated dot-directory conventions from being picked up accidentally while still allowing `.omp/WATCHDOG.md`.\n\n### `@` imports\n\n`WATCHDOG.md` content is expanded with the same `@` import helper used by context files:\n\n- relative imports resolve from the importing file's directory\n- `~/` resolves from the user's home directory\n- imports inside fenced code blocks and inline code spans stay literal\n- cycles are skipped\n- missing or unreadable imports leave the original `@path` text in place\n\n### Prompt order\n\nLoaded watchdog blocks are sorted as:\n\n1. user-level `WATCHDOG.md`\n2. project-level files from farther ancestors down toward `cwd`\n\nEach file is appended to the advisor system prompt as:\n\n```xml\nEspecially pay attention to:\n<attention>\n...expanded watchdog content...\n</attention>\n```\n\nLater project files sit closer to the end of the advisor prompt, so narrower directory guidance is more prominent than broad ancestor guidance.\n\n## Subagents\n\n`advisor.subagents` controls whether spawned task/eval subagents also get an advisor runtime.\n\n- `false` (default): only the main session can run an advisor.\n- `true`: eligible subagent sessions build their own advisor with the same settings/model-role resolution, then rerun `WATCHDOG.md` discovery for that subagent session's `cwd` and agent directory.\n\nSubagent advisors remain isolated from the subagent's primary tool session in the same way the main advisor is isolated from the main agent.\n\n## Cost and context behavior\n\nAdvisor usage is separate model usage. `/advisor status` reports advisor token counts and cost from the advisor agent's own transcript.\n\nThe advisor has its own append-only context. Before each advisor prompt, `AgentSession` estimates incoming tokens and may maintain advisor context:\n\n1. try model-level context promotion when enabled and a larger compatible model is available\n2. if promotion cannot fit enough context, compact the advisor's own message history\n3. if compaction has no candidates or still cannot fit, re-prime from the current bounded primary transcript\n\nThe advisor transcript is in-memory for the session. It is retained while the session runs so `/advisor dump` can inspect it, but advisor state is not a replacement for the primary persisted transcript.\n",
|
|
8
|
+
"advisor-watchdog.md": "# Advisor and WATCHDOG.md\n\nThe advisor is an optional second model attached to a session. It reviews the primary agent's transcript after each turn, can inspect the workspace with read-only tools, and injects concise advice back into the primary session.\n\nThe advisor is not a second executor. It cannot edit files, run commands, approve actions, or change session state directly.\n\n## Implementation files\n\n- [`src/advisor/runtime.ts`](../packages/coding-agent/src/advisor/runtime.ts)\n- [`src/advisor/advise-tool.ts`](../packages/coding-agent/src/advisor/advise-tool.ts)\n- [`src/advisor/watchdog.ts`](../packages/coding-agent/src/advisor/watchdog.ts)\n- [`src/prompts/advisor/system.md`](../packages/coding-agent/src/prompts/advisor/system.md)\n- [`src/prompts/advisor/advise-tool.md`](../packages/coding-agent/src/prompts/advisor/advise-tool.md)\n- [`src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts)\n- [`src/slash-commands/builtin-registry.ts`](../packages/coding-agent/src/slash-commands/builtin-registry.ts)\n- [`src/config/settings-schema.ts`](../packages/coding-agent/src/config/settings-schema.ts)\n\n---\n\n## Enabling the advisor\n\nThe advisor requires both:\n\n1. `advisor.enabled: true`\n2. a model assigned to the `advisor` model role\n\nExample:\n\n```yaml\nmodelRoles:\n advisor: anthropic/claude-sonnet-4-5:medium\n\nadvisor:\n enabled: true\n```\n\nThe advisor role uses normal model-role resolution, including provider-prefixed ids, canonical ids, and optional thinking suffixes.\n\nSlash commands:\n\n| Command | Effect |\n|---|---|\n| `/advisor` | Toggle the persisted `advisor.enabled` setting. |\n| `/advisor on` | Enable the setting and start the runtime when an advisor model is assigned. |\n| `/advisor off` | Disable the setting and stop the runtime. |\n| `/advisor status` | Show active model, context usage, token usage, and cost. |\n| `/advisor dump` | Print the advisor's compact transcript. |\n| `/advisor dump raw` | Print the advisor's full dump with system prompt, tools, thinking, and calls. |\n\nIf `advisor.enabled` is true but no `modelRoles.advisor` value resolves to an available model, status reports that the setting is enabled but no advisor model is assigned.\n\n## What the advisor sees\n\nAt each primary turn end, `AdvisorRuntime` receives only the new transcript delta since the last advisor update. Deltas are rendered with `formatSessionHistoryMarkdown(..., { includeThinking: true })`, so the advisor can review assistant reasoning as well as user-visible text, tool calls, and tool results.\n\nAdvisor messages already injected into the primary transcript are filtered out before the next delta is rendered. This prevents the advisor from recursively reviewing its own advice.\n\nWhen the primary transcript is rewritten, the advisor runtime is reset:\n\n- compaction\n- session switch/resume\n- branch/fork style history replacement\n- context-maintenance re-prime when the advisor's own context cannot fit\n\nReset clears the advisor's private in-memory transcript and rewinds its cursor. The next advisor update replays the current bounded primary transcript instead of continuing from stale pre-rewrite context.\n\nWhen the advisor is enabled mid-session, the cursor seeds to the current primary transcript length. That avoids replaying the whole old conversation on the first enabled turn.\n\n## Tools and isolation\n\nThe advisor receives a hard-isolated read-only tool set:\n\n- `read`\n- `search`\n- `find`\n- `advise`\n\nThe read/search/find tools are built against a distinct `ToolSession` whose session id is suffixed with `-advisor`. The advisor therefore does not share the primary agent's file snapshots, seen-lines tracking, conflict state, summary cache, or edit/yield capabilities.\n\nThe `advise` tool accepts one note and an optional severity:\n\n| Severity | Delivery | Intended use |\n|---|---|---|\n| omitted / `nit` | Non-interrupting aside, batched into the primary transcript at the next step boundary. | Cleanup, simplification, low-risk edge cases. |\n| `concern` | Interrupting steering message. | Material risk, likely wrong direction, missing constraint, hallucinated API. |\n| `blocker` | Interrupting steering message. | Continuing would clearly waste work or produce broken output. |\n\nInterrupting advice is sent through the steering channel and can abort in-flight tools at the next steering boundary. Non-interrupting notes are batched into one custom `advisor` transcript card with this prefix:\n\n```text\nAdvisor (a senior reviewer watching your work — weigh it, don't blindly obey):\n```\n\nWhen you deliberately interrupt the agent (Esc, or a cancel from collab, ACP, RPC, the SDK, or an extension), the advisor stops auto-resuming it. An interrupting `concern`/`blocker` raised while the run is stopped is recorded as a visible advisor card instead of restarting the turn, and a concern already in flight when you interrupt is preserved the same way rather than driving a surprise resume. The advice re-enters context the next time you resume — a new message, the `.`/`c` continue shortcut, or a steer/follow-up. A normal yield is unaffected: the advisor can still steer and resume a run the agent ended on its own.\n\n## Bounded catch-up with `advisor.syncBacklog`\n\n`advisor.syncBacklog` is not lockstep turn execution. It is a bounded catch-up delay for the primary agent when the advisor falls behind.\n\nAllowed values:\n\n- `off` — never wait for advisor catch-up\n- `1`\n- `3`\n- `5`\n\nOn primary turn end:\n\n1. the primary turn delta is queued for the advisor\n2. the advisor drain loop starts or continues in the background\n3. if `advisor.syncBacklog` is not `off`, the primary agent waits only while advisor backlog is at or above the configured threshold\n4. the wait is capped at 30 seconds\n5. if the advisor catches up below the threshold, the primary continues immediately\n6. if the cap expires, the primary continues anyway\n\nPractical interpretation:\n\n- `off` favors maximum primary throughput.\n- `1` is the closest mode to synchronous review: after each queued advisor delta, the primary waits up to 30 seconds for backlog to return to zero.\n- `3` and `5` allow more advisor lag before the primary pauses.\n\nAdvisor failures do not permanently stall the primary. A failed advisor prompt is retried; after three consecutive advisor failures, the runtime logs a warning, drops the backlog, and lets the session continue.\n\n## WATCHDOG.md\n\n`WATCHDOG.md` is advisor-only guidance. It is appended to the advisor system prompt; it is not injected into the primary agent's normal context and does not behave like `AGENTS.md`, `RULES.md`, or other context files.\n\nUse it for review priorities: risks the advisor should watch for, project-specific traps, dangerous APIs, architectural boundaries, and quality bars that are useful to a reviewer but too noisy for the main executor.\n\nExample:\n\n```markdown\n# Watchdog notes\n\nEspecially watch for:\n\n- Changes that bypass the durable queue in `src/jobs/`.\n- UI renderer paths that display unsanitized tool output.\n- New worker spawns that do not re-enter the CLI host.\n```\n\n### Discovery locations\n\n`discoverWatchdogFiles(cwd, agentDir)` loads every readable candidate from these locations:\n\n1. user level: `<active agent dir>/WATCHDOG.md` (`~/.omp/agent/WATCHDOG.md` by default; relocated by `PI_CODING_AGENT_DIR`)\n2. project levels while walking from `cwd` upward to the git repository root, or to the home directory when no repo root is found:\n - `<dir>/WATCHDOG.md`\n - `<dir>/.omp/WATCHDOG.md`\n\nUnlike native context files, watchdog discovery does not stop at the nearest project file. Multiple project watchdog files can load together.\n\nCandidates in hidden owner directories are ignored unless the file is inside an `.omp` directory. This keeps unrelated dot-directory conventions from being picked up accidentally while still allowing `.omp/WATCHDOG.md`.\n\n### `@` imports\n\n`WATCHDOG.md` content is expanded with the same `@` import helper used by context files:\n\n- relative imports resolve from the importing file's directory\n- `~/` resolves from the user's home directory\n- imports inside fenced code blocks and inline code spans stay literal\n- cycles are skipped\n- missing or unreadable imports leave the original `@path` text in place\n\n### Prompt order\n\nLoaded watchdog blocks are sorted as:\n\n1. user-level `WATCHDOG.md`\n2. project-level files from farther ancestors down toward `cwd`\n\nEach file is appended to the advisor system prompt as:\n\n```xml\nEspecially pay attention to:\n<attention>\n...expanded watchdog content...\n</attention>\n```\n\nLater project files sit closer to the end of the advisor prompt, so narrower directory guidance is more prominent than broad ancestor guidance.\n\n## Subagents\n\n`advisor.subagents` controls whether spawned task/eval subagents also get an advisor runtime.\n\n- `false` (default): only the main session can run an advisor.\n- `true`: eligible subagent sessions build their own advisor with the same settings/model-role resolution, then rerun `WATCHDOG.md` discovery for that subagent session's `cwd` and agent directory.\n\nSubagent advisors remain isolated from the subagent's primary tool session in the same way the main advisor is isolated from the main agent.\n\n## Cost and context behavior\n\nAdvisor usage is separate model usage. `/advisor status` reports advisor token counts and cost from the advisor agent's own transcript.\n\nThe advisor has its own append-only context. Before each advisor prompt, `AgentSession` estimates incoming tokens and may maintain advisor context:\n\n1. try model-level context promotion when enabled and a larger compatible model is available\n2. if promotion cannot fit enough context, compact the advisor's own message history\n3. if compaction has no candidates or still cannot fit, re-prime from the current bounded primary transcript\n\nThe advisor transcript is in-memory for the session. It is retained while the session runs so `/advisor dump` can inspect it, but advisor state is not a replacement for the primary persisted transcript.\n",
|
|
9
9
|
"ai-schema-normalize.md": "# AI tool-schema normalization\n\n`@oh-my-pi/pi-ai` exposes one unified schema normalizer that providers consume\nbefore tools are sent on the wire. All walkers live in\n`packages/ai/src/utils/schema/normalize.ts`; the operational contract is\n`packages/ai/src/utils/schema/CONSTRAINTS.md`.\n\nThere is no separate `strict-mode.ts` module any more — OpenAI strict-mode\nsanitization, OpenAI Responses `oneOf` rewriting, Google/Vertex/Gemini-CLI\nsanitization, Cloud Code Assist Claude sanitization, and MCP sanitization all\nshare the same option-driven walk.\n\n## Entry points\n\nAll exports live under `@oh-my-pi/pi-ai/utils/schema`:\n\n- `normalizeSchema(value, options)` — generic option-driven walker.\n- `normalizeSchemaForGoogle(value)` — Gemini / Vertex / Gemini CLI.\n- `normalizeSchemaForCCA(value)` — Cloud Code Assist Claude (Antigravity + GCA).\n- `normalizeSchemaForMCP(value)` — MCP inputSchemas before they enter the\n custom-tool registry. `tool-bridge.ts` runs every MCP `inputSchema` through\n this dispatcher.\n- `normalizeSchemaForOpenAIResponses(schema)` (alias\n `sanitizeSchemaForOpenAIResponses`) — rewrites `oneOf` → `anyOf` for the\n Responses family.\n- `sanitizeSchemaForStrictMode(schema)` and\n `enforceStrictSchema(schema)` / `tryEnforceStrictSchema(schema)` — the\n OpenAI strict-mode pipeline (sanitize → enforce). All three are exported\n from `normalize.ts`.\n- `adaptSchemaForStrict(schema, strict)` from `./adapt` — thin composer that\n upgrades draft-07 inputs to 2020-12 and wraps `tryEnforceStrictSchema` for\n provider call sites. `./adapt` also exports the `NO_STRICT` global-bypass\n flag (env `PI_NO_STRICT`) honored by every provider that emits `strict: true`.\n\nRemoved in the unified-flow refactor:\n\n- `strict-mode.ts` (merged into `normalize.ts`).\n- `sanitize-google.ts` and `normalize-cca.ts` (replaced by\n `normalizeSchemaFor*` dispatchers).\n- `StringEnum` helper — use `z.enum([...])` directly; Zod's emitted JSON\n Schema is already wire-compatible with Google and other providers.\n- `sanitizeSchemaFor{Google,CCA,MCP}` / `prepareSchemaForCCA` — renamed to\n `normalizeSchemaFor{Google,CCA,MCP}`.\n\n## Dispatcher mapping\n\n| Provider transport(s) | Dispatcher |\n| ------------------------------------------------------------------ | ------------------------------------------- |\n| `openai-completions`, `openai-responses`, `openai-codex-responses` | `adaptSchemaForStrict` (sanitize + enforce) |\n| `openai-responses` family (`oneOf` → `anyOf` only) | `normalizeSchemaForOpenAIResponses` |\n| `google-generative-ai`, `google-vertex`, Gemini CLI | `normalizeSchemaForGoogle` |\n| Cloud Code Assist Claude (Antigravity + GCA, `claude-*` model ids) | `normalizeSchemaForCCA` |\n| MCP `inputSchema` ingestion | `normalizeSchemaForMCP` |\n| `anthropic-messages` (native, not CCA) | per-provider whitelist in `anthropic.ts` |\n\nGemini CLI / Antigravity CCA MUST run the full `normalizeSchemaForCCA`\npipeline (not just the first keyword-stripping pass) to keep parity with the\nshared Google Claude path.\n\n## Walk semantics\n\n`normalizeSchema` first detoxifies serialized Zod-instance-shaped inputs, upgrades them to\nJSON Schema 2020-12, dereferences the tree, then walks it with the option set\npinned by the dispatcher. Each node:\n\n1. Renames `snake_case` combinator/property keys to camelCase\n (`any_of` → `anyOf`, etc.; collisions follow python-genai\n `pop(from)`/`set(to)` semantics — snake_case wins).\n2. Applies the `handle_null_fields` collapse for nullable unions before\n recursing into children.\n3. Strips keys the target provider does not support, optionally lifting\n human-meaningful keys (`pattern`, `format`, min/max, `default`,\n `examples`, ...) into the sibling `description` via the spill formatter\n (`spill.ts`). Structural/meta keys (`$ref`, `$defs`,\n `additionalProperties`) are not spilled.\n4. Normalizes type unions (`type: [\"T\", \"null\"]` → `type: \"T\"` + nullable\n marker on Google, plain `type: \"T\"` on CCA).\n5. Collapses object-only / same-type combiners, optionally lossy-collapses\n mixed-type combiners (CCA only), and runs the residual-combiner fixpoint.\n6. Validates against AJV 2020 when `validateAndFallback` is set (CCA path)\n and emits the per-tool fallback `{ \"type\": \"object\", \"properties\": {} }`\n on residual incompatibility — `type` array, `type: \"null\"`, `nullable`\n key, or any remaining `anyOf`/`oneOf`/`allOf`.\n\n## OpenAI strict-mode pipeline\n\n`adaptSchemaForStrict(schema, strict)` runs `tryEnforceStrictSchema`,\nwhich composes:\n\n1. **Sanitize** (`sanitizeSchemaForStrictMode`): strips non-structural\n keywords (`format`, `pattern`, min/max, `examples`, `default`,\n `if`/`then`/`else`, `not`, `unevaluated*`, `patternProperties`,\n `dependent*`, `content*`, `min/maxProperties`, `$dynamicRef`, etc.). The\n `default` value is inlined into the sibling `description` as\n ` (default: X)` before being dropped, unless `description` already\n contains `(default:` or no `description` exists.\n2. **Enforce** (`enforceStrictSchema`): every object node gets\n `additionalProperties: false`, every property goes into `required`, and\n optional properties become nullable unions\n (`anyOf: [<original>, { \"type\": \"null\" }]`). Tuple `prefixItems` are\n strictified recursively.\n\nThe two passes use cache/cycle guards, so refs, `allOf`, and nullable wrapping\nstay deterministic without recursing forever. `tryEnforceStrictSchema` is\nfail-open: if anything throws, it returns `{ strict: false, schema: upgraded }`\nso callers MUST emit `strict: true` only when enforcement actually succeeded.\n\n### Edge cases the strict-mode normalizer handles\n\n- **Local `$ref` inlining.** OpenAI strict mode rejects\n `{ \"$ref\": \"...\", \"description\": \"...\" }` with sibling keys. The\n sanitizer pre-resolves local `#/...` refs against the root and merges\n with **sibling keys winning** over the resolved def — same precedence\n as `openai-python`'s `_ensure_strict_json_schema`. Recursive refs are\n guarded by the per-walk epoch.\n- **Single-item `allOf`.** A `{ \"allOf\": [X], ...siblings }` collapses to\n `{ ...X, ...siblings }` with the inlined entry's keys winning over the\n original siblings (matches `openai-python`'s `_pydantic.py:79-83`). Multi-\n item `allOf` is left intact for the downstream validator to reject if\n needed.\n- **Type-array branches and nullable unions.** When a node has\n `type: [\"T\", \"U\"]`, the sanitizer emits one variant schema per type,\n pruning type-specific keywords (e.g. `properties`/`required` only stay on\n the `object` variant, `items` only on the `array` variant). The shared\n `description` is **hoisted onto the `anyOf` wrapper** instead of being\n duplicated on every branch — so a strict nullable union becomes\n `{ anyOf: [T, { type: \"null\" }], description: \"...\" }`, not\n `anyOf: [{ ..., description }, { ..., description }]`.\n- **Enum/const without a `type`.** Both sanitize and enforce paths call\n `inferStrictPrimitiveTypeFromEnumOrConst` to infer the primitive `type`\n from `enum` / `const` values. Mixed-primitive enums (`[1, \"two\", null]`),\n enums containing objects/arrays, and non-primitive `const` values\n (`{a:1}`, `[1,2,3]`) cannot be described by a single `type` keyword and\n trigger the strict-mode fail-open path — emitting a typeless schema\n would just be rejected on the wire by OpenAI.\n\n## Performance: static fingerprint cache\n\n`resolveProviderModels` in `packages/catalog/src/model-manager.ts` and\n`readModelCache`/`writeModelCache` in `packages/catalog/src/model-cache.ts`\ncooperate via a `static_fingerprint` column on the `model_cache` SQLite\ntable (current cache schema version 5).\n\n- `fingerprintStatic(staticModels)` hashes the static catalog slice\n (`Bun.hash(JSON.stringify(models))` in base36) and memoizes the result\n by tagging the array with a symbol property. Multiple cold-start arms\n calling `resolveProviderModels` with the same `staticModels` array pay\n the JSON+hash cost once.\n- On cache read, if the network fetch is being skipped, the cached row is\n fresh + authoritative, and the cached `static_fingerprint` matches the\n current one, `resolveProviderModels` returns the cached models verbatim\n — the cache already incorporates the same static state, so re-running\n `mergeDynamicModels(static, cache)` would just rebuild the same objects.\n- `mergeModelSources` and `mergeDynamicModels` short-circuit on\n empty-source inputs (the common shape after `(static, [])` or for\n providers without a static catalog), avoiding Map churn entirely.\n\nCache rows written before the current schema version are dropped by the\ncache-version check; the column defaults to `''` for any row that survives\na version upgrade so the fingerprint-equality check naturally fails closed\nand the full merge re-runs.\n\n## Related\n\n- `docs/models.md` — registry, equivalence, compat flags\n (`supportsStrictMode`, `toolStrictMode`, `disableStrictTools`).\n- `docs/provider-streaming-internals.md` — how the normalized schemas are\n used downstream during the provider stream loop.\n- `docs/mcp-server-tool-authoring.md` — MCP `inputSchema` ingestion via\n `normalizeSchemaForMCP`.\n- `packages/ai/src/utils/schema/CONSTRAINTS.md` — operational contract for\n every normalization rule.\n",
|
|
10
10
|
"approval-mode.md": "# Tool approval mode\n\nTool approval has two independent inputs:\n\n1. **Tool declaration** — every tool may declare an `approval` tier:\n - `read`: reads data or updates UI-only session metadata.\n - `write`: mutates workspace/session state but does not execute arbitrary code.\n - `exec`: executes code, shells out, drives a browser, spawns agents, or performs similarly broad actions.\n2. **User policy** — `tools.approval.<toolName>: allow | deny | prompt` overrides the mode for that tool unless a non-yolo safety override forces a prompt.\n\nTools without an `approval` declaration are treated as `exec`. This is the safe default for MCP and unknown custom tools.\n\n## Modes\n\nConfigure with `tools.approvalMode`:\n\n| Mode | Auto-approves | Prompts for |\n| ---------------- | ----------------------- | --------------- |\n| `always-ask` | `read` | `write`, `exec` |\n| `write` | `read`, `write` | `exec` |\n| `yolo` (default) | `read`, `write`, `exec` | none |\n\n`--auto-approve` and `--yolo` force `tools.approvalMode: yolo` for the session.\n\n## User overrides\n\n`tools.approval` is honored in every mode:\n\n```yaml\ntools:\n approvalMode: write\n approval:\n bash: prompt\n read: allow\n mcp__filesystem__delete: deny\n```\n\nResolution per tool call:\n\n1. Compute the tool's approval decision from `tool.approval(args)`; omitted means `exec`.\n2. Normalize `tools.approval.<tool>` if present; invalid values are ignored.\n3. In `yolo` mode, the user policy is used when present; otherwise the call is allowed. Safety `override` reasons do not force a prompt in `yolo`.\n4. In non-yolo modes, if the tool sets `override: true`, `deny` is blocked and all other cases prompt, even if user policy says `allow`.\n5. Otherwise, a valid user policy wins.\n6. Otherwise, the active mode auto-approves or prompts by tier.\n\n## Safety overrides\n\nA tool can force a prompt with object-form approval:\n\n```ts\napproval: { tier: \"exec\", override: true, reason: \"Critical pattern detected\" }\n```\n\n`bash` uses this for critical destructive patterns such as `rm -rf /`, fork bombs, remote-fetch-then-execute, writes to `/etc/passwd`, and host shutdown commands. These surface as `reason` in the approval prompt, but in `yolo` mode they are auto-approved unless a user policy for the tool is set to `prompt` or `deny`.\n\n## Per-tool prompt details\n\nTools can add approval-prompt body lines with `formatApprovalDetails(args)`. The standard prompt includes:\n\n- `Allow tool: <name>`\n- `Origin: MCP server tool` for unannotated `mcp__...` tools\n- `Reason: <reason>` when the tool decision supplies one\n- tool-specific details such as command, path, code, browser action, or subagent assignment\n\n## Defining approval on tools\n\nBuilt-in and custom tools share the same shape:\n\n```ts\nexport type ToolTier = \"read\" | \"write\" | \"exec\";\nexport type ToolApprovalDecision = ToolTier | { tier: ToolTier; reason?: string; override?: boolean };\nexport type ToolApproval = ToolApprovalDecision | ((args: unknown) => ToolApprovalDecision);\n\napproval?: ToolApproval;\nformatApprovalDetails?: (args: unknown) => string | string[] | undefined;\n```\n\nExamples:\n\n```ts\napproval: \"read\";\n\napproval: (args) => (LSP_READONLY_ACTIONS.has(args.action) ? \"read\" : \"write\");\n\napproval: (args) =>\n isCritical(args.command)\n ? { tier: \"exec\", override: true, reason: \"Critical pattern detected\" }\n : \"exec\";\n```\n\n## Subagents\n\nSubagents run headless with `tools.approvalMode: yolo` so they do not stall waiting for UI. The parent `task` approval is the authorization boundary. User `tools.approval.<tool>` settings continue to control whether a tool is allowed, prompted, or blocked.\n",
|
|
11
11
|
"auth-broker-gateway.md": "# Auth Broker and Auth Gateway\n\nThe auth broker and auth gateway are two cooperating HTTP services that move OAuth refresh tokens and provider access tokens off developer laptops and into a single broker host.\n\n- **`omp auth-broker serve`** holds the canonical SQLite credential vault, performs OAuth refreshes, and exposes a small REST API (`/v1/snapshot`, `/v1/snapshot/stream`, `/v1/credential/:id/refresh`, `/v1/credential/:id/disable`, `/v1/credential`, `/v1/usage`, `/v1/healthz`).\n- **`omp auth-gateway serve`** is a forward-proxy. It accepts OpenAI Chat Completions, Anthropic Messages, OpenAI Responses, and pi-native stream requests, resolves the broker-backed credential, and dispatches through `pi-ai` provider logic. Clients (containerised omp, llm-git, the macOS usage widget, …) never see the access token.\n\nTransport security between operator, broker, and gateway is delegated to the operator (Tailscale / Wireguard / reverse proxy + TLS). Every endpoint except `/v1/healthz` (broker) and `/healthz` (gateway) requires a bearer token.\n\nSource: `packages/ai/src/auth-broker/`, `packages/ai/src/auth-gateway/`, `packages/coding-agent/src/cli/auth-broker-cli.ts`, `packages/coding-agent/src/cli/auth-gateway-cli.ts`, `packages/coding-agent/src/session/auth-broker-config.ts`.\n\n## Data flow\n\n```\n ┌────────────────────────────────────────────────────────────┐\n │ broker host │\n │ │\n developer ──▶ │ ┌──────────────────────────┐ ┌────────────────────┐ │\n laptop / │ │ omp auth-broker serve │◀──▶│ SQLite agent.db │ │\n CI / robomp │ │ - holds refresh tokens │ │ (canonical writer)│ │\n │ │ - background refresher │ └────────────────────┘ │\n │ │ /v1/{snapshot,refresh,…}│ │\n │ └─────────┬────────────────┘ │\n │ │ bearer ($CONFIG_DIR/auth-broker.token) │\n │ ▼ │\n │ ┌──────────────────────────┐ │\n │ │ omp auth-gateway serve │ RemoteAuthCredentialStore │\n │ │ /v1/{chat,messages,…} │ receives snapshot stream, │\n │ │ /v1/usage,/v1/models │ refreshes credentials by id │\n │ │ /v1/credentials/check │ via the broker on expiry │\n │ └─────────┬────────────────┘ │\n └────────────┼───────────────────────────────────────────────┘\n │ bearer ($CONFIG_DIR/auth-gateway.token)\n ▼\n gateway clients\n (llm-git, macOS widget, robomp containers, IDE plugins, …)\n │\n ▼ provider request with broker-resolved credential\n api.anthropic.com / api.openai.com / …\n```\n\nThe broker is the only writer of OAuth refresh tokens. Clients (including the gateway itself) load a redacted snapshot in which every `refresh` field has been replaced with `REMOTE_REFRESH_SENTINEL`; when an access token expires the client calls `POST /v1/credential/:id/refresh` and the broker performs the refresh server-side. `RemoteAuthCredentialStore` rejects local replace/upsert/delete-by-provider mutations, with errors pointing at `omp auth-broker login` / `omp auth-broker logout`.\n\n## auth-broker\n\n### CLI\n\n```\nomp auth-broker serve [--bind=host:port] # boot the broker\nomp auth-broker token [--regenerate] [--json] # print or rotate the bearer token\nomp auth-broker login [<provider>] [--via=user@host] [--dry-run]\nomp auth-broker logout [<provider>]\nomp auth-broker list [--json]\nomp auth-broker import <file|dir> [--provider=<id>] [--include-disabled] [--dry-run] [--json]\nomp auth-broker migrate --from-local [--include-oauth] [--include-env] [--dry-run] [--json]\nomp auth-broker status [--json]\n```\n\n- `serve` opens the local SQLite store at `getAgentDbPath()` and binds an HTTP listener (default `127.0.0.1:8765`). On startup a token is ensured at `<config-dir>/auth-broker.token` (mode `0600`, `0700` parent dir). The background refresher refreshes any OAuth credential whose `expires - Date.now() < refreshSkewMs` (default 5 min) every `refreshIntervalMs` (default 60 s).\n- `token` prints the cached bearer or generates a new one. `--regenerate` rotates it.\n- `login [<provider>]` runs the per-provider OAuth flow locally — when no provider is supplied, it falls back to an interactive numbered picker. With `--via=user@host` it shells out `ssh -L <callback-port>:127.0.0.1:<callback-port> user@host omp auth-broker login <provider>` so the OAuth callback hits the local browser but the credential is written on the broker host (`--via` requires `<provider>`). Built-in callback ports: `anthropic:54545`, `openai-codex:1455`, `google-gemini-cli:8085`, `google-antigravity:51121`, `gitlab-duo:8080`. The OAuth dance is driven in-process via `AuthStorage.login()` — there is no longer a `pi-ai` bin to spawn.\n- `logout [<provider>]` deletes every credential row for `<provider>`. With no argument it shows an interactive numbered picker of currently-stored providers.\n- `list` enumerates every registered OAuth provider id/name (the union of built-ins + `registerOAuthProvider` custom providers). `--json` emits a machine-readable array.\n- `import <file|dir>` imports CLIProxyAPI-style JSON credentials into the local SQLite store. Maps `type` field → omp provider (`claude → anthropic`, `codex → openai-codex`, `gemini → google-gemini-cli`, `antigravity → google-antigravity`, `gemini-cli → google-gemini-cli`).\n- `migrate --from-local` uploads local SQLite credentials to the configured broker (`POST /v1/credential`). Local API keys are included by default; local OAuth rows are skipped unless `--include-oauth` is set; environment-derived API keys are skipped unless `--include-env` is set. Re-runs are idempotent against the broker snapshot.\n- `status` health-pings the configured remote broker.\n\n### Endpoints\n\n| Method | Path | Auth | Purpose |\n| ------ | ---------------------------- | ------ | ------------------------------------------------------- |\n| `GET` | `/v1/healthz` | none | Liveness + version |\n| `GET` | `/v1/snapshot` | bearer | Redacted snapshot (refresh tokens replaced by sentinel) |\n| `GET` | `/v1/snapshot/stream` | bearer | SSE snapshot stream with delta events and keepalives |\n| `POST` | `/v1/credential` | bearer | Upsert one OAuth or API-key credential |\n| `POST` | `/v1/credential/:id/refresh` | bearer | Force-refresh one OAuth credential |\n| `POST` | `/v1/credential/:id/disable` | bearer | Disable one credential with a recorded cause |\n| `GET` | `/v1/usage` | bearer | Aggregate `UsageReport[]` across credentials |\n\nRequests use `Authorization: Bearer <token>`. The server compares against an in-memory token allow-list; the gateway’s implementation uses a timing-safe comparison.\n\n### Background refresher\n\n`AuthBrokerRefresher` iterates active OAuth credentials at `refreshIntervalMs` cadence and refreshes any within `refreshSkewMs` of expiry. Refreshes are single-flighted per credential id so a slow refresh cannot be retriggered. The refresher distinguishes:\n\n- **definitive failures** (`invalid_grant`, `invalid_token`, `revoked`, unauthorized refresh-token, 401/403 not from a network blip) — credentials are passed to `AuthStorage.disableCredentialById(id, cause)` so the next snapshot pull surfaces a clean delete on the client;\n- **transient failures** (timeout / ECONNREFUSED / fetch failed) — left in place for the next sweep.\n\n## auth-gateway\n\n### CLI\n\n```\nomp auth-gateway serve [--bind=host:port] [--no-auth]\nomp auth-gateway token [--regenerate] [--json]\nomp auth-gateway status [--json]\nomp auth-gateway check [--strict] [--json]\n```\n\n- `serve` requires `OMP_AUTH_BROKER_URL` (or `auth.broker.url` in `config.yml`) — the gateway is itself a broker client. It calls `AuthBrokerClient.fetchSnapshot()`, wraps it in `RemoteAuthCredentialStore`, and constructs an `AuthStorage` that resolves access tokens through the broker. Default bind is `127.0.0.1:4000`. The gateway token is stored at `<config-dir>/auth-gateway.token` (`0600`); `--no-auth` disables the bearer check entirely (loopback-only use).\n- `token` / `status` manage and inspect the gateway bearer token and upstream broker readiness.\n- `check` probes broker-backed credentials through the gateway store. Without `--strict` it uses provider usage probes; `--strict` also exercises each credential against its chat-completion endpoint and can consume a small amount of quota.\n\n### Endpoints\n\n| Method | Path | Auth | Purpose |\n| ------ | ----------------------- | ------ | ------------------------------------------------------------ |\n| `GET` | `/healthz` | none | Liveness + version |\n| `GET` | `/v1/usage` | bearer | Aggregate `UsageReport[]` (proxied through `AuthStorage`) |\n| `GET` | `/v1/models` | bearer | Bundled-model catalog filtered to providers with credentials |\n| `GET` | `/v1/credentials/check` | bearer | Per-credential auth health probe |\n| `POST` | `/v1/chat/completions` | bearer | OpenAI Chat Completions wire format |\n| `POST` | `/v1/messages` | bearer | Anthropic Messages wire format |\n| `POST` | `/v1/responses` | bearer | OpenAI Responses wire format |\n| `POST` | `/v1/pi/stream` | bearer | Native `pi-ai` stream wire format |\n\nThe model id is read from the top-level `model` field for foreign wire formats and from the pi-native request body for `/v1/pi/stream`. The gateway picks the first bundled `Model<Api>` matching that id, parses the inbound wire format into an omp `Context`, resolves the provider credential from broker-backed `AuthStorage`, dispatches through `streamSimple()`, and re-encodes the result to the inbound format (SSE for streamed responses).\n\nThere is no raw provider passthrough path. All supported routes go through `pi-ai` provider logic so credential-specific request shaping, OAuth refresh-on-auth-error, and provider quirks stay centralized.\n\n`idleTimeout` on the underlying `Bun.serve` is set to `255 s` so long thinking-budget calls do not get killed by Bun’s default idle timeout.\n\n## Usage cache: server-side 5-min jitter + client-side 15 s single-flight\n\nTwo layers cache the aggregate provider-usage report. Both are intentional and stacked.\n\n### Server-side cache (broker `AuthStorage`)\n\n`AuthStorage` caches each credential’s `UsageReport` in the broker’s SQLite store at a **5-minute per-credential TTL with ±25 % jitter**. Anthropic and OpenAI rate-limit `/usage` aggressively per source IP, and a synchronized 5-credential fan-out trips 429s every cycle; the jitter decorrelates refresh times within a few cycles. On fetch failure the store keeps the **last-good** report for up to 24 h with a short jittered re-poll window — so a transient upstream blip never blanks out the widget.\n\nConstants: `USAGE_REPORT_TTL_MS = 5 * 60_000`, `USAGE_LAST_GOOD_RETENTION_MS = 24 * 60 * 60_000` (`packages/ai/src/auth-storage.ts`).\n\n### Client-side single-flight (`RemoteAuthCredentialStore`)\n\nWhen the gateway (or any other broker client) calls `fetchUsageReports()` / `getUsageReport(provider, credential)`, `RemoteAuthCredentialStore` coalesces concurrent calls into a single `GET /v1/usage` round-trip and caches the result for **15 s** in memory.\n\n- `USAGE_CACHE_TTL_MS = 15_000` (`packages/ai/src/auth-broker/remote-store.ts`).\n- A single `#usageInflight` promise is shared across all callers; a per-caller `AbortSignal` is **raced** against the shared promise, not threaded into it, so one caller’s abort never cascades into a peer’s in-flight request.\n- On fetch failure the rejected promise is logged and the awaited value is `null` — callers (`AuthStorage.fetchUsageReports`, `#getUsageReport`) treat a `null` report as \"no usage signal for this cycle\" and proceed without it. **This is the 15 s TTL fallback**: the client absorbs transient broker outages by suppressing the error, returning `null` to ranking, and re-attempting after the 15 s window.\n\nThe 15 s client window deliberately sits below the broker’s 5 min server cache, so almost every client poll is served from the broker’s already-cached value; the client cache exists to absorb the parallel fan-out generated by `AuthStorage.#rankOAuthSelections` into a single broker round-trip.\n\n## Client snapshot cache\n\n`discoverAuthStorage()` persists the broker snapshot to `~/.omp/cache/auth-broker-snapshot.enc` after the initial `/v1/snapshot` fetch and after later broker-sourced full snapshots. The file is AES-256-GCM encrypted with `SHA-256(OMP_AUTH_BROKER_TOKEN)` and authenticated with the broker URL as additional data, so changing either the token or URL makes the cache unreadable. The file is written atomically with mode `0600`.\n\nFreshness is anchored to the broker-stamped `snapshot.generatedAt`, not local write time. Default TTL is 1 h (`OMP_AUTH_BROKER_SNAPSHOT_TTL_MS`); `0` disables the cache and restores the old always-fetch boot path. When the cached snapshot is still fresh, `omp` boots from it and skips the blocking `/v1/snapshot` query. `RemoteAuthCredentialStore` still starts its normal SSE / long-poll background sync immediately, so deleted or rotated credentials reconcile after startup, and expired OAuth access tokens still refresh through `POST /v1/credential/:id/refresh`.\n\nIf the broker is down at boot and a fresh cache exists, startup now succeeds from the cached snapshot. If the cache is missing, expired, corrupt, written for a different URL, or encrypted with a different token, startup falls back to the live fetch and fails the same way it did before if the broker is unreachable.\n\n## Operator opt-in\n\nThe broker is **off** unless `OMP_AUTH_BROKER_URL` (or `auth.broker.url` in `config.yml`) is set. When set, `discoverAuthStorage` in `packages/coding-agent/src/sdk.ts` swaps the local SQLite credential store for `RemoteAuthCredentialStore` and every API call resolves credentials through the broker.\n\n### Environment variables\n\n| Variable | Purpose | Required when |\n| ----------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------- |\n| `OMP_AUTH_BROKER_URL` | Base URL of the remote auth-broker (e.g. `https://broker.tailnet:8765`). Selecting this puts the client in broker mode — local SQLite is bypassed. | Any time the omp client should resolve credentials through a broker (and required by `omp auth-gateway serve`). |\n| `OMP_AUTH_BROKER_TOKEN` | Bearer token used for every broker endpoint except `/v1/healthz`. | When `OMP_AUTH_BROKER_URL` is set and no token is available from `auth.broker.token` or `<config-dir>/auth-broker.token`. |\n| `OMP_AUTH_BROKER_SNAPSHOT_TTL_MS` | Freshness window for the encrypted local snapshot cache. Default `3600000` (1 h); `0` disables cache reads and writes. | Optional in broker mode. |\n| `OMP_AUTH_BROKER_SNAPSHOT_CACHE` | Path override for the encrypted local snapshot cache. Default `~/.omp/cache/auth-broker-snapshot.enc` (or XDG cache equivalent). | Optional in broker mode. |\n\nResolution order in `resolveAuthBrokerConfig()`:\n\n1. `OMP_AUTH_BROKER_URL` env (else `auth.broker.url` from `config.yml`, resolved through `resolveConfigValue`);\n2. `OMP_AUTH_BROKER_TOKEN` env (else `auth.broker.token` from `config.yml`, else `<config-dir>/auth-broker.token`);\n3. URL set but no token resolvable → hard error pointing at the token file path.\n\nThe gateway has no dedicated env vars — it inherits `OMP_AUTH_BROKER_*` because it is itself a broker client.\n\n### `config.yml` keys\n\n| Key | Default | Purpose |\n| ------------------- | ------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| `auth.broker.url` | unset | Same as `OMP_AUTH_BROKER_URL`; env wins. Hidden from the settings UI. Values are resolved as a literal, an environment variable name, or `!<shell command>` to use trimmed stdout. |\n| `auth.broker.token` | unset | Same as `OMP_AUTH_BROKER_TOKEN`; env wins. Values are resolved the same way. |\n\n### Token files\n\n| Path | Owner | Mode |\n| --------------------------------- | ---------------------------------------------------- | ----------------------------- |\n| `<config-dir>/auth-broker.token` | `omp auth-broker serve` (created at first start) | `0600` in a `0700` parent dir |\n| `<config-dir>/auth-gateway.token` | `omp auth-gateway serve` (skipped under `--no-auth`) | `0600` in a `0700` parent dir |\n\n`<config-dir>` resolves to `~/.omp/` (respecting `PI_CONFIG_DIR`).\n\n## Interaction with the local API-key resolution order\n\nThe broker only owns OAuth credentials and provider-API-key credentials that were uploaded to it. The standard credential ladder in `models.md` (`Auth and API key resolution order`) is preserved, with one addition committed alongside the gateway:\n\n- `AuthStorage.setConfigApiKey / removeConfigApiKey / clearConfigApiKeys` let a `models.yml` `apiKey` beat a stored OAuth token **without** overriding an explicit `--api-key`. This is what allows a broker-resolved OAuth credential to be reliably shadowed by a per-environment `models.yml` config key when both are present.\n\n## See also\n\n- [`secrets.md`](./secrets.md) — secret obfuscation around tokens that _do_ leak through (e.g. `OMP_AUTH_BROKER_TOKEN` in shell output).\n- [`models.md`](./models.md) — provider auth resolution order; the broker plugs in at layers 2–3 (stored credentials).\n- [`environment-variables.md`](./environment-variables.md) — full env reference including `OMP_AUTH_BROKER_URL` / `OMP_AUTH_BROKER_TOKEN`.\n",
|
|
@@ -14,9 +14,9 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
|
|
|
14
14
|
"collab.md": "# Collab: Live Session Sharing\n\n`/collab` shares your running session with other omp instances in real time. Guests render the **same session natively in their own TUI** — streaming assistant text, tool-call cards, footer state (cwd, model, context %, cost), ctrl+o expansion, `/dump` — no terminal mirroring. Guests can prompt and interrupt the agent; the host machine runs the agent and all tools.\n\n## Quick start\n\nHost:\n\n```\n/collab\n```\n\nprints\n\n```\nCollab session started!\n • Join from another terminal: omp join \"mgAYTZwEnpRQtca0CTgn-Q.gdJUbTovD94ofDaa8YvhY0-ty16w4fn8PgB6PLnoA30\"\n • or any web browser: my.omp.sh/#mgAYTZwEnpRQtca0CTgn-Q.gdJUbTovD94ofDaa8YvhY0-ty16w4fn8PgB6PLnoA30\n```\n\nThe browser line is click-to-join (an OSC 8 hyperlink to the full `https://` deep link): the relay serves the web guest client at `/`, and the room id + key ride in the URL fragment. From another omp (any directory, any machine), either form works:\n\n```\n/join my.omp.sh/#mgAYTZwEnpRQtca0CTgn-Q.gdJU…\n```\n\nThe guest's previous session is restored on `/leave` (or when the host stops).\n\n### Commands\n\n| Command | Effect |\n|---|---|\n| `/collab` | Start sharing (or re-print the link when already hosting) |\n| `/collab <relay>` | Start sharing through a specific relay (`relay.example.com`, `ws://localhost:7475`) |\n| `/collab view` | Print a read-only (view-only) link (starts sharing first if needed) |\n| `/collab status` | Show link + participants |\n| `/collab stop` | Stop sharing |\n| `/join <link>` | Join a shared session as a guest |\n| `/leave` | Leave (guest) or stop sharing (host) |\n\n## Link format\n\n```\nhttps://host[:port]/#<link> → browser deep link (printed by /collab; /join accepts it too)\n<roomId>.<key> → default relay (my.omp.sh)\nhost[:port]/r/<roomId>.<key> → custom relay, wss:// inferred\nws://localhost:7475/r/<roomId>.<key> → plain ws, allowed for localhost only\n```\n\nThe trailing `.<key>` part is the room secret, base64url-encoded, in one of two strengths:\n\n- **Full link** — 48 bytes: the 32-byte AES-256-GCM room key followed by a 16-byte write token. Grants prompting, interrupting, and subagent control.\n- **View-only link** — the bare 32-byte key, no write token. Grants live read access only. Pre-token links parse as view-only.\n\nThe room secret is dot-joined rather than `#`-joined: RFC 3986 forbids a raw `#` inside a URL fragment, so strict URL stacks (macOS Foundation behind terminal click-to-open) percent-encode a second `#` to `%23` and break the link. Parsers leniently accept the legacy `#` form and the mangled `%23` form. In the browser deep link, everything after the `#` — room id and key — is a URL fragment: it never appears in any HTTP request, and neither secret is ever sent to the relay.\n\n## End-to-end encryption\n\nEvery session payload (entries, events, state, prompts) is sealed with AES-256-GCM before it touches the socket. The relay sees only:\n\n- room ids and connection counts,\n- opaque ciphertext frames and their sizes,\n- a 4-byte routing prefix (which guest a frame targets).\n\nPossession of the link is the trust boundary: a full link reads and steers the session, a view-only link reads it. Share both like secrets.\n\n## Guest permission model\n\nTwo trust levels, enforced by the link itself — the host verifies the 16-byte write token at join and rejects writes from peers without it (they appear as read-only in the participants list, and the join notice says so).\n\nGuests with a full link can:\n\n- read the entire session (including the back-transcript at join time),\n- prompt the agent (rendered with their name badge on every participant's transcript; the LLM sees the prompt text verbatim — names are display-only),\n- interrupt the agent (Esc),\n- use the Agent Hub against the host's subagents: live table and progress, chat (steers the host's subagent), kill, revive, and transcript viewing (fetched from the host on demand).\n\nGuests with a view-only link can read everything live — back-transcript, streaming text, tool cards, subagent transcripts — but the host rejects prompting, interrupting, and agent control from them.\n\nEverything that mutates the host session or machine is host-only: `/model`, `/compact`, `/resume`, `/branch`, bash (`!`), python (`$`), skills, etc. Guests keep a small local allowlist (`/dump`, `/export`, `/copy`, `/help`, `/hotkeys`, `/theme`, `/settings`, `/leave`, `/collab`, `/exit`).\n\nKnown v1 limit for guests: a turn already streaming when you join becomes visible from its next message boundary.\n\n## Web client\n\n`packages/collab-web` is a standalone browser client for the same links — no omp install needed on the guest side. The relay serves it at `/`, which is what makes the `/collab` deep link click-to-join: `https://<relay>/#<link>` loads the client and auto-connects from the fragment. It renders the live transcript (streaming text, thinking, tool cards), a subagent panel with on-demand transcripts, and a composer with the same guest powers (prompt, interrupt, hub actions). Run `bun run dev` in the package for a local instance, `bun run mock-host` for an offline scripted host to develop against, and `bun run build` to emit a static `dist/` deployable anywhere (HTTPS required for WebCrypto). The client never talks to anything but the relay, and the key stays in the URL fragment.\n\n## Settings\n\n| Setting | Default | Meaning |\n|---|---|---|\n| `collab.relayUrl` | `wss://my.omp.sh` | Relay used by `/collab` when no relay is passed inline |\n| `collab.displayName` | OS username | Name shown to other participants |\n| `share.serverUrl` | `https://my.omp.sh/s` | Share viewer/upload base used by `/share` (links are `<base>/<id>#<key>`) |\n| `share.redactSecrets` | `true` | Run the secret obfuscator over `/share` snapshots before upload |\n\n## Self-hosting the relay\n\nThe relay is a small content-blind Go service. It keeps no state beyond live connections and exposes:\n\n- `GET /` — the static collab-web guest client (target of the `/collab` deep link),\n- `GET /r/<roomId>?role=host|guest` — WebSocket upgrade,\n- `POST /s` / `GET /s/<id>` / `GET /s/<id>/raw` — `/share` blob upload, viewer page, and blob fetch,\n- `GET /healthz` — liveness.\n\n\n## Architecture notes\n\nHub topology — the host is authoritative, guests never peer:\n\n1. `entry` frames — durable session entries, broadcast pre-blob-externalization so images stay inline (guests cannot resolve host blob refs). Guests append them verbatim (ids preserved) to a replica session file under `~/.omp/collab/<roomId>.jsonl` and into the agent's message array, which is why `/dump` and context estimates work.\n2. `event` frames — live agent events, fed straight into the guest's normal event controller; rendering is events-only to prevent double-render.\n3. `state` frames — debounced footer snapshots: streaming flag, the host's full model object and thinking level (applied to the guest's replica agent state, so model display and context-window math are native), host context numbers, and participants.\n4. `bus` frames — mirrored task-subagent lifecycle/progress EventBus traffic, republished on the guest's local bus so the subagent HUD and status-line count work natively.\n5. `agents` frames — agent-registry snapshots feeding a guest-local registry, so the Agent Hub table renders host subagents.\n\nGuest→host: `hello`, `prompt`, `abort`, `agent-cmd` (hub chat/kill/revive), and `fetch-transcript` (incremental subagent-transcript reads answered by targeted `transcript` frames). The replica loads through the regular `/resume` machinery, so theming, ctrl+o, and transcript behavior are native by construction; the guest process never chdirs to host paths.\n",
|
|
15
15
|
"compaction.md": "# Compaction and Branch Summaries\n\nCompaction and branch summaries are the two mechanisms that keep long sessions usable without losing prior work context.\n\n- **Compaction** rewrites old history into a summary on the current branch.\n- **Branch summary** captures abandoned branch context during `/tree` navigation.\n\nBoth are persisted as session entries and converted back into user-context messages when rebuilding LLM input.\n\n## Key implementation files\n\n- `packages/agent/src/compaction/compaction.ts` (context-full summarization and handoff generation)\n- `packages/snapcompact/src/snapcompact.ts` (snapcompact strategy: history archived as dense bitmap images)\n- `packages/agent/src/compaction/branch-summarization.ts`\n- `packages/agent/src/compaction/pruning.ts`\n- `packages/agent/src/compaction/utils.ts`\n- `packages/agent/src/compaction/openai.ts`\n- `packages/coding-agent/src/session/session-manager.ts`\n- `packages/coding-agent/src/session/agent-session.ts`\n- `packages/coding-agent/src/session/messages.ts`\n- `packages/coding-agent/src/extensibility/hooks/types.ts`\n- `packages/coding-agent/src/config/settings-schema.ts`\n\n## Session entry model\n\nCompaction and branch summaries are first-class session entries, not plain assistant/user messages.\n\n- `CompactionEntry`\n - `type: \"compaction\"`\n - `summary`, optional `shortSummary`\n - `firstKeptEntryId` (compaction boundary)\n - `tokensBefore`\n - optional `details`, `preserveData`, `fromExtension`\n- `BranchSummaryEntry`\n - `type: \"branch_summary\"`\n - `fromId`, `summary`\n - optional `details`, `fromExtension`\n\nWhen context is rebuilt (`buildSessionContext`):\n\n1. Latest compaction on the active path is converted to one `compactionSummary` message.\n2. Kept entries from `firstKeptEntryId` to the compaction point are re-included.\n3. Later entries on the path are appended.\n4. `branch_summary` entries are converted to `branchSummary` messages.\n5. `custom_message` entries are converted to `custom` messages.\n\nThose custom roles are then transformed into LLM-facing messages in `convertToLlm()`: `compactionSummary` and `branchSummary` become user messages rendered through the static templates\n\n- `packages/agent/src/compaction/prompts/compaction-summary-context.md`\n- `packages/agent/src/compaction/prompts/branch-summary-context.md`\n\nwhile `custom` messages pass through as developer messages with their raw content (no template).\n\n## Compaction pipeline\n\n### Triggers\n\nCompaction/context maintenance can run in five ways:\n\n1. **Manual context compaction**: `/compact [instructions]` calls `AgentSession.compact(...)`.\n2. **Automatic overflow recovery**: after a same-model assistant error that matches context overflow.\n3. **Automatic incomplete-output recovery**: after a same-model assistant message ends with `stopReason === \"length\"` (OpenAI/Codex `response.incomplete`).\n4. **Automatic threshold maintenance**: after a successful turn when context exceeds the resolved threshold.\n5. **Idle maintenance**: `runIdleCompaction()` can invoke the same auto-maintenance path with reason `\"idle\"`.\n\n### Compaction shape (visual)\n\n```text\nBefore compaction:\n\n entry: 0 1 2 3 4 5 6 7 8 9\n ┌─────┬─────┬─────┬──────┬─────┬─────┬──────┬──────┬─────┬──────┐\n │ hdr │ usr │ ass │ tool │ usr │ ass │ tool │ tool │ ass │ tool │\n └─────┴─────┴─────┴──────┴─────┴─────┴──────┴──────┴─────┴──────┘\n └────────┬───────┘ └──────────────┬──────────────┘\n messagesToSummarize kept messages\n ↑\n firstKeptEntryId (entry 4)\n\nAfter compaction (new entry appended):\n\n entry: 0 1 2 3 4 5 6 7 8 9 10\n ┌─────┬─────┬─────┬──────┬─────┬─────┬──────┬──────┬─────┬──────┬─────┐\n │ hdr │ usr │ ass │ tool │ usr │ ass │ tool │ tool │ ass │ tool │ cmp │\n └─────┴─────┴─────┴──────┴─────┴─────┴──────┴──────┴─────┴──────┴─────┘\n └──────────┬──────┘ └──────────────────────┬───────────────────┘\n not sent to LLM sent to LLM\n ↑\n starts from firstKeptEntryId\n\nWhat the LLM sees:\n\n ┌────────┬─────────┬─────┬─────┬──────┬──────┬─────┬──────┐\n │ system │ summary │ usr │ ass │ tool │ tool │ ass │ tool │\n └────────┴─────────┴─────┴─────┴──────┴──────┴─────┴──────┘\n ↑ ↑ └─────────────────┬────────────────┘\n prompt from cmp messages from firstKeptEntryId\n```\n\n### Overflow/incomplete recovery vs threshold/idle maintenance\n\nThe automatic paths are intentionally different:\n\n- **Overflow recovery**\n - Trigger: current-model assistant error is detected as context overflow and the error is not older than the latest compaction.\n - The failing assistant error message is removed from active agent state before retry.\n - Context promotion is tried first; if a configured larger model is available, the agent switches model and retries without compacting.\n - If promotion is unavailable and compaction is enabled, context-full compaction runs with `reason: \"overflow\"` and `willRetry: true`; handoff strategy is not used for overflow because the handoff request would reuse the overflowing input.\n - On success, `agent.continue()` is scheduled to retry the turn.\n\n- **Incomplete-output recovery**\n - Trigger: same-model assistant message ends with `stopReason === \"length\"` and the message is not older than the latest compaction.\n - The incomplete assistant message is removed from active agent state before recovery.\n - Context promotion is tried first.\n - If promotion is unavailable and compaction is enabled, auto maintenance runs with `reason: \"incomplete\"` and `willRetry: true`.\n - Unlike overflow, `compaction.strategy: \"handoff\"` is allowed for incomplete-output recovery because the input context is still usable.\n - On context-full success, `agent.continue()` is scheduled to retry the turn.\n\n- **Threshold maintenance**\n - Trigger: successful, non-error assistant message whose adjusted context tokens exceed `resolveThresholdTokens(...)`.\n - Tool-output pruning can reduce the measured token count before threshold comparison.\n - Context promotion is tried before compaction.\n - If promotion is unavailable, auto maintenance runs with `reason: \"threshold\"` and `willRetry: false`.\n - With `compaction.strategy: \"handoff\"`, threshold maintenance normally schedules a post-prompt auto-handoff task instead of writing a compaction entry; pre-prompt checks run it inline to avoid racing the next turn. If handoff returns no document without aborting, it falls back to context-full compaction.\n - On success, if `compaction.autoContinue !== false`, schedules an agent-authored developer auto-continue prompt from `prompts/system/auto-continue.md`.\n\n- **Idle maintenance**\n - Trigger: `runIdleCompaction()` when not streaming or already compacting.\n - Uses `reason: \"idle\"` and does not auto-continue afterward.\n\n### Snapcompact strategy\n\n`compaction.strategy: \"snapcompact\"` replaces the LLM summarization call with a local, deterministic archival pass (`compact` from `@oh-my-pi/snapcompact`):\n\n- The discarded history is serialized, whitespace-collapsed, and printed onto model-aware PNG frames (frame width fixed per shape; frame height hugs the rows actually printed) using bundled public-domain pixel fonts. The shape — and frame size — resolve from the **model id** when the model line was measured: Claude reads X.org `6x12` glyphs with dimmed stopwords (`6x12-dim`; high-res lines — Opus 4.7+, Fable, Mythos — get 1932px frames under Anthropic's 4,784 visual-token cap, older lines stay at 1568px), Gemini reads two word-wrapped columns of `8x13` glyphs with sentence-hue ink and dimmed stopwords (`doc-8on16-sent-dim` at 2048px — Gemini 3.x bills a fixed 1,120-token budget per image at any pixel size), GPT/Kimi/GLM read `8x13` glyphs on a 16px pitch (`8on16-bw` at 1568px — patch billing is area-proportional, and kimi's processor downscales past 1792px). A Claude routed through Vertex or OpenRouter keeps its Claude shape. Unmeasured models fall back to their wire API family (Anthropic-family/unknown → `6x12-dim`, Google → `doc-8on16-sent-dim`, OpenAI-compatible → `8on16-bw`); billing (per-family patch/budget formulas, OpenAI's `detail: \"original\"` hint) always follows the API carrying the request, computed for the resolved frame size. The `snapcompact.shape` setting (default `auto`) forces one of the research-eval variants instead: square grids (`8x8r`/`8x8u`/`6x6u`/`5x8` × sentence-hue/black ink) or the per-model eval winners (`6x12-dim`, `8x13-bw`, `8on16-bw`, and the two-column word-wrapped `doc-8on16-bw`/`-sent`/`-sent-dim`, where `dim` prints stopwords in gray). A forced variant keeps its geometry but is re-priced for the target provider's image billing. The same setting governs inline system-prompt/tool-result imaging (`snapcompact.systemPrompt`, `snapcompact.toolResults`).\n- Serialization keeps the archive conversation-dense: tool results are truncated head+tail (default 2,000 chars at a 0.6 head ratio), tool-call argument values are capped per value (500) and per call (2,000), and tool output is printed in dim gray ink so conversation reads louder than tool noise. All budgets and the dimming are configurable via `SerializeOptions` (`toolResultMaxChars`, `toolArgMaxChars`, `toolCallMaxChars`, `truncateHeadRatio`, `dimToolResults`).\n- Frames persist under `CompactionEntry.preserveData.snapcompact` and are re-attached to the `compactionSummary` message as image blocks on every context rebuild; the entry's `summary` is a deterministic reading guide (grid geometry, role tags, truncation notes) plus the usual file-operation lists.\n- Later compactions carry earlier frames forward. The frame budget is provider-aware (`providerFrameBudget`): the per-provider image cap clamped to 8 (`MAX_FRAMES`) — OpenRouter hard-caps requests at 8 images and silently drops the excess, unknown providers get a safe floor of 5. Beyond the budget the archive fades from the middle out: the earliest frame (session head — the original request, or the filmed summary of older history) is pinned, and the oldest *unpinned* frames are evicted. Pages of the *current* compaction that no longer fit are never rendered or dropped — the newest unframed slice survives verbatim as a text tail on the summary (`Archive.textTail`, capped at two frame capacities with middle elision) and is folded back into frames by the next compaction. If the previous compaction was text-based, its summary is printed at the head of the frame archive as `[Summary of earlier history]`.\n- No model, API key, or network is involved, so snapcompact is also safe for overflow recovery. It requires a vision-capable current model (`model.input` includes `\"image\"`); otherwise the run falls back to context-full and emits a warning notice (auto and manual paths). Manual `/compact` honors the strategy unless custom instructions are given (those imply a directed LLM summary).\n- Rationale: the shape table comes from the snapcompact 200k-token evals in `packages/snapcompact`, where bitmap frames preserved QA recall at lower billed-token cost than raw text for vision-capable models.\n\n### Display transcript\n\nCompaction no longer visually restarts the conversation. The TUI renders the **display transcript** (`buildSessionContext({ transcript: true })` / `AgentSession.buildTranscriptSessionContext()`): every path entry in chronological order, with each compaction shown inline as a slim divider — `── 📷 compacted · ctrl+o ──` — at the point it fired. Expanding (ctrl+o) reveals the summary. Only the LLM context resets at the compaction boundary; the scrollback above the divider stays intact, including across session resume.\n\n### Pre-compaction pruning\n\nBefore compaction checks, tool-result pruning may run (`pruneToolOutputs`).\n\nDefault prune policy:\n\n- Protect newest `40_000` tool-output tokens.\n- Require at least `20_000` total estimated savings.\n- Never blank a result below `50` tokens (`MIN_PRUNE_TOKENS`): the `[Output truncated - N tokens]` placeholder costs ~8 tokens, so pruning a sub-floor result would grow the context and churn the prompt cache for nothing. (Superseded and useless results keep their own rules — the useless collector already drops no-savings candidates; superseded reads prune for correctness regardless of size.)\n- Never prune `skill` tool results, `read` results of `skill://` paths, or reads of the active plan reference file (added via `AgentSession`'s plan protection).\n\nPruned tool results are replaced with:\n\n- `[Output truncated - N tokens]`\n\nIf pruning changes entries, session storage is rewritten and agent message state is refreshed before compaction decisions.\n\n### Useless-result elision\n\nTools can flag a finished result as contextually useless — a search with zero matches, a `job` poll that timed out with everything still running, an empty `irc` inbox drain. The flag originates on the tool result (`AgentToolResult.useless`, set via `ToolResultBuilder.useless()` or directly on the returned object), is copied by the agent loop onto the persisted `ToolResultMessage` (never together with `isError` — errors always win), and is consumed in three places:\n\n- **Per-turn stale-result pass** (`pruneSupersededToolResults`, gated by `compaction.dropUseless`, default on): flagged results are blanked to the exact placeholder `[Uneventful result elided]` (`USELESS_NOTICE`) with the same cache-aware timing as superseded reads — only when the suffix after the candidate is small (≤ ~8k tokens) or the session has idled past the provider prompt-cache lifetime. Results smaller than the notice itself are never blanked (no savings), and protected tools are exempt.\n- **Threshold prune** (`pruneToolOutputs`): flagged results bypass the protect-recent window, same as superseded reads, and receive `USELESS_NOTICE` instead of the token-count placeholder.\n- **Summary serialization**: `serializeConversation` (agent and snapcompact) drops the whole tool call/result pair from summarizer/archive input — the source region is discarded after summarization anyway, so the exclusion costs no cache.\n\nThe flag never reaches provider wire formats, and flagged pairs are never removed from history (only blanked in place), so tool-call/result pairing and provider-native history replay stay intact.\n\n### Boundary and cut-point logic\n\n`prepareCompaction()` only considers entries since the last compaction entry (if any).\n\n1. Find previous compaction index.\n2. Compute `boundaryStart = prevCompactionIndex + 1`.\n3. Adapt `keepRecentTokens` using measured usage ratio when available.\n4. Run `findCutPoint()` over the boundary window.\n\nValid cut points include:\n\n- message entries with roles: `user`, `assistant`, `bashExecution`, `hookMessage`, `branchSummary`, `compactionSummary`\n- `custom_message` entries\n- `branch_summary` entries\n\nHard rule: never cut at `toolResult`.\n\nIf there are non-message metadata entries immediately before the cut point (`model_change`, `thinking_level_change`, labels, etc.), they are pulled into the kept region by moving cut index backward until a message or compaction boundary is hit.\n\n### Split-turn handling\n\nIf cut point is not at a user-turn start, compaction treats it as a split turn.\n\nTurn start detection treats these as user-turn boundaries:\n\n- `message.role === \"user\"`\n- `message.role === \"bashExecution\"`\n- `custom_message` entry\n- `branch_summary` entry\n\nSplit-turn compaction generates two summaries:\n\n1. History summary (`messagesToSummarize`)\n2. Turn-prefix summary (`turnPrefixMessages`)\n\nFinal stored summary is merged as:\n\n```markdown\n<history summary>\n\n---\n\n**Turn Context (split turn):**\n\n<turn prefix summary>\n```\n\n### Summary generation\n\n`compact(...)` builds summaries from serialized conversation text:\n\n1. Convert messages via `convertToLlm()`.\n2. Serialize with `serializeConversation()`.\n3. Wrap in `<conversation>...</conversation>`.\n4. Optionally include `<previous-summary>...</previous-summary>`.\n5. Optionally inject extension hook context and active memory-backend compaction context as `<additional-context>` entries.\n6. Execute summarization prompt with `SUMMARIZATION_SYSTEM_PROMPT`.\n\nPrompt selection:\n\n- first compaction: `compaction-summary.md`\n- iterative compaction with prior summary: `compaction-update-summary.md`\n- split-turn second pass: `compaction-turn-prefix.md`\n- short UI summary: `compaction-short-summary.md`\n- handoff document: `handoff-document.md` (used by `generateHandoff(...)`, not serialized compaction)\n\nRemote summarization modes:\n\n- If `compaction.remoteEndpoint` is set and remote compaction is enabled, local summary generation POSTs:\n - `{ systemPrompt, prompt }`\n- Expects JSON containing at least `{ summary }`.\n- For OpenAI/OpenAI Codex models, compaction first tries the provider-native `/responses/compact` endpoint when remote compaction is enabled. It preserves provider replacement history in `preserveData.openaiRemoteCompaction` and falls back to local summarization if that native request fails.\n\n### Handoff generation\n\n`packages/agent/src/compaction/compaction.ts` also exports `generateHandoff(...)`. Handoff generation uses the same `completeSimple(...)` oneshot style as summarization, but it preserves the live agent cache prefix by sending the active system prompt, tool array, and real LLM message history, then appending one agent-attributed `user` message containing the handoff prompt. It forces `toolChoice: \"none\"` and returns joined text blocks directly.\n\nHandoff does not write a `CompactionEntry`. `AgentSession.handoff()` owns the session transition: it starts a new session, injects the generated document as a visible `custom_message` with `customType: \"handoff\"`, and rebuilds agent messages from that new session.\n\n### File-operation context in summaries\n\nCompaction tracks cumulative file activity using assistant tool calls:\n\n- `read(path)` → read set\n- `write(path)` → modified set\n- `edit(path)` → modified set\n\nCumulative behavior:\n\n- Includes prior compaction details only when prior entry is pi-generated (`fromExtension !== true`).\n- In split turns, includes turn-prefix file ops too.\n- `details.readFiles` excludes files also modified; `details.modifiedFiles` carries the rest (persisted shape is unchanged).\n\nSummary text gets one `<files>` tag appended via prompt template: a grouped, prefix-folded directory tree (find-tool shape) with a per-file access marker — `(Read)` for read-only files, `(Write)` for modified files never read, `(RW)` for modified files also present in the cumulative read set. Capped at 20 files with an `… (N more files omitted)` line.\n\n```xml\n<files>\n# packages/agent/src/compaction/\ncompaction.ts (Read)\nutils.ts (RW)\n## prompts/\nfile-operations.md (Write)\n</files>\n```\n\nLegacy `<read-files>`/`<modified-files>` tags from summaries written by earlier versions are stripped (alongside `<files>`) before re-appending, so old summaries self-heal on the next compaction.\n\n### Persist and reload\n\nAfter summary generation (or hook-provided summary), agent session:\n\n1. Appends `CompactionEntry` with `appendCompaction(...)` for context-full maintenance; handoff strategy creates a new session and injects a handoff `custom_message` instead.\n2. Rebuilds display context from the active leaf via `buildDisplaySessionContext()`.\n3. Replaces live agent messages with rebuilt context.\n4. Synchronizes active todo phases from the rebuilt branch and closes provider sessions whose history was rewritten.\n5. Emits `session_compact` hook event.\n\n## Branch summarization pipeline\n\nBranch summarization is tied to tree navigation, not token overflow.\n\n### Trigger\n\nDuring `navigateTree(...)`:\n\n1. Compute abandoned entries from old leaf to common ancestor using `collectEntriesForBranchSummary(...)`.\n2. If caller requested summary (`options.summarize`), generate summary before switching leaf.\n3. If summary exists, attach it at the navigation target using `branchWithSummary(...)`.\n\nOperationally this is commonly driven by `/tree` flow when `branchSummary.enabled` is enabled.\n\n### Branch switch shape (visual)\n\n```text\nTree before navigation:\n\n ┌─ B ─ C ─ D (old leaf, being abandoned)\n A ───┤\n └─ E ─ F (target)\n\nCommon ancestor: A\nEntries to summarize: B, C, D\n\nAfter navigation with summary:\n\n ┌─ B ─ C ─ D ─ [summary of B,C,D]\n A ───┤\n └─ E ─ F (new leaf)\n```\n\n### Preparation and token budget\n\n`generateBranchSummary(...)` computes budget as:\n\n- `tokenBudget = model.contextWindow - branchSummary.reserveTokens`\n\n`prepareBranchEntries(...)` then:\n\n1. First pass: collect cumulative file ops from all summarized entries, including prior pi-generated `branch_summary` details.\n2. Second pass: walk newest → oldest, adding messages until token budget is reached.\n3. Prefer preserving recent context.\n4. May still include large summary entries near budget edge for continuity.\n\nCompaction entries are included as messages (`compactionSummary`) during branch summarization input.\n\n### Summary generation and persistence\n\nBranch summarization:\n\n1. Converts and serializes selected messages.\n2. Wraps in `<conversation>`.\n3. Uses custom instructions if supplied, otherwise `branch-summary.md`.\n4. Calls summarization model with `SUMMARIZATION_SYSTEM_PROMPT`.\n5. Prepends `branch-summary-preamble.md`.\n6. Appends file-operation tags.\n\nResult is stored as `BranchSummaryEntry` with optional details (`readFiles`, `modifiedFiles`).\n\n## Extension and hook touchpoints\n\n### `session_before_compact`\n\nPre-compaction hook.\n\nCan:\n\n- cancel compaction (`{ cancel: true }`)\n- provide full custom compaction payload (`{ compaction: CompactionResult }`)\n\n### `session.compacting`\n\nPrompt/context customization hook for default compaction.\n\nCan return:\n\n- `prompt` (override base summary prompt)\n- `context` (extra context lines injected into `<additional-context>`)\n- `preserveData` (stored on compaction entry)\n\n### `session_compact`\n\nPost-compaction notification with saved `compactionEntry` and `fromExtension` flag.\n\n### `session_before_tree`\n\nRuns on tree navigation before default branch summary generation.\n\nCan:\n\n- cancel navigation\n- provide custom `{ summary: { summary, details } }` used when user requested summarization\n\n### `session_tree`\n\nPost-navigation event exposing new/old leaf and optional summary entry.\n\n## Runtime behavior and failure semantics\n\n- Manual compaction aborts current agent operation first.\n- `abortCompaction()` cancels manual compaction, auto-compaction, and handoff generation controllers.\n- Auto compaction emits start/end session events for UI/state updates.\n- Auto compaction can try multiple model candidates and retry transient failures; long retry delays prefer the next candidate when one is available.\n- Overflow errors are excluded from generic retry path because they are handled by context promotion/compaction.\n- If auto-compaction fails:\n - overflow path emits `Context overflow recovery failed: ...`\n - incomplete-output path emits `Incomplete response recovery failed: ...`\n - threshold/idle paths emit `Auto-compaction failed: ...`\n- Branch summarization can be cancelled via abort signal (e.g., Escape), returning canceled/aborted navigation result.\n\n## Settings and defaults\n\nFrom `settings-schema.ts`:\n\n- `compaction.enabled` = `true`\n- `compaction.strategy` = `\"context-full\"` (`\"handoff\"`, `\"shake\"`, `\"snapcompact\"`, and `\"off\"` are also supported)\n- `compaction.reserveTokens` = `16384`\n- `compaction.keepRecentTokens` = `20000`\n- `compaction.autoContinue` = `true`\n- `compaction.remoteEnabled` = `true`\n- `compaction.remoteEndpoint` = `undefined`\n- `compaction.thresholdPercent` = `-1` and `compaction.thresholdTokens` = `-1`; when no positive override is set, the threshold is `contextWindow - max(15% of contextWindow, reserveTokens)`\n- `compaction.idleEnabled` = `false`\n- `compaction.idleThresholdTokens` = `200000`\n- `compaction.idleTimeoutSeconds` = `300`\n- `branchSummary.enabled` = `false`\n- `branchSummary.reserveTokens` = `16384`\n\nThese values are consumed at runtime by `AgentSession` and compaction/branch summarization modules.\n",
|
|
16
16
|
"config-usage.md": "# Configuration Discovery and Resolution\n\nThis document describes how the coding-agent resolves configuration today: which roots are scanned, how precedence works, and how resolved config is consumed by settings, skills, hooks, tools, and extensions.\n\n## Scope\n\nPrimary implementation:\n\n- `packages/coding-agent/src/config.ts`\n- `packages/coding-agent/src/config/settings.ts`\n- `packages/coding-agent/src/config/settings-schema.ts`\n- `packages/coding-agent/src/discovery/builtin.ts`\n- `packages/coding-agent/src/discovery/helpers.ts`\n\nKey integration points:\n\n- `packages/coding-agent/src/capability/index.ts`\n- `packages/coding-agent/src/discovery/index.ts`\n- `packages/coding-agent/src/extensibility/skills.ts`\n- `packages/coding-agent/src/extensibility/hooks/loader.ts`\n- `packages/coding-agent/src/extensibility/custom-tools/loader.ts`\n- `packages/coding-agent/src/extensibility/extensions/loader.ts`\n\n---\n\n## Resolution flow (visual)\n\n```text\n Generic helper order (`config.ts`)\n┌───────────────────────────────────────┐\n│ 1) ~/.omp/agent, ~/.claude, ... │\n│ 2) <cwd>/.omp, <cwd>/.claude, ... │\n└───────────────────────────────────────┘\n │\n ▼\n capability providers enumerate items\n (native provider scans project .omp before user .omp;\n other providers have their own loading rules)\n │\n ▼\n provider priority sort + capability dedup\n │\n ▼\n subsystem-specific consumption\n (settings, skills, hooks, tools, extensions)\n```\n\n## 1) Config roots and source order\n\n## Canonical roots\n\n`src/config.ts` defines a fixed source priority list:\n\n1. `.omp` (native)\n2. `.claude`\n3. `.codex`\n4. `.gemini`\n\nUser-level bases:\n\n- `~/.omp/agent`\n- `~/.claude`\n- `~/.codex`\n- `~/.gemini`\n\nProject-level bases:\n\n- `<cwd>/.omp`\n- `<cwd>/.claude`\n- `<cwd>/.codex`\n- `<cwd>/.gemini`\n\n`CONFIG_DIR_NAME` is `.omp` (`packages/utils/src/dirs.ts`).\n\n## Profiles\n\nA named profile (`omp --profile <name>`, the `--alias` shortcut, or `OMP_PROFILE` / `PI_PROFILE`) relocates the OMP user base. When a profile is active, every OMP-native user-level path written here as `~/.omp/agent/...` resolves to `~/.omp/profiles/<name>/agent/...` instead.\n\nThe relocation is uniform across the native provider (`builtin.ts`) and the generic `config.ts` helpers, so it covers slash commands, rules, prompts, instructions, hooks, tools, extensions, settings, skills, and MCP, plus the top-level `SYSTEM.md` / `RULES.md` / `AGENTS.md` files and runtime state (sessions, blobs, `agent.db`). A profile sees only its own OMP config, never the default profile's `~/.omp/agent`.\n\nThe other source bases are not profile-scoped and load identically under every profile: the external-tool bases (`~/.claude`, `~/.codex`, `~/.gemini`) belong to those tools, and the project-level bases (`<cwd>/.omp`, `<cwd>/.claude`, ...) are keyed to the working directory. Throughout this document, read `~/.omp/agent` as shorthand for the active profile's agent directory.\n\n## Important constraint\n\nThe generic helpers in `src/config.ts` do **not** include `.pi` in source discovery order.\n\n---\n\n## 2) Core discovery helpers (`src/config.ts`)\n\n## `getConfigDirs(subpath, options)`\n\nReturns ordered entries:\n\n- User-level entries first (by source priority)\n- Then project-level entries (by same source priority)\n\nOptions:\n\n- `user` (default `true`)\n- `project` (default `true`)\n- `cwd` (default `getProjectDir()`)\n- `existingOnly` (default `false`)\n\nThis API is used for directory-based config lookups (commands, hooks, tools, agents, etc.).\n\n## `findConfigFile(subpath, options)` / `findConfigFileWithMeta(...)`\n\nSearches for the first existing file across ordered bases, returns first match (path-only or path+metadata).\n\n## `findAllNearestProjectConfigDirs(subpath, cwd)`\n\nWalks parent directories upward and returns the **nearest existing directory per source base** (`.omp`, `.claude`, `.codex`, `.gemini`), then sorts results by source priority.\n\nUse this when project config should be inherited from ancestor directories (monorepo/nested workspace behavior).\n\n---\n\n## 3) File config wrapper (`ConfigFile<T>` in `src/config.ts`)\n\n`ConfigFile<T>` is the schema-validated loader for single config files.\n\nSupported formats:\n\n- `.yml` / `.yaml`\n- `.json` / `.jsonc`\n\nBehavior:\n\n- Validates parsed data against a provided Zod schema.\n- Caches load result until `invalidate()`.\n- Returns tri-state result via `tryLoad()`:\n - `ok`\n - `not-found`\n - `error` (`ConfigError` with schema/parse context)\n\nLegacy migration still supported:\n\n- If target path is `.yml`/`.yaml`, a sibling `.json` is auto-migrated once (`migrateJsonToYml`).\n\n---\n\n## 4) Settings resolution model (`src/config/settings.ts`)\n\nThe runtime settings model is layered:\n\n1. Global settings: `~/.omp/agent/config.yml`\n2. Project settings: discovered via settings capability (`settings.json` and `config.yml` from providers)\n3. CLI config overlays: `omp --config <path>` / repeated `--config` files, loaded as `config.yml`-style YAML for this process only\n4. Runtime overrides: in-memory, non-persistent\n5. Schema defaults: from `SETTINGS_SCHEMA`\n\nEffective precedence:\n\n`defaults <- global <- project <- CLI config overlays <- overrides`\n\nWrite behavior:\n\n- `settings.set(...)` writes to the **global** layer (`config.yml`) and queues background save.\n- Project settings are read-only from capability discovery.\n\n## Migration behavior still active\n\nOn startup, if `config.yml` is missing:\n\n1. Migrate from `~/.omp/agent/settings.json` (renamed to `.bak` on success)\n2. Merge with legacy DB settings from `agent.db`\n3. Write merged result to `config.yml`\n\nField-level migrations in `#migrateRawSettings`:\n\n- `queueMode` -> `steeringMode`\n- `ask.timeout` milliseconds -> seconds when old value looks like ms (`> 1000`)\n- Legacy flat `theme: \"...\"` -> `theme.dark/theme.light` structure\n\n---\n\n## 5) Capability/discovery integration\n\nMost non-core config loading flows through the capability registry (`src/capability/index.ts` + `src/discovery/index.ts`).\n\n## Provider ordering\n\nProviders are sorted by numeric priority (higher first). Example priorities:\n\n- Native OMP (`builtin.ts`): `100`\n- Claude: `80`\n- Codex / agents / Claude marketplace: `70`\n- Gemini: `60`\n\n```text\nProvider precedence (higher wins)\n\nnative (.omp) priority 100\nclaude priority 80\ncodex / agents / ... priority 70\ngemini priority 60\n```\n\n## Dedup semantics\n\nCapabilities define a `key(item)`:\n\n- same key => first item wins (higher-priority/earlier-loaded item)\n- no key (`undefined`) => no dedup, all items retained\n\nRelevant keys:\n\n- skills: `name`\n- tools: `name`\n- hooks: `${type}:${tool}:${name}`\n- extension modules: `name`\n- extensions: `name`\n- settings: no dedup (all items preserved)\n\n---\n\n## 6) Native `.omp` provider behavior (`packages/coding-agent/src/discovery/builtin.ts`)\n\nNative provider (`id: native`) reads native config from:\n\n- project: `<cwd>/.omp/...`\n- user: `~/.omp/agent/...`\n\n### Directory admission rules\n\n- Slash commands, rules, prompts, instructions, hooks, tools, extensions, extension modules, and settings use a project/user root only when the root directory exists and is non-empty.\n- Skills scan `<ancestor>/.omp/skills` for each ancestor from the current working directory up to the repo root/home boundary, plus `~/.omp/agent/skills`, without requiring the root `.omp` directory itself to be non-empty.\n- `SYSTEM.md` and `AGENTS.md` read user-level files directly and use nearest-ancestor project `.omp` lookup for project files, but the project `.omp` directory must be non-empty. See [`docs/system-prompt-customization.md`](./system-prompt-customization.md) for the full `SYSTEM.md` / `APPEND_SYSTEM.md` contract (replace vs. append, templating).\n\n### Scope-specific loading\n\n- Skills: `<ancestor>/.omp/skills/*/SKILL.md` and `~/.omp/agent/skills/*/SKILL.md`\n- Slash commands: `commands/*.md`\n- Rules: `rules/*.{md,mdc}`\n- Prompts: `prompts/*.md`\n- Instructions: `instructions/*.md`\n- Hooks: `hooks/pre/*`, `hooks/post/*`\n- Tools: `tools/*.{json,md,ts,js,sh,bash,py}` and `tools/<name>/index.ts`\n- Extension modules: discovered under `extensions/` (+ legacy `settings.json.extensions` string array)\n- Extensions: `extensions/<name>/gemini-extension.json`\n- Settings capability: `settings.json`, then `config.yml`\n\n### Nearest-project lookup nuance\n\n## For `SYSTEM.md` and `AGENTS.md`, native provider uses nearest-ancestor project `.omp` directory search (walk-up) and still requires the project `.omp` dir to be non-empty.\n\n## 7) How major subsystems consume config\n\n## Settings subsystem\n\n- `Settings.init()` loads global `config.yml` + discovered project settings capability items.\n- Only capability items with `level === \"project\"` are merged into project layer.\n\n### Session title prompt override\n\nCreate `TITLE_SYSTEM.md` in the same config locations as `SYSTEM.md` / `APPEND_SYSTEM.md`:\n\n```text\n# ~/.omp/agent/TITLE_SYSTEM.md\nGenerate a session name using lowercase `<type>:<primary-objective>`.\n```\n\n- Missing `TITLE_SYSTEM.md` keeps the bundled title prompts.\n- Discovery uses the same project-then-user config directory pattern as `SYSTEM.md`: project `.omp/TITLE_SYSTEM.md` first, then user `~/.omp/agent/TITLE_SYSTEM.md` and the other supported config bases.\n- The override replaces only the automatic session-title generation system prompt; normal `SYSTEM.md` / `APPEND_SYSTEM.md` prompt customization is unaffected.\n- The online path forces the `set_title` tool call when the title model honors a forced `tool_choice`. Tool-choice-less providers (chat-completions hosts without `tool_choice` support, Claude Fable/Mythos) instead receive a marker-based prompt and emit the title wrapped in `<title>...</title>`, which is parsed leniently (a plain sentence or a truncated/unclosed tag still works). A `TITLE_SYSTEM.md` override is reused in both modes; in marker mode the wrap-in-`<title>` instruction is appended after it. The local tiny-title path keeps the `<title>...</title>` prefill/stop wrapper and uses this file as its system turn.\n\n## Skills subsystem\n\n- `extensibility/skills.ts` loads via `loadCapability(skillCapability.id, { cwd })`.\n- Applies source toggles and filters (`ignoredSkills`, `includeSkills`, custom dirs).\n- Legacy-named toggles still exist (`skills.enablePiUser`, `skills.enablePiProject`) but they gate the native provider (`provider === \"native\"`).\n\n## Hooks subsystem\n\n- `discoverAndLoadHooks()` resolves hook paths from hook capability + explicit configured paths.\n- Then loads modules via Bun import.\n\n## Tools subsystem\n\n- `discoverAndLoadCustomTools()` resolves tool paths from tool capability + plugin tool paths + explicit configured paths.\n- Declarative `.md/.json` tool files are metadata only; executable loading expects code modules.\n\n## Extensions subsystem\n\n- `discoverAndLoadExtensions()` resolves extension modules from extension-module capability plus explicit paths.\n- Current implementation intentionally keeps only capability items with `_source.provider === \"native\"` before loading.\n\n---\n\n## 8) Precedence rules to rely on\n\nUse this mental model:\n\n1. Source directory ordering from `config.ts` determines candidate path order.\n2. Capability provider priority determines cross-provider precedence.\n3. Capability key dedup determines collision behavior (first wins for keyed capabilities).\n4. Subsystem-specific merge logic can further change effective precedence (especially settings).\n\n### Settings-specific caveat\n\nSettings capability items are not deduplicated; `Settings.#loadProjectSettings()` deep-merges project items in returned order. Because merge applies later item values over earlier values, effective override behavior depends on provider emission order, not just capability key semantics.\n\n---\n\n## 9) Legacy/compatibility behaviors still present\n\n- `ConfigFile` JSON -> YAML migration for YAML-targeted files.\n- Settings migration from `settings.json` and `agent.db` to `config.yml`.\n- Settings key migrations include `queueMode`, `ask.timeout`, flat `theme`, `task.isolation.enabled`, legacy `task.isolation.mode` values, removed edit modes, `statusLine.plan_mode`, `memories.enabled`, and hindsight scoping/name fields.\n- Legacy setting names `skills.enablePiUser` / `skills.enablePiProject` are still active gates for native skill source.\n\nIf these compatibility paths are removed in code, update this document immediately; several runtime behaviors still depend on them today.\n",
|
|
17
|
-
"context-files.md": "# Context files\n\nContext files are Markdown instruction files that `omp` discovers automatically before a session starts and injects into the agent's project context. Use them for repository conventions, architecture notes, test and review expectations, and instructions that should travel with a user account or a project.\n\nYou never have to ask the agent to go read `AGENTS.md`, `CLAUDE.md`, `GEMINI.md`, or similar files — the relevant ones are already discovered, loaded, and placed in context when the session begins.\n\n## How context files relate to other concepts\n\nFour similarly named things behave differently. Keep them straight:\n\n- **Context files** are read as plain Markdown and shown to the agent inside a `<context>` block. They are advisory background that stays in the session's opening context.\n- **Sticky rules** come from a top-level `RULES.md`. They are converted into an always-apply rule that is re-attached near the current turn, so they keep their hold even after the visible conversation grows. See \"Sticky rules vs normal context\" below.\n- **Discovery providers** are the config-source adapters (`native`, `claude`, `codex`, `gemini`, `opencode`, `github`, `agents`, `agents-md`) that know where each tool keeps its files. The same provider that contributes context files may also contribute MCP servers, slash commands, skills, hooks, tools, prompts, and settings.\n- **Model providers** are inference backends such as `anthropic`, `openai`, `google`, `groq`, `ollama`, and `openrouter`. They have nothing to do with context files except that both kinds of id share the one `disabledProviders` list — see \"Disabling discovery providers\" below and [Providers](./providers.md).\n\nAuthoring **skills** and **rule** files (as opposed to the sticky `RULES.md`) is covered in [Skills](./skills.md). Customizing the system prompt with `SYSTEM.md` is covered in [System prompt customization](./system-prompt-customization.md).\n\n## Native `.omp` files\n\nThe native provider is the recommended format for new projects. It reads from your user agent directory and from `.omp/` directories inside a project, and it has the highest discovery priority, so its files win over every other convention at the same scope.\n\n| File | Scope | Behavior |\n|---|---|---|\n| `~/.omp/agent/AGENTS.md` | User | User-level context for every session unless the `native` provider is disabled. |\n| `<ancestor>/.omp/AGENTS.md` | Project | Project context. `omp` walks upward from the current directory to the repository root and uses the **nearest** non-empty `.omp/AGENTS.md`. Farther native project files are not also included. |\n| `~/.omp/agent/RULES.md` | User | User-level sticky rule content. Loaded as an always-apply rule, not as a context file. |\n| `<ancestor>/.omp/RULES.md` | Project | Project sticky rule content. Same nearest-ancestor walk-up as above. Loaded as an always-apply rule. |\n\nTwo details matter:\n\n- **Walk-up to the repository root.** Discovery starts in the current working directory and climbs through each ancestor up to the repository root, stopping at the first ancestor that has a usable `.omp/` directory. The *nearest* match wins; ancestors above it are not loaded as native context.\n- **The `.omp/` directory must be non-empty.** An empty `.omp/` directory is skipped during the walk-up, so the search continues to the next ancestor. An empty `AGENTS.md` or `RULES.md` file contributes nothing.\n\n`~/.omp/agent` is the user base. If `PI_CODING_AGENT_DIR` is set, it relocates that base, so the user files become `$PI_CODING_AGENT_DIR/AGENTS.md` and `$PI_CODING_AGENT_DIR/RULES.md`.\n\n### Monorepo example\n\n```text\nrepo/\n .omp/\n AGENTS.md\n RULES.md\n packages/api/\n .omp/\n AGENTS.md\n```\n\nStarting a session in `repo/packages/api`:\n\n- The native context file is `repo/packages/api/.omp/AGENTS.md` (the nearest one). `repo/.omp/AGENTS.md` is **not** also included.\n- The project sticky rule is `repo/packages/api/.omp/RULES.md` if present; otherwise the walk-up continues and `repo/.omp/RULES.md` is used.\n\nPut broad, durable project background in `AGENTS.md`. Reserve `RULES.md` for short, hard requirements that must stay visible across long conversations.\n\n## Other supported context conventions\n\n`omp` also discovers the context files of other agent tools so existing projects keep working without migration. The table lists context-file behavior only; each provider may also contribute other capabilities.\n\n| Provider id | Convention path | Scope | Notes |\n|---|---|---|---|\n| `native` | `.omp/AGENTS.md` | User + project | Recommended `omp` format. User file at `~/.omp/agent/AGENTS.md`; project file is the nearest non-empty `.omp/AGENTS.md` walking up to the repo root. |\n| `claude` | `.claude/CLAUDE.md` | User + project | User file `~/.claude/CLAUDE.md`; project file `<cwd>/.claude/CLAUDE.md` only (no ancestor walk-up). |\n| `codex` | `.codex/AGENTS.md` | User | User file `~/.codex/AGENTS.md` only. Project-level Codex context comes from a standalone `AGENTS.md` via the `agents-md` provider, not from `<cwd>/.codex/AGENTS.md`. |\n| `gemini` | `.gemini/GEMINI.md` | User + project | User file `~/.gemini/GEMINI.md`; project file `<cwd>/.gemini/GEMINI.md` only (no ancestor walk-up). |\n| `opencode` | `.config/opencode/AGENTS.md` | User | User file `~/.config/opencode/AGENTS.md` only. |\n| `github` | `.github/copilot-instructions.md` | Project | Project-only GitHub Copilot instructions from `<cwd>/.github/copilot-instructions.md`. |\n| `agents` | `.agent/AGENTS.md`, `.agents/AGENTS.md` | User + project | User files from `~/.agent/` and `~/.agents/`; project files discovered while walking up from the current directory to the repository root. |\n| `agents-md` | `AGENTS.md` | Project | Standalone (non-config-directory) `AGENTS.md` files, discovered by walking up from the current directory to the repository root (or home when no repo root is known). Files whose parent directory name starts with `.` are ignored — those belong to a config-directory provider instead. |\n\nProviders marked \"(no ancestor walk-up)\" only look in the current working directory's config directory. If you need ancestor walk-up behavior, prefer the native `.omp/AGENTS.md` format or a standalone `AGENTS.md` (the `agents-md` provider), or launch `omp` from the directory that holds the config directory.\n\n## Load order and shadowing\n\nWhen two providers describe the *same* scope, the higher-priority provider wins. Provider priorities:\n\n| Priority | Provider id |\n|---:|---|\n| 100 | `native` |\n| 80 | `claude` |\n| 70 | `agents`, `codex` |\n| 60 | `gemini` |\n| 55 | `opencode` |\n| 30 | `github` |\n| 10 | `agents-md` |\n\nDiscovered files are then deduplicated by scope:\n\n- **One user context file** is kept across all providers. Because `native` has the highest priority, `~/.omp/agent/AGENTS.md` shadows every other user-level context file.\n- **One project context file per directory depth.** Depth is measured from the current directory: the cwd is depth 0, its parent depth 1, and so on. Config subdirectories of an ancestor (`.claude/`, `.github/`, `.gemini/`, …) count as the same depth as that ancestor.\n- **At the same depth, the higher-priority provider shadows the rest.**\n- **Across depths, multiple files survive.** In a monorepo, an ancestor `AGENTS.md` and a package-level one are different depths and both load.\n- **Byte-identical files are collapsed.** If two surviving files have exactly the same content, only the copy closest to the cwd is kept.\n\nAfter deduplication, project files are sorted so **farther ancestors appear first** and files **closer to the cwd appear last**. Later files sit nearer the end of the context block, where they are most prominent.\n\n### Worked shadowing example\n\n```text\nrepo/\n AGENTS.md\n packages/api/\n AGENTS.md\n .github/copilot-instructions.md\n```\n\nStarting in `repo/packages/api`:\n\n- `repo/AGENTS.md` is found by `agents-md` at depth 2 and kept.\n- `repo/packages/api/AGENTS.md` (`agents-md`, priority 10) and `repo/packages/api/.github/copilot-instructions.md` (`github`, priority 30) both resolve to depth 0. GitHub's higher priority shadows the package-level standalone `AGENTS.md`, so the Copilot file wins at that depth.\n- The two kept files are ordered root-first, package-last, so `packages/api`'s file is the more prominent one.\n- If you add `repo/packages/api/.omp/AGENTS.md`, `native` (priority 100) wins depth 0 outright, shadowing both lower-priority files.\n\n## Injection behavior\n\nDiscovered context files are injected into the opening project prompt as a single `<context>` block, one `<file>` element per surviving file, in the sort order above:\n\n```xml\n<context>\nYou MUST follow the context files below for all tasks:\n<file path=\"/abs/path/to/repo/AGENTS.md\">\n...root content...\n</file>\n<file path=\"/abs/path/to/repo/packages/api/.github/copilot-instructions.md\">\n...package content...\n</file>\n</context>\n```\n\nThe agent sees each file's absolute path and its fully expanded Markdown content (with `@` imports already resolved — see below). Loading is automatic — there is no need to instruct the agent to search for `AGENTS.md`, `CLAUDE.md`, `GEMINI.md`, `.cursorrules`, or similar files during a session.\n\nDeeper-directory `AGENTS.md` files that were *not* auto-loaded (for example, ones below the current directory) are surfaced separately in a `<dir-context>` block that lists their paths and tells the agent to read them before editing those directories. Those files are pointers, not full injected content.\n\n## `@` imports\n\nInside any context file, an `@path` token expands inline to the referenced file's content before injection:\n\n```markdown\n# Project notes\n\nRead @docs/architecture.md before changing storage code.\nShared release steps live in @../RELEASE.md and personal aliases in @~/.notes/aliases.md.\n```\n\nThe exact rules:\n\n- **Relative paths resolve from the importing file's own directory**, not the session's working directory.\n- **`~/` and `~`** resolve from the user's home directory; absolute paths are used as-is.\n- **Tokens inside fenced code blocks and inline code spans are left untouched** — useful when you want to *write about* an `@token` without expanding it.\n- **`git@github.com:org/repo.git` and `user@example.com`-style tokens are not treated as imports.** A token only counts when the `@` sits at the start of a line or after a space or tab.\n- **Trailing sentence punctuation is trimmed** off the path (`. , ; : ! ? ) ] } \" '`), so `@docs/setup.md.` imports `docs/setup.md`.\n- **Imports recurse up to five hops.** An imported file may itself contain `@` imports, up to a total depth of five.\n- **Cycles are skipped.** A file already pulled into the current expansion tree is not re-expanded, so mutual imports terminate cleanly.\n- **A missing or unreadable target leaves the original `@token` text in place** rather than erroring.\n\n## Sticky rules vs normal context\n\nUse a normal context file (`AGENTS.md`, `CLAUDE.md`, `GEMINI.md`, `.github/copilot-instructions.md`, …) for the bulk of your guidance: repository overview, code style, build and test commands, review expectations, and local conventions. These load into the opening `<context>` block.\n\nUse a top-level **`RULES.md`** for the handful of hard requirements that must stay active even after a long conversation has pushed the opening context far up the transcript:\n\n```markdown\n# ~/.omp/agent/RULES.md\n\nNever commit or push unless the user explicitly asks.\nDo not edit generated files.\n```\n\n`RULES.md` is special:\n\n- It is read **only** at the native locations — `~/.omp/agent/RULES.md` and the nearest `<ancestor>/.omp/RULES.md` from the cwd up to the repo root. A `RULES.md` anywhere else is not a context-file convention and is ignored.\n- It is loaded as an **always-apply rule**, not as a context file, so it is re-attached near the current turn and keeps its hold across long sessions.\n- It is **always sticky**: frontmatter cannot make it non-sticky. If you want conditional or opt-in behavior, write a normal rule file instead (see [Skills](./skills.md)).\n\nKeep `RULES.md` short. Long background belongs in `AGENTS.md`, where it costs context budget only once.\n\n## Disabling discovery providers\n\nTurn a provider off with the `disabledProviders` setting in `~/.omp/agent/config.yml`, a project's `.omp/config.yml`, or a `--config` overlay:\n\n```yaml\n# .omp/config.yml\ndisabledProviders:\n - claude\n - github\n```\n\n`disabledProviders` is a **whole-provider switch with one shared id namespace**, used by two unrelated subsystems:\n\n| Id kind | Examples | Effect when listed |\n|---|---|---|\n| Discovery provider ids | `native`, `claude`, `codex`, `gemini`, `opencode`, `github`, `agents`, `agents-md` | The entire config source is removed — not just its context files, but also any MCP servers, slash commands, skills, hooks, tools, prompts, and settings it would have contributed. |\n| Model provider ids | `anthropic`, `openai`, `google`, `groq`, `ollama`, `openrouter` | The model backend is removed from selection even when its credentials are present. See [Providers](./providers.md). |\n\nIds are exact and the two namespaces do not collide by accident: `google` disables the Google model backend, while `gemini` disables the Gemini CLI discovery files. Disabling a discovery provider is heavier than it looks — disabling `claude`, for instance, also drops Claude-discovered MCP servers, commands, skills, hooks, tools, and settings, not only `CLAUDE.md`.\n\nOnly `enabledModels` and `disabledProviders` support **path-scoped** entries, so you can vary provider availability per subtree:\n\n```yaml\ndisabledProviders:\n - github # disabled everywhere\n - path: ~/work/legacy-claude\n providers:\n - claude # disabled only under this directory\n```\n\nA scoped entry applies when the cwd equals the configured path or sits beneath it; `~` expands to home. Bare string entries apply everywhere.\n\nRemember that higher-precedence settings layers **replace** array settings rather than appending to them. If your global config disables `claude` but a project config sets `disabledProviders: [github]`, then inside that project Claude discovery is re-enabled and only GitHub is disabled. See [Settings](./settings.md) for the full layer precedence, merge rules, and path-scoped array details.\n\n## Troubleshooting\n\n### A file is not loaded\n\n- Native project context must live at `.omp/AGENTS.md`, and the `.omp/` directory must be non-empty; an empty `.omp/` is skipped and the walk-up continues to the next ancestor.\n- A standalone `AGENTS.md` is handled by `agents-md`, not `native`.\n- `.claude/CLAUDE.md`, `.gemini/GEMINI.md`, and `.github/copilot-instructions.md` are read only from the current working directory's config directory — not from every ancestor.\n- `~/.codex/AGENTS.md` and `~/.config/opencode/AGENTS.md` are user-level only and have no project equivalent.\n- Empty files contribute nothing for the native and standalone providers.\n- A disabled discovery provider contributes nothing — check `disabledProviders` across your global, project, and `--config` layers.\n\n### The wrong file wins\n\nAt one user scope or project depth, the higher-priority provider shadows the others (native > claude > agents/codex > gemini > opencode > github > agents-md). To force deterministic behavior, move your guidance into `.omp/AGENTS.md` (native always wins) or disable the competing discovery provider.\n\n### User context disappeared\n\nOnly one user-level context file survives, and `~/.omp/agent/AGENTS.md` has the highest priority. If it exists, it shadows user-level `~/.claude/CLAUDE.md`, `~/.codex/AGENTS.md`, `~/.gemini/GEMINI.md`, `~/.config/opencode/AGENTS.md`, and `~/.agent`/`~/.agents` files. Consolidate user guidance into the native file or remove the native one if you prefer another tool's file.\n\n### A `RULES.md` file is ignored\n\nOnly the native `RULES.md` locations are sticky: `~/.omp/agent/RULES.md` and the nearest `<ancestor>/.omp/RULES.md` from cwd to the repo root. A `RULES.md` in any other directory is not a recognized convention and will not be loaded.\n\n### An `@` import did not expand\n\nConfirm the target exists relative to the importing file (not the cwd). Imports inside fenced code blocks or inline code spans are intentionally left literal, `git@`/email-looking tokens are never imported, cycles are skipped, expansion stops after five hops, and a missing target leaves the original `@path` text unchanged.\n",
|
|
17
|
+
"context-files.md": "# Context files\n\nContext files are Markdown instruction files that `omp` discovers automatically before a session starts and injects into the agent's project context. Use them for repository conventions, architecture notes, test and review expectations, and instructions that should travel with a user account or a project.\n\nYou never have to ask the agent to go read `AGENTS.md`, `CLAUDE.md`, `GEMINI.md`, or similar files — the relevant ones are already discovered, loaded, and placed in context when the session begins.\n\n## How context files relate to other concepts\n\nFour similarly named things behave differently. Keep them straight:\n\n- **Context files** are read as plain Markdown and shown to the agent inside a `<context>` block. They are advisory background that stays in the session's opening context.\n- **Sticky rules** come from a top-level `RULES.md`. They are converted into an always-apply rule that is re-attached near the current turn, so they keep their hold even after the visible conversation grows. See \"Sticky rules vs normal context\" below.\n- **Discovery providers** are the config-source adapters (`native`, `claude`, `codex`, `gemini`, `opencode`, `github`, `agents`, `agents-md`) that know where each tool keeps its files. The same provider that contributes context files may also contribute MCP servers, slash commands, skills, hooks, tools, prompts, and settings.\n- **Model providers** are inference backends such as `anthropic`, `openai`, `google`, `groq`, `ollama`, and `openrouter`. They have nothing to do with context files except that both kinds of id share the one `disabledProviders` list — see \"Disabling discovery providers\" below and [Providers](./providers.md).\n\nAuthoring **skills** and **rule** files (as opposed to the sticky `RULES.md`) is covered in [Skills](./skills.md). Customizing the system prompt with `SYSTEM.md` is covered in [System prompt customization](./system-prompt-customization.md).\n\n## Native `.omp` files\n\nThe native provider is the recommended format for new projects. It reads from your user agent directory and from `.omp/` directories inside a project, and it has the highest discovery priority, so its files win over every other convention at the same scope.\n\n| File | Scope | Behavior |\n|---|---|---|\n| `~/.omp/agent/AGENTS.md` | User | User-level context for every session unless the `native` provider is disabled. |\n| `<ancestor>/.omp/AGENTS.md` | Project | Project context. `omp` walks upward from the current directory to the repository root and uses the **nearest** non-empty `.omp/AGENTS.md`. Farther native project files are not also included. |\n| `~/.omp/agent/RULES.md` | User | User-level sticky rule content. Loaded as an always-apply rule, not as a context file. |\n| `<ancestor>/.omp/RULES.md` | Project | Project sticky rule content. Same nearest-ancestor walk-up as above. Loaded as an always-apply rule. |\n\nTwo details matter:\n\n- **Walk-up to the repository root.** Discovery starts in the current working directory and climbs through each ancestor up to the repository root, stopping at the first ancestor that has a usable `.omp/` directory. The *nearest* match wins; ancestors above it are not loaded as native context.\n- **The `.omp/` directory must be non-empty.** An empty `.omp/` directory is skipped during the walk-up, so the search continues to the next ancestor. An empty `AGENTS.md` or `RULES.md` file contributes nothing.\n\n`~/.omp/agent` is the user base. If `PI_CODING_AGENT_DIR` is set, it relocates that base, so the user files become `$PI_CODING_AGENT_DIR/AGENTS.md` and `$PI_CODING_AGENT_DIR/RULES.md`.\n\n### Monorepo example\n\n```text\nrepo/\n .omp/\n AGENTS.md\n RULES.md\n packages/api/\n .omp/\n AGENTS.md\n```\n\nStarting a session in `repo/packages/api`:\n\n- The native context file is `repo/packages/api/.omp/AGENTS.md` (the nearest one). `repo/.omp/AGENTS.md` is **not** also included.\n- The project sticky rule is `repo/packages/api/.omp/RULES.md` if present; otherwise the walk-up continues and `repo/.omp/RULES.md` is used.\n\nPut broad, durable project background in `AGENTS.md`. Reserve `RULES.md` for short, hard requirements that must stay visible across long conversations.\n\n## Other supported context conventions\n\n`omp` also discovers the context and rule files of other agent tools so existing projects keep working without migration.\n\n| Provider id | Convention path | Scope | Notes |\n|---|---|---|---|\n| `native` | `.omp/AGENTS.md` | User + project | Recommended `omp` format. User file at `~/.omp/agent/AGENTS.md`; project file is the nearest non-empty `.omp/AGENTS.md` walking up to the repo root. |\n| `claude` | `.claude/CLAUDE.md` | User + project | User file `~/.claude/CLAUDE.md`; project file `<cwd>/.claude/CLAUDE.md` only (no ancestor walk-up). |\n| `codex` | `.codex/AGENTS.md` | User | User file `~/.codex/AGENTS.md` only. Project-level Codex context comes from a standalone `AGENTS.md` via the `agents-md` provider, not from `<cwd>/.codex/AGENTS.md`. |\n| `gemini` | `.gemini/GEMINI.md` | User + project | User file `~/.gemini/GEMINI.md`; project file `<cwd>/.gemini/GEMINI.md` only (no ancestor walk-up). |\n| `opencode` | `.config/opencode/AGENTS.md` | User | User file `~/.config/opencode/AGENTS.md` only. |\n| `github` | `.github/copilot-instructions.md` | Project | Project-only GitHub Copilot instructions from `<cwd>/.github/copilot-instructions.md`. |\n| `agents` | `.agent/AGENTS.md`, `.agents/AGENTS.md` | User + project | User files from `~/.agent/` and `~/.agents/`; project files discovered while walking up from the current directory to the repository root. |\n| `agents-md` | `AGENTS.md` | Project | Standalone (non-config-directory) `AGENTS.md` files, discovered by walking up from the current directory to the repository root (or home when no repo root is known). Files whose parent directory name starts with `.` are ignored — those belong to a config-directory provider instead. |\n| `github` | `.github/instructions/**/*.instructions.md` | Project rules | GitHub Copilot / VS Code instruction files become rules. `applyTo: '*'` or `applyTo: '**'` is injected as always-apply context; other `applyTo` globs are listed in the rulebook with `description` and are readable as `rule://<name>`. |\n\nProviders marked \"(no ancestor walk-up)\" only look in the current working directory's config directory. If you need ancestor walk-up behavior, prefer the native `.omp/AGENTS.md` format or a standalone `AGENTS.md` (the `agents-md` provider), or launch `omp` from the directory that holds the config directory.\n\n## Load order and shadowing\n\nWhen two providers describe the *same* scope, the higher-priority provider wins. Provider priorities:\n\n| Priority | Provider id |\n|---:|---|\n| 100 | `native` |\n| 80 | `claude` |\n| 70 | `agents`, `codex` |\n| 60 | `gemini` |\n| 55 | `opencode` |\n| 30 | `github` |\n| 10 | `agents-md` |\n\nDiscovered files are then deduplicated by scope:\n\n- **One user context file** is kept across all providers. Because `native` has the highest priority, `~/.omp/agent/AGENTS.md` shadows every other user-level context file.\n- **One project context file per directory depth.** Depth is measured from the current directory: the cwd is depth 0, its parent depth 1, and so on. Config subdirectories of an ancestor (`.claude/`, `.github/`, `.gemini/`, …) count as the same depth as that ancestor.\n- **At the same depth, the higher-priority provider shadows the rest.**\n- **Across depths, multiple files survive.** In a monorepo, an ancestor `AGENTS.md` and a package-level one are different depths and both load.\n- **Byte-identical files are collapsed.** If two surviving files have exactly the same content, only the copy closest to the cwd is kept.\n\nAfter deduplication, project files are sorted so **farther ancestors appear first** and files **closer to the cwd appear last**. Later files sit nearer the end of the context block, where they are most prominent.\n\n### Worked shadowing example\n\n```text\nrepo/\n AGENTS.md\n packages/api/\n AGENTS.md\n .github/copilot-instructions.md\n```\n\nStarting in `repo/packages/api`:\n\n- `repo/AGENTS.md` is found by `agents-md` at depth 2 and kept.\n- `repo/packages/api/AGENTS.md` (`agents-md`, priority 10) and `repo/packages/api/.github/copilot-instructions.md` (`github`, priority 30) both resolve to depth 0. GitHub's higher priority shadows the package-level standalone `AGENTS.md`, so the Copilot file wins at that depth.\n- The two kept files are ordered root-first, package-last, so `packages/api`'s file is the more prominent one.\n- If you add `repo/packages/api/.omp/AGENTS.md`, `native` (priority 100) wins depth 0 outright, shadowing both lower-priority files.\n\n## Injection behavior\n\nDiscovered context files are injected into the opening project prompt as a single `<context>` block, one `<file>` element per surviving file, in the sort order above:\n\n```xml\n<context>\nYou MUST follow the context files below for all tasks:\n<file path=\"/abs/path/to/repo/AGENTS.md\">\n...root content...\n</file>\n<file path=\"/abs/path/to/repo/packages/api/.github/copilot-instructions.md\">\n...package content...\n</file>\n</context>\n```\n\nThe agent sees each file's absolute path and its fully expanded Markdown content (with `@` imports already resolved — see below). Loading is automatic — there is no need to instruct the agent to search for `AGENTS.md`, `CLAUDE.md`, `GEMINI.md`, `.cursorrules`, or similar files during a session.\n\nDeeper-directory `AGENTS.md` files that were *not* auto-loaded (for example, ones below the current directory) are surfaced separately in a `<dir-context>` block that lists their paths and tells the agent to read them before editing those directories. Those files are pointers, not full injected content.\n\n## `@` imports\n\nInside any context file, an `@path` token expands inline to the referenced file's content before injection:\n\n```markdown\n# Project notes\n\nRead @docs/architecture.md before changing storage code.\nShared release steps live in @../RELEASE.md and personal aliases in @~/.notes/aliases.md.\n```\n\nThe exact rules:\n\n- **Relative paths resolve from the importing file's own directory**, not the session's working directory.\n- **`~/` and `~`** resolve from the user's home directory; absolute paths are used as-is.\n- **Tokens inside fenced code blocks and inline code spans are left untouched** — useful when you want to *write about* an `@token` without expanding it.\n- **`git@github.com:org/repo.git` and `user@example.com`-style tokens are not treated as imports.** A token only counts when the `@` sits at the start of a line or after a space or tab.\n- **Trailing sentence punctuation is trimmed** off the path (`. , ; : ! ? ) ] } \" '`), so `@docs/setup.md.` imports `docs/setup.md`.\n- **Imports recurse up to five hops.** An imported file may itself contain `@` imports, up to a total depth of five.\n- **Cycles are skipped.** A file already pulled into the current expansion tree is not re-expanded, so mutual imports terminate cleanly.\n- **A missing or unreadable target leaves the original `@token` text in place** rather than erroring.\n\n## Sticky rules vs normal context\n\nUse a normal context file (`AGENTS.md`, `CLAUDE.md`, `GEMINI.md`, `.github/copilot-instructions.md`, …) for the bulk of your guidance: repository overview, code style, build and test commands, review expectations, and local conventions. These load into the opening `<context>` block.\n\nUse a top-level **`RULES.md`** for the handful of hard requirements that must stay active even after a long conversation has pushed the opening context far up the transcript:\n\n```markdown\n# ~/.omp/agent/RULES.md\n\nNever commit or push unless the user explicitly asks.\nDo not edit generated files.\n```\n\n`RULES.md` is special:\n\n- It is read **only** at the native locations — `~/.omp/agent/RULES.md` and the nearest `<ancestor>/.omp/RULES.md` from the cwd up to the repo root. A `RULES.md` anywhere else is not a context-file convention and is ignored.\n- It is loaded as an **always-apply rule**, not as a context file, so it is re-attached near the current turn and keeps its hold across long sessions.\n- It is **always sticky**: frontmatter cannot make it non-sticky. If you want conditional or opt-in behavior, write a normal rule file instead (see [Skills](./skills.md)).\n\nKeep `RULES.md` short. Long background belongs in `AGENTS.md`, where it costs context budget only once.\n\n## Disabling discovery providers\n\nTurn a provider off with the `disabledProviders` setting in `~/.omp/agent/config.yml`, a project's `.omp/config.yml`, or a `--config` overlay:\n\n```yaml\n# .omp/config.yml\ndisabledProviders:\n - claude\n - github\n```\n\n`disabledProviders` is a **whole-provider switch with one shared id namespace**, used by two unrelated subsystems:\n\n| Id kind | Examples | Effect when listed |\n|---|---|---|\n| Discovery provider ids | `native`, `claude`, `codex`, `gemini`, `opencode`, `github`, `agents`, `agents-md` | The entire config source is removed — not just its context files, but also any MCP servers, slash commands, skills, hooks, tools, prompts, and settings it would have contributed. |\n| Model provider ids | `anthropic`, `openai`, `google`, `groq`, `ollama`, `openrouter` | The model backend is removed from selection even when its credentials are present. See [Providers](./providers.md). |\n\nIds are exact and the two namespaces do not collide by accident: `google` disables the Google model backend, while `gemini` disables the Gemini CLI discovery files. Disabling a discovery provider is heavier than it looks — disabling `claude`, for instance, also drops Claude-discovered MCP servers, commands, skills, hooks, tools, and settings, not only `CLAUDE.md`.\n\nOnly `enabledModels` and `disabledProviders` support **path-scoped** entries, so you can vary provider availability per subtree:\n\n```yaml\ndisabledProviders:\n - github # disabled everywhere\n - path: ~/work/legacy-claude\n providers:\n - claude # disabled only under this directory\n```\n\nA scoped entry applies when the cwd equals the configured path or sits beneath it; `~` expands to home. Bare string entries apply everywhere.\n\nRemember that higher-precedence settings layers **replace** array settings rather than appending to them. If your global config disables `claude` but a project config sets `disabledProviders: [github]`, then inside that project Claude discovery is re-enabled and only GitHub is disabled. See [Settings](./settings.md) for the full layer precedence, merge rules, and path-scoped array details.\n\n## Troubleshooting\n\n### A file is not loaded\n\n- Native project context must live at `.omp/AGENTS.md`, and the `.omp/` directory must be non-empty; an empty `.omp/` is skipped and the walk-up continues to the next ancestor.\n- A standalone `AGENTS.md` is handled by `agents-md`, not `native`.\n- `.claude/CLAUDE.md`, `.gemini/GEMINI.md`, and `.github/copilot-instructions.md` are read only from the current working directory's config directory — not from every ancestor.\n- `~/.codex/AGENTS.md` and `~/.config/opencode/AGENTS.md` are user-level only and have no project equivalent.\n- Empty files contribute nothing for the native and standalone providers.\n- A disabled discovery provider contributes nothing — check `disabledProviders` across your global, project, and `--config` layers.\n\n### The wrong file wins\n\nAt one user scope or project depth, the higher-priority provider shadows the others (native > claude > agents/codex > gemini > opencode > github > agents-md). To force deterministic behavior, move your guidance into `.omp/AGENTS.md` (native always wins) or disable the competing discovery provider.\n\n### User context disappeared\n\nOnly one user-level context file survives, and `~/.omp/agent/AGENTS.md` has the highest priority. If it exists, it shadows user-level `~/.claude/CLAUDE.md`, `~/.codex/AGENTS.md`, `~/.gemini/GEMINI.md`, `~/.config/opencode/AGENTS.md`, and `~/.agent`/`~/.agents` files. Consolidate user guidance into the native file or remove the native one if you prefer another tool's file.\n\n### A `RULES.md` file is ignored\n\nOnly the native `RULES.md` locations are sticky: `~/.omp/agent/RULES.md` and the nearest `<ancestor>/.omp/RULES.md` from cwd to the repo root. A `RULES.md` in any other directory is not a recognized convention and will not be loaded.\n\n### An `@` import did not expand\n\nConfirm the target exists relative to the importing file (not the cwd). Imports inside fenced code blocks or inline code spans are intentionally left literal, `git@`/email-looking tokens are never imported, cycles are skipped, expansion stops after five hops, and a missing target leaves the original `@path` text unchanged.\n",
|
|
18
18
|
"custom-tools.md": "# Custom Tools\n\nCustom tools are model-callable functions that plug into the same tool execution pipeline as built-in tools.\n\nA custom tool is a TypeScript/JavaScript module that exports a factory. The factory receives a host API (`CustomToolAPI`) and returns one tool or an array of tools.\n\n## What this is (and is not)\n\n- **Custom tool**: callable by the model during a turn (`execute` + Zod parameter schema).\n- **Extension**: lifecycle/event framework that can register tools and intercept/modify events.\n- **Hook**: external pre/post command scripts.\n- **Skill**: static guidance/context package, not executable tool code.\n\nIf you need the model to call code directly, use a custom tool.\n\n## Integration paths in current code\n\nThere are two active integration styles:\n\n1. **SDK-provided custom tools** (`options.customTools`)\n - Wrapped into agent tools via `CustomToolAdapter` or extension wrappers.\n - Always included in the initial active tool set in SDK bootstrap.\n\n2. **Filesystem-discovered modules via loader API** (`discoverAndLoadCustomTools` / `loadCustomTools`)\n - Exposed as library APIs in `src/extensibility/custom-tools/loader.ts`.\n - Host code can call these to discover and load tool modules from config/provider/plugin paths.\n\n```text\nModel tool call flow\n\nLLM tool call\n │\n ▼\nTool registry (built-ins + custom tool adapters)\n │\n ▼\nCustomTool.execute(toolCallId, params, onUpdate, ctx, signal)\n │\n ├─ onUpdate(...) -> streamed partial result\n └─ return result -> final tool content/details\n```\n\n## Discovery locations (loader API)\n\n`discoverAndLoadCustomTools(configuredPaths, cwd, builtInToolNames)` merges:\n\n1. Capability providers (`toolCapability`), including:\n - Native OMP config (`~/.omp/agent/tools`, `.omp/tools`)\n - Claude config (`~/.claude/tools`, `.claude/tools`)\n - Codex config (`~/.codex/tools`, `.codex/tools`)\n - Claude marketplace plugin cache provider\n2. Installed plugin manifests (`~/.omp/plugins/node_modules/*` via plugin loader)\n3. Explicit configured paths passed to the loader\n\n### Important behavior\n\n- Duplicate resolved paths are deduplicated.\n- Tool name conflicts are rejected against built-ins and already-loaded custom tools.\n- `.md` and `.json` files are discovered as tool metadata by some providers, but the executable module loader rejects them as runnable tools.\n- Relative configured paths are resolved from `cwd`; `~` is expanded.\n\n## Module contract\n\nA custom tool module must export a function (default export preferred):\n\n```ts\nimport type { CustomToolFactory } from \"@oh-my-pi/pi-coding-agent\";\n\nconst factory: CustomToolFactory = (pi) => ({\n name: \"repo_stats\",\n label: \"Repo Stats\",\n description: \"Counts tracked TypeScript files\",\n parameters: pi.zod.object({\n glob: pi.zod.string().optional().default(\"**/*.ts\"),\n }),\n\n async execute(toolCallId, params, onUpdate, ctx, signal) {\n onUpdate?.({\n content: [{ type: \"text\", text: \"Scanning files...\" }],\n details: { phase: \"scan\" },\n });\n\n const result = await pi.exec(\n \"git\",\n [\"ls-files\", params.glob ?? \"**/*.ts\"],\n { signal, cwd: pi.cwd },\n );\n if (result.killed) {\n throw new Error(\"Scan was cancelled\");\n }\n if (result.code !== 0) {\n throw new Error(result.stderr || \"git ls-files failed\");\n }\n\n const files = result.stdout.split(\"\\n\").filter(Boolean);\n return {\n content: [{ type: \"text\", text: `Found ${files.length} files` }],\n details: { count: files.length, sample: files.slice(0, 10) },\n };\n },\n\n onSession(event) {\n if (event.reason === \"shutdown\") {\n // cleanup resources if needed\n }\n },\n});\n\nexport default factory;\n```\n\nSchemas are authored with Zod (`pi.zod`) and flow through the shared validation/wire pipeline.\n\nFactory return type:\n\n- `CustomTool`\n- `CustomTool[]`\n- `Promise<CustomTool | CustomTool[]>`\n\n## API surface passed to factories (`CustomToolAPI`)\n\nFrom `types.ts` and `loader.ts`:\n\n- `cwd`: host working directory\n- `exec(command, args, options?)`: process execution helper\n- `ui`: UI context (can be no-op in headless modes)\n- `hasUI`: `false` in non-interactive flows\n- `logger`: shared file logger\n- `typebox`: zod-backed compatibility shim for legacy TypeBox-style schemas\n- `zod`: injected `zod/v4` module (canonical for new schemas)\n- `pi`: injected `@oh-my-pi/pi-coding-agent` exports\n- `pushPendingAction(action)`: register a preview action for hidden `resolve` tool (`docs/resolve-tool-runtime.md`)\n Loader starts with a no-op UI context and requires host code to call `setUIContext(...)` when real UI is ready.\n\n## Execution contract and typing\n\n`CustomTool.execute` signature:\n\n```ts\nexecute(toolCallId, params, onUpdate, ctx, signal);\n```\n\n- `params` is statically typed from your Zod/TypeBox schema via `Static<TParams>`.\n- Runtime argument validation happens before execution in the agent loop.\n- `onUpdate` emits partial results for UI streaming.\n- `ctx` includes `sessionManager`, `modelRegistry`, current `model`, `isIdle()`, `hasQueuedMessages()`, `abort()`, and optional `settings` / `autoApprove`.\n- `signal` carries cancellation.\n\n`CustomToolAdapter` bridges this to the agent tool interface and forwards calls in the correct argument order.\n\nTool definitions may also declare `strict`, `hidden`, `deferrable`, `mcpServerName`, `mcpToolName`, `approval`, and `formatApprovalDetails`.\n\n## How tools are exposed to the model\n\n- Tools are wrapped into `AgentTool` instances (`CustomToolAdapter` or extension wrappers).\n- They are inserted into the session tool registry by name.\n- In SDK bootstrap, custom and extension-registered tools are force-included in the initial active set.\n- CLI `--tools` currently validates only built-in tool names; custom tool inclusion is handled through discovery/registration paths and SDK options.\n\n## Rendering hooks\n\nOptional rendering hooks:\n\n- `renderCall(args, options, theme)`\n- `renderResult(result, options, theme, args?)`\n\nRuntime behavior in TUI:\n\n- If hooks exist, tool output is rendered inside a `Box` container.\n- `renderResult` receives `{ expanded, isPartial, spinnerFrame? }`.\n- Renderer errors are caught and logged; UI falls back to default text rendering.\n\n## Session/state handling\n\nOptional `onSession(event, ctx)` receives session lifecycle events, including:\n\n- `start`, `switch`, `branch`, `tree`, `shutdown`\n- `auto_compaction_start`, `auto_compaction_end`\n- `auto_retry_start`, `auto_retry_end`\n- `ttsr_triggered`, `todo_reminder`\n\nUse `ctx.sessionManager` to reconstruct state from history when branch/session context changes.\n\n## Failures and cancellation semantics\n\n### Synchronous/async failures\n\n- Throwing (or rejected promises) in `execute` is treated as tool failure.\n- Agent runtime converts failures into tool result messages with `isError: true` and error text content.\n- With extension wrappers, `tool_result` handlers can further rewrite content/details and even override error status.\n\n### Cancellation\n\n- Agent abort propagates through `AbortSignal` to `execute`.\n- Forward `signal` to subprocess work (`pi.exec(..., { signal })`) for cooperative cancellation.\n- `ctx.abort()` lets a tool request abort of the current agent operation.\n\n### onSession errors\n\n- `onSession` errors are caught and logged as warnings; they do not crash the session.\n\n## Real constraints to design for\n\n- Tool names must be globally unique in the active registry.\n- Prefer deterministic, schema-shaped outputs in `details` for renderer/state reconstruction.\n- Guard UI usage with `pi.hasUI`.\n- Treat `.md`/`.json` in tool directories as metadata, not executable modules.\n",
|
|
19
|
-
"environment-variables.md": "# Environment Variables (Current Runtime Reference)\n\nThis reference is derived from current code paths in:\n\n- `packages/coding-agent/src/**`\n- `packages/ai/src/**` (provider/auth resolution used by coding-agent)\n- `packages/utils/src/**` and `packages/tui/src/**` where those vars directly affect coding-agent runtime\n\nIt documents only active behavior.\n\n## Resolution model and precedence\n\nMost runtime lookups use `$env` from `@oh-my-pi/pi-utils` (`packages/utils/src/env.ts`).\n\n`$env` loading order:\n\n1. Existing process environment (`Bun.env`)\n2. Project `.env` (`$PWD/.env`) for keys not already set\n3. Agent `.env` (`~/.omp/agent/.env`, respecting `PI_CONFIG_DIR` / `PI_CODING_AGENT_DIR`) for keys not already set\n4. Config-root `.env` (`~/.omp/.env`, respecting `PI_CONFIG_DIR`) for keys not already set\n5. Home `.env` (`~/.env`) for keys not already set\n\nAdditional rule inside each `.env` file: `OMP_*` keys are mirrored to `PI_*` keys in that parsed file.\n\n---\n\n## 1) Model/provider authentication\n\nThese are consumed via `getEnvApiKey()` (`packages/ai/src/stream.ts`) unless noted otherwise.\n\n### Core provider credentials\n\n| Variable | Used for | Required when | Notes / precedence |\n| ------------------------------- | ------------------------------------------------ | -------------------------------------------------------------- | --------------------------------------------------------------------------------------------------- |\n| `ANTHROPIC_OAUTH_TOKEN` | Anthropic API auth | Using Anthropic with OAuth token auth | Takes precedence over `ANTHROPIC_API_KEY` for provider auth resolution |\n| `ANTHROPIC_API_KEY` | Anthropic API auth | Using Anthropic without OAuth token | Fallback after `ANTHROPIC_OAUTH_TOKEN` |\n| `ANTHROPIC_FOUNDRY_API_KEY` | Anthropic via Azure Foundry / enterprise gateway | `CLAUDE_CODE_USE_FOUNDRY` enabled | Takes precedence over `ANTHROPIC_OAUTH_TOKEN` and `ANTHROPIC_API_KEY` when Foundry mode is enabled |\n| `OPENAI_API_KEY` | OpenAI auth | Using OpenAI-family providers without explicit apiKey argument | Used by OpenAI Completions/Responses providers |\n| `GEMINI_API_KEY` | Google Gemini auth | Using `google` provider models | Primary key for Gemini provider mapping |\n| `GOOGLE_API_KEY` | Gemini image tool auth fallback | Using `gemini_image` tool without `GEMINI_API_KEY` | Used by coding-agent image tool fallback path |\n| `GROQ_API_KEY` | Groq auth | Using Groq models | |\n| `CEREBRAS_API_KEY` | Cerebras auth | Using Cerebras models | |\n| `FIREWORKS_API_KEY` | Fireworks auth | Using Fireworks models | |\n| `FIREPASS_API_KEY` | Fire Pass auth | Using Fire Pass models | |\n| `TOGETHER_API_KEY` | Together auth | Using `together` provider | |\n| `AIMLAPI_API_KEY` | AIML API auth | Using `aimlapi` provider | OpenAI-compatible AIML API endpoint at `https://api.aimlapi.com/v1` |\n| `HUGGINGFACE_HUB_TOKEN` | Hugging Face auth | Using `huggingface` provider | Primary Hugging Face token env var |\n| `HF_TOKEN` | Hugging Face auth | Using `huggingface` provider | Fallback when `HUGGINGFACE_HUB_TOKEN` is unset |\n| `SYNTHETIC_API_KEY` | Synthetic auth | Using Synthetic models | |\n| `NVIDIA_API_KEY` | NVIDIA auth | Using `nvidia` provider | |\n| `NANO_GPT_API_KEY` | NanoGPT auth | Using `nanogpt` provider | |\n| `VENICE_API_KEY` | Venice auth | Using `venice` provider | |\n| `LITELLM_API_KEY` | LiteLLM auth | Using `litellm` provider | OpenAI-compatible LiteLLM proxy key |\n| `LM_STUDIO_API_KEY` | LM Studio auth (optional) | Using `lm-studio` provider with authenticated hosts | Local LM Studio usually runs without auth; any non-empty token works when a key is required |\n| `OLLAMA_API_KEY` | Ollama auth (optional) | Using `ollama` provider with authenticated hosts | Local Ollama usually runs without auth; any non-empty token works when a key is required |\n| `LLAMA_CPP_API_KEY` | llama.cpp auth (optional) | Using `llama.cpp` provider with authenticated hosts | Local llama.cpp usually runs without auth; any non-empty token works when a key is configured |\n| `XIAOMI_API_KEY` | Xiaomi MiMo auth | Using `xiaomi` provider | |\n| `MOONSHOT_API_KEY` | Moonshot auth | Using `moonshot` provider | |\n| `XAI_API_KEY` | xAI auth | Using xAI models or as fallback for `xai-oauth` | |\n| `XAI_OAUTH_TOKEN` | xAI OAuth/SuperGrok auth | Using `xai-oauth` provider | Takes precedence over `XAI_API_KEY` for `xai-oauth` |\n| `OPENROUTER_API_KEY` | OpenRouter auth | Using OpenRouter models | Also used by image tool when preferred/auto provider is OpenRouter |\n| `MISTRAL_API_KEY` | Mistral auth | Using Mistral models | |\n| `ZAI_API_KEY` | z.ai auth | Using z.ai models | Also used by z.ai web search provider |\n| `ZHIPU_API_KEY` | Zhipu Coding Plan auth | Using `zhipu-coding-plan` provider | |\n| `MINIMAX_API_KEY` | MiniMax auth | Using `minimax` provider | |\n| `MINIMAX_CODE_API_KEY` | MiniMax Code auth | Using `minimax-code` provider | |\n| `MINIMAX_CODE_CN_API_KEY` | MiniMax Code CN auth | Using `minimax-code-cn` provider | |\n| `OPENCODE_API_KEY` | OpenCode auth | Using `opencode-go` / `opencode-zen` models | |\n| `QIANFAN_API_KEY` | Qianfan auth | Using `qianfan` provider | |\n| `QWEN_OAUTH_TOKEN` | Qwen Portal auth | Using `qwen-portal` with OAuth token | Takes precedence over `QWEN_PORTAL_API_KEY` |\n| `QWEN_PORTAL_API_KEY` | Qwen Portal auth | Using `qwen-portal` with API key | Fallback after `QWEN_OAUTH_TOKEN` |\n| `ZENMUX_API_KEY` | ZenMux auth | Using `zenmux` provider | Used for ZenMux OpenAI and Anthropic-compatible routes |\n| `VLLM_API_KEY` | vLLM auth/discovery opt-in | Using `vllm` provider (local OpenAI-compatible servers) | Any non-empty value works for no-auth local servers |\n| `CURSOR_ACCESS_TOKEN` | Cursor provider auth | Using Cursor provider | |\n| `AI_GATEWAY_API_KEY` | Vercel AI Gateway auth | Using `vercel-ai-gateway` provider | |\n| `CLOUDFLARE_AI_GATEWAY_API_KEY` | Cloudflare AI Gateway auth | Using `cloudflare-ai-gateway` provider | Base URL must be configured as `https://gateway.ai.cloudflare.com/v1/<account>/<gateway>/anthropic` |\n| `ALIBABA_CODING_PLAN_API_KEY` | Alibaba Coding Plan auth | Using `alibaba-coding-plan` provider | |\n| `DEEPSEEK_API_KEY` | DeepSeek auth | Using DeepSeek models | |\n| `KILO_API_KEY` | Kilo auth | Using Kilo models | |\n| `OLLAMA_CLOUD_API_KEY` | Ollama Cloud auth | Using `ollama-cloud` provider | |\n| `WAFER_PASS_API_KEY` | Wafer Pass auth | Using `wafer-pass` provider | Flat-rate Wafer subscription; validated against `https://pass.wafer.ai/v1/models` |\n| `WAFER_SERVERLESS_API_KEY` | Wafer Serverless auth | Using `wafer-serverless` provider | Pay-as-you-go Wafer SKU; validated against `https://pass.wafer.ai/v1/models` |\n| `GITLAB_TOKEN` | GitLab Duo auth | Using `gitlab-duo` provider | |\n\n### GitHub/Copilot tokens\n\n| Variable | Used for | Notes |\n| ---------------------- | ------------------------------------------------ | ------------------------------------------ |\n| `COPILOT_GITHUB_TOKEN` | GitHub Copilot provider auth | Generic GitHub tokens are not used here |\n| `GH_TOKEN` | GitHub API auth in web scraper | Web scraper fallback after `GITHUB_TOKEN` |\n| `GITHUB_TOKEN` | GitHub API auth in web scraper | Web scraper checks this before `GH_TOKEN` |\n\n### Auth broker / auth gateway (remote credential vault)\n\nWhen the broker is enabled, the local SQLite credential store is bypassed and all OAuth refresh / access tokens live on the broker host. See [`auth-broker-gateway.md`](./auth-broker-gateway.md) for the full protocol, CLI surface, and 5-min/15-s usage cache layering.\n\n| Variable | Used for | Required when | Notes / precedence |\n| ----------------------- | -------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |\n| `OMP_AUTH_BROKER_URL` | Base URL of the remote auth-broker (e.g. `https://broker.tailnet:8765`); selects broker mode | Resolving credentials through a broker; also required by `omp auth-gateway serve` (the gateway is itself a broker client) | Wins over `auth.broker.url` in `config.yml`. When set with no resolvable token, `resolveAuthBrokerConfig()` hard-errors instead of falling back to local SQLite. |\n| `OMP_AUTH_BROKER_TOKEN` | Bearer token sent on every broker endpoint except `/v1/healthz` | `OMP_AUTH_BROKER_URL` is set and no token is available from `auth.broker.token` or `<config-dir>/auth-broker.token` | Resolution: this env → `auth.broker.token` (`$ENV_NAME` indirection supported) → `<config-dir>/auth-broker.token` (mode `0600`). `<config-dir>` is `~/.omp/` (respecting `PI_CONFIG_DIR`). |\n| `OMP_AUTH_BROKER_SNAPSHOT_TTL_MS` | Freshness window for the encrypted local broker snapshot cache | Optional in broker mode | Default `3600000` (1 h). Freshness is based on broker `snapshot.generatedAt`; `0` disables cache reads/writes and forces the old blocking fetch every startup. |\n| `OMP_AUTH_BROKER_SNAPSHOT_CACHE` | Path to the encrypted local broker snapshot cache | Optional in broker mode | Defaults to `~/.omp/cache/auth-broker-snapshot.enc` (or XDG cache equivalent). Useful for tests, ephemeral hosts, or relocating the `0600` cache file. |\n\nThe gateway has no dedicated env vars — it inherits `OMP_AUTH_BROKER_*`. Its own inbound bearer token lives at `<config-dir>/auth-gateway.token` and is managed via `omp auth-gateway token`.\n\n---\n\n## 2) Provider-specific runtime configuration\n\n### Anthropic Foundry Gateway (Azure / enterprise proxy)\n\nWhen `CLAUDE_CODE_USE_FOUNDRY` is enabled, Anthropic requests switch to Foundry mode:\n\n- Base URL resolves from `FOUNDRY_BASE_URL` (fallback remains model/default base URL if unset).\n- API key resolution for provider `anthropic` becomes:\n `ANTHROPIC_FOUNDRY_API_KEY` → `ANTHROPIC_OAUTH_TOKEN` → `ANTHROPIC_API_KEY`.\n- `ANTHROPIC_CUSTOM_HEADERS` is parsed as comma/newline-separated `key: value`\n pairs and merged into request headers. They are also forwarded when\n `ANTHROPIC_BASE_URL` points to a non-Anthropic host (e.g. a corporate API\n gateway), so enterprise gateways requiring proprietary auth headers work\n without enabling Foundry mode.\n- TLS client/server material can be injected from env values:\n `NODE_EXTRA_CA_CERTS`, `CLAUDE_CODE_CLIENT_CERT`, `CLAUDE_CODE_CLIENT_KEY`.\n Each accepts either:\n - a filesystem path to PEM content, or\n - inline PEM (including escaped `\\n` sequences).\n\n| Variable | Value type | Behavior |\n| --------------------------- | ---------------------------------------------- | ----------------------------------------------------------------------------- |\n| `CLAUDE_CODE_USE_FOUNDRY` | Boolean-like string (`1`, `true`, `yes`, `on`) | Enables Foundry mode for Anthropic provider |\n| `FOUNDRY_BASE_URL` | URL string | Anthropic endpoint base URL in Foundry mode |\n| `ANTHROPIC_FOUNDRY_API_KEY` | Token string | Used for `Authorization: Bearer <token>` |\n| `ANTHROPIC_CUSTOM_HEADERS` | Header list string | Extra headers; format `header-a: value, header-b: value` or newline-separated. Also forwarded outside Foundry whenever `ANTHROPIC_BASE_URL` is non-Anthropic. |\n| `NODE_EXTRA_CA_CERTS` | PEM path or inline PEM | Extra CA chain for server certificate validation |\n| `CLAUDE_CODE_CLIENT_CERT` | PEM path or inline PEM | mTLS client certificate |\n| `CLAUDE_CODE_CLIENT_KEY` | PEM path or inline PEM | mTLS client private key (must be paired with cert) |\n\n### Amazon Bedrock\n\n| Variable | Default / behavior |\n| ------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------- |\n| `AWS_REGION` | Primary region source |\n| `AWS_DEFAULT_REGION` | Fallback if `AWS_REGION` unset |\n| `AWS_PROFILE` | Enables named profile auth path |\n| `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY` | Enables IAM key auth path |\n| `AWS_BEARER_TOKEN_BEDROCK` | Highest-precedence bearer token auth path; skips AWS profile/credential-chain lookup when set |\n| `AWS_CONTAINER_CREDENTIALS_RELATIVE_URI` / `AWS_CONTAINER_CREDENTIALS_FULL_URI` | Marks Bedrock as available in provider detection (credential resolution itself covers env keys, profiles/SSO/`credential_process`, then IMDSv2) |\n| `AWS_WEB_IDENTITY_TOKEN_FILE` + `AWS_ROLE_ARN` | Marks Bedrock as available in provider detection (same caveat as the ECS variables above) |\n| `AWS_BEDROCK_SKIP_AUTH` | If `1`, injects dummy credentials (proxy/non-auth scenarios) |\n| `HTTPS_PROXY` / `HTTP_PROXY` | Honored via Bun's native fetch proxy support (the provider no longer ships an AWS SDK / proxy-agent transport) |\n| `NO_PROXY` | Excludes matching hosts from Bun's native proxy routing |\n\nRegion fallback in provider code: `options.region` → `AWS_REGION` → `AWS_DEFAULT_REGION` → `us-east-1`.\n\n### Azure OpenAI Responses\n\n| Variable | Default / behavior |\n| ---------------------------------- | --------------------------------------------------------------------------- |\n| `AZURE_OPENAI_API_KEY` | Required unless API key passed as option |\n| `AZURE_OPENAI_API_VERSION` | Default `v1` |\n| `AZURE_OPENAI_BASE_URL` | Direct base URL override |\n| `AZURE_OPENAI_RESOURCE_NAME` | Used to construct base URL: `https://<resource>.openai.azure.com/openai/v1` |\n| `AZURE_OPENAI_DEPLOYMENT_NAME_MAP` | Optional mapping string: `modelId=deploymentName,model2=deployment2` |\n\nBase URL resolution: option `azureBaseUrl` → env `AZURE_OPENAI_BASE_URL` → option/env resource name → `model.baseUrl`.\n\n### Google Vertex AI\n\n| Variable | Required? | Notes |\n| -------------------------------- | ------------------------------ | ------------------------------------------------------------------------------------------------------------------------- |\n| `GOOGLE_CLOUD_PROJECT` | Yes (unless passed in options) | Primary project ID source |\n| `GCP_PROJECT` | Fallback | Alternate project ID source |\n| `GCLOUD_PROJECT` | Fallback | Alternate project ID source |\n| `GOOGLE_CLOUD_PROJECT_ID` | OAuth login helper only | Used by Gemini CLI OAuth project discovery |\n| `GOOGLE_VERTEX_LOCATION` | Yes (unless passed in options) | Primary Vertex location source |\n| `GOOGLE_CLOUD_LOCATION` | Fallback | Alternate Vertex location source |\n| `VERTEX_LOCATION` | Fallback | Alternate Vertex location source |\n| `GOOGLE_CLOUD_API_KEY` | Conditional | Direct Vertex API-key auth; otherwise ADC fallback can authenticate when project and location are set |\n| `GOOGLE_APPLICATION_CREDENTIALS` | Conditional | If set, file must exist; otherwise ADC fallback path is checked (`~/.config/gcloud/application_default_credentials.json`) |\n\n### Kimi\n\n| Variable | Default / behavior |\n| ---------------------- | -------------------------------------------------------- |\n| `KIMI_CODE_OAUTH_HOST` | Primary OAuth host override |\n| `KIMI_OAUTH_HOST` | Fallback OAuth host override |\n| `KIMI_CODE_BASE_URL` | Overrides Kimi usage endpoint base URL (`usage/kimi.ts`) |\n\nOAuth host chain: `KIMI_CODE_OAUTH_HOST` → `KIMI_OAUTH_HOST` → `https://auth.kimi.com`.\n\n### Gemini CLI compatibility\n\n| Variable | Default / behavior |\n| -------------------------- | --------------------------------------------------------------- |\n| `PI_AI_GEMINI_CLI_VERSION` | Overrides Gemini CLI user-agent version tag (`0.35.3` if unset) |\n\n### OpenAI Codex responses (feature/debug controls)\n\n| Variable | Behavior |\n| ------------------------------------------ | ---------------------------------------------------- |\n| `PI_CODEX_DEBUG` | `1`/`true` enables Codex provider debug logging |\n| `PI_CODEX_WEBSOCKET` | `1`/`true` enables websocket transport preference |\n| `PI_OPENAI_STATEFUL` | Overrides the stateful-chaining default for the platform OpenAI Responses API (`previous_response_id`, forces `store: true`): on by default against api.openai.com, off elsewhere |\n| `PI_CODEX_WEBSOCKET_IDLE_TIMEOUT_MS` | Positive integer override (default 300000) |\n| `PI_CODEX_WEBSOCKET_RETRY_BUDGET` | Non-negative integer override (default 5) |\n| `PI_CODEX_WEBSOCKET_RETRY_DELAY_MS` | Positive integer base backoff override (default 500) |\n| `PI_OPENAI_STREAM_FIRST_EVENT_TIMEOUT_MS` | Positive integer OpenAI first-event timeout override |\n| `PI_OPENAI_STREAM_IDLE_TIMEOUT_MS` | Positive integer OpenAI stream idle timeout override |\n\n### Cursor provider debug\n\n| Variable | Behavior |\n| ------------------ | ------------------------------------------------------------------------ |\n| `DEBUG_CURSOR` | Enables provider debug logs; `2`/`verbose` for detailed payload snippets |\n| `DEBUG_CURSOR_LOG` | Optional file path for JSONL debug log output |\n\n### Prompt cache compatibility switch\n\n| Variable | Behavior |\n| -------------------- | ----------------------------------------------------------------------------------------------------------------- |\n| `PI_CACHE_RETENTION` | If `long`, enables long retention where supported (`anthropic`, `openai-responses`, Bedrock retention resolution) |\n\n---\n\n## 3) Web search subsystem\n\n### Search provider credentials\n\n| Variable | Used by |\n| --------------------------------------------------- | ------------------------------------------------------------- |\n| `EXA_API_KEY` | Exa search provider and Exa MCP tools |\n| `BRAVE_API_KEY` | Brave search provider |\n| `PERPLEXITY_API_KEY` | Perplexity search provider API-key mode |\n| `PERPLEXITY_COOKIES` | Perplexity cookie-auth search mode |\n| `TAVILY_API_KEY` | Tavily search provider |\n| `ZAI_API_KEY` | z.ai search provider (also checks stored OAuth in `agent.db`) |\n| `OPENAI_API_KEY` / Codex OAuth in DB | Codex search provider availability/auth |\n| `PI_CODEX_WEB_SEARCH_MODEL` | Codex search provider model override |\n| `MOONSHOT_SEARCH_API_KEY` / `KIMI_SEARCH_API_KEY` | Kimi/Moonshot search provider env auth |\n| `MOONSHOT_SEARCH_BASE_URL` / `KIMI_SEARCH_BASE_URL` | Kimi/Moonshot search endpoint override |\n| `KAGI_API_KEY` | Kagi search provider |\n| `JINA_API_KEY` | Jina search provider |\n| `PARALLEL_API_KEY` | Parallel search provider |\n| `SEARXNG_ENDPOINT`, `SEARXNG_TOKEN` | SearXNG endpoint and optional bearer token |\n| `SEARXNG_BASIC_USERNAME`, `SEARXNG_BASIC_PASSWORD` | SearXNG HTTP Basic Auth credentials |\n\nSearXNG also reads the equivalent `searxng.endpoint`, `searxng.token`, `searxng.basicUsername`, and `searxng.basicPassword` settings from `~/.omp/agent/config.yml`; environment variables are fallbacks.\n\n### Anthropic web search auth chain\n\n`searchAnthropic()` resolves credentials in this order:\n\n1. `ANTHROPIC_SEARCH_API_KEY`\n2. `authStorage.getApiKey(\"anthropic\")` fallback credentials (runtime/config overrides, stored API-key credentials, stored OAuth credentials, then generic Anthropic env fallback: `ANTHROPIC_FOUNDRY_API_KEY` in Foundry mode, otherwise `ANTHROPIC_OAUTH_TOKEN` / `ANTHROPIC_API_KEY`)\n\nFor either credential path, base URL resolution is:\n\n1. `ANTHROPIC_SEARCH_BASE_URL`\n2. `FOUNDRY_BASE_URL` when `CLAUDE_CODE_USE_FOUNDRY` is enabled\n3. `ANTHROPIC_BASE_URL`\n4. `https://api.anthropic.com`\n\nRelated vars:\n\n| Variable | Default / behavior |\n| --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| `ANTHROPIC_SEARCH_API_KEY` | API key used exclusively for the Anthropic web search provider. Highest-priority search auth; overrides `ANTHROPIC_API_KEY` / OAuth / Foundry for search calls without affecting chat completions. |\n| `ANTHROPIC_SEARCH_BASE_URL` | Base URL used exclusively for the Anthropic web search provider. Applied to either `ANTHROPIC_SEARCH_API_KEY` or fallback Anthropic credentials; overrides `ANTHROPIC_BASE_URL` (and `FOUNDRY_BASE_URL` in Foundry mode) for search calls. |\n| `ANTHROPIC_SEARCH_MODEL` | Search model override. Defaults to `claude-haiku-4-5`. |\n| `ANTHROPIC_BASE_URL` | Generic fallback base URL for Anthropic requests when no search-specific base URL is set. |\n\nUse `ANTHROPIC_SEARCH_BASE_URL` (optionally with `ANTHROPIC_SEARCH_API_KEY`) to keep chat routed through an enterprise gateway (`ANTHROPIC_BASE_URL` or `CLAUDE_CODE_USE_FOUNDRY=true`) while pointing web search at a direct Anthropic endpoint, or vice versa.\n\n### Perplexity OAuth flow behavior flag\n\n| Variable | Behavior |\n| ------------------- | ------------------------------------------------------------------------------- |\n| `PI_AUTH_NO_BORROW` | If set, disables macOS native-app token borrowing path in Perplexity login flow |\n\n---\n\n## 4) Python tooling and kernel runtime\n\n| Variable | Default / behavior |\n| ----------------------- | ------------------------------------------------------------------------------------------------------------------- |\n| `PI_PY` | Boolean-like override for the Python eval backend: truthy (`1`/`true`/`yes`/`on`) enables, any other value disables; unset defers to the `eval.py` setting (default enabled) |\n| `PI_JS` | Same boolean-like override for the JavaScript eval backend; unset defers to the `eval.js` setting (default enabled) |\n| `PI_PYTHON_SKIP_CHECK` | If `1`, skips Python interpreter availability checks (subprocess runner still starts on demand) |\n| `PI_PYTHON_INTEGRATION` | If `1`, opts gated integration tests in (e.g. `python-runner.integration.test.ts`) into running against real Python |\n| `PI_PYTHON_IPC_TRACE` | If `1`, logs NDJSON frames exchanged with the Python runner subprocess |\n| `VIRTUAL_ENV` | Highest-priority venv path for Python runtime resolution |\n\nExtra conditional behavior:\n\n- If `BUN_ENV=test` or `NODE_ENV=test`, Python availability checks are treated as OK and warming is skipped.\n- Python env filtering denies common API keys and allows safe base vars + `LC_`, `XDG_`, `PI_` prefixes.\n\n---\n\n## 5) Agent/runtime behavior toggles\n\n| Variable | Default / behavior |\n| ---------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| `PI_SMOL_MODEL` | Ephemeral model-role override for `smol` (CLI `--smol` takes precedence) |\n| `PI_SLOW_MODEL` | Ephemeral model-role override for `slow` (CLI `--slow` takes precedence) |\n| `PI_PLAN_MODEL` | Ephemeral model-role override for `plan` (CLI `--plan` takes precedence) |\n| `PI_NO_TITLE` | If set (any non-empty value), disables auto session title generation on first user message |\n| `PI_TINY_DEVICE` | ONNX execution provider for local tiny models; overrides the `providers.tinyModelDevice` setting (default: CPU; supports `cpu`, `gpu`, `metal`/`webgpu`, `auto`, `cuda`, `dml`, `coreml`, `wasm`, `webnn`, `webnn-gpu`, `webnn-cpu`, `webnn-npu`) |\n| `PI_TINY_DTYPE` | ONNX quantization/precision for local tiny models; overrides the `providers.tinyModelDtype` setting (default: each model's shipped dtype, currently `q4`; supports `auto`, `fp32`, `fp16`, `q8`, `int8`, `uint8`, `q4`, `bnb4`, `q4f16`, `q2`, `q2f16`, `q1`, `q1f16`) |\n| `PI_NO_INTERLEAVED_THINKING` | If `1`, disables Anthropic interleaved thinking budget behavior and uses output-token inflation for older thinking mode |\n| `NULL_PROMPT` | If `true`, system prompt builder returns empty string |\n| `PI_BLOCKED_AGENT` | Blocks a specific subagent type in task tool |\n| `PI_SUBPROCESS_CMD` | Overrides subagent spawn command (`omp` / `omp.cmd` resolution bypass) |\n| `PI_TASK_MAX_OUTPUT_BYTES` | Max captured output bytes per subagent (default `500000`) |\n| `PI_TASK_MAX_OUTPUT_LINES` | Max captured output lines per subagent (default `5000`) |\n| `PI_TIMING` | If set (any non-empty value), prints a hierarchical timing-span tree to **stderr** via `logger.printTimings()`. In interactive mode the tree prints once the agent is ready (before the TUI starts); in print mode it prints after the whole prompt batch completes. Print-mode prompts are wrapped in `print:prompt:initial` / `print:prompt:next` spans so each user message shows up as its own row. `PI_TIMING=x` exits the process with code 0 right after printing in interactive mode (use to measure cold startup only). `PI_TIMING=full` lists every module-load entry instead of just the top N. |\n| `PI_DEBUG_STARTUP` | If set (any non-empty value), streams one synchronous `[startup] <phase>:start` / `:done` marker line to **stderr** as each startup phase begins/ends — including command-module imports (`cli:load:<name>`) and the native addon extraction/`dlopen` (`native:*`). Unlike `PI_TIMING` (which prints only once startup completes), the markers survive a hard hang: the last line on stderr names the phase the process is stuck in. Combine with `PI_TIMING` freely; markers and the span tree share the same phase names. |\n| `PI_PACKAGE_DIR` | Overrides package asset base dir resolution (`docs/`, `examples/`, `CHANGELOG.md`) |\n| `PI_DISABLE_LSPMUX` | If `1`, disables lspmux detection/integration and forces direct LSP server spawning |\n| `PI_RPC_EMIT_TITLE` | Boolean-like flag enabling title events in RPC mode |\n| `SMITHERY_URL` | Smithery web URL override (default `https://smithery.ai`) |\n| `SMITHERY_API_URL` | Smithery API base URL override (default `https://api.smithery.ai`) |\n| `SMITHERY_API_KEY` | Smithery API key for managed MCP auth lookup |\n| `PUPPETEER_EXECUTABLE_PATH` | Browser tool Chromium executable override |\n| `LM_STUDIO_BASE_URL` | Default implicit LM Studio discovery base URL override (`http://127.0.0.1:1234/v1` if unset) |\n| `OLLAMA_BASE_URL` | Default implicit Ollama discovery base URL override (`OLLAMA_HOST` if unset, then `http://127.0.0.1:11434`) |\n| `OLLAMA_HOST` | Ollama host used for implicit Ollama discovery when `OLLAMA_BASE_URL` is unset; accepts Ollama-style values such as `127.0.0.1:11434` or `http://host:11434` |\n| `OLLAMA_CONTEXT_LENGTH` | Positive integer context-window override for implicit Ollama discovery; affects OMP context budgeting only and does not change Ollama's runtime `num_ctx` |\n| `LLAMA_CPP_BASE_URL` | Default implicit Llama.cpp discovery base URL override (`http://127.0.0.1:8080` if unset) |\n| `PI_EDIT_VARIANT` | Forces edit tool variant when valid (`patch`, `replace`, `hashline`, `apply_patch`) |\n| `PI_FORCE_IMAGE_PROTOCOL` | Forces supported image protocol (`kitty`, `iterm2`/`iterm`, `sixel`, `none`) where used |\n| `PI_ALLOW_SIXEL_PASSTHROUGH` | Allows SIXEL passthrough when `PI_FORCE_IMAGE_PROTOCOL=sixel` |\n| `PI_NO_PTY` | If `1`, disables interactive PTY path for bash tool |\n| `OMP_MCP_TIMEOUT_MS` | Overrides MCP client request timeout (ms) for every MCP server. `0` disables client-side timeouts (`AbortSignal` never fires). Invalid (negative or non-numeric) values are ignored with a warning and the per-server config or default (`30000`) is used. |\n\n`PI_NO_PTY` is also set internally when CLI `--no-pty` is used.\n\n---\n\n## 6) Storage and config root paths\n\nThese are consumed via `@oh-my-pi/pi-utils/dirs` and affect where coding-agent stores data.\n\n| Variable | Default / behavior |\n| --------------------- | ----------------------------------------------------------------------------- |\n| `PI_CONFIG_DIR` | Config root dirname under home (default `.omp`) |\n| `PI_CODING_AGENT_DIR` | Full override for agent directory (default `~/<PI_CONFIG_DIR or .omp>/agent`) |\n| `PWD` | Used when matching canonical current working directory in path helpers |\n\n---\n\n## 7) Shell/tool execution environment\n\n(From `packages/utils/src/procmgr.ts` and coding-agent bash tool integration.)\n\n| Variable | Behavior |\n| -------------------------- | ------------------------------------------------------------------------------ |\n| `PI_BASH_NO_CI` | Suppresses automatic `CI=true` injection into spawned shell env |\n| `CLAUDE_BASH_NO_CI` | Legacy alias fallback for `PI_BASH_NO_CI` |\n| `PI_BASH_NO_LOGIN` | Disables login-shell mode; shell args become `['-c']` instead of `['-l','-c']` |\n| `CLAUDE_BASH_NO_LOGIN` | Legacy alias fallback for `PI_BASH_NO_LOGIN` |\n| `PI_SHELL_PREFIX` | Optional command prefix wrapper |\n| `CLAUDE_CODE_SHELL_PREFIX` | Legacy alias fallback for `PI_SHELL_PREFIX` |\n| `VISUAL` | Preferred external editor command |\n| `EDITOR` | Fallback external editor command |\n\nCurrent implementation: `PI_BASH_NO_LOGIN`/`CLAUDE_BASH_NO_LOGIN` are active; when either is set, `getShellArgs()` returns `['-c']`.\n\n---\n\n## 8) UI/theme/session detection (auto-detected env)\n\nThese are read as runtime signals; they are usually set by the terminal/OS rather than manually configured.\n\n| Variable | Used for |\n| ------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------- |\n| `COLORTERM`, `TERM`, `WT_SESSION` | Color capability detection (theme color mode) |\n| `COLORFGBG` | Terminal background light/dark auto-detection |\n| `TERM_PROGRAM`, `TERM_PROGRAM_VERSION`, `TERMINAL_EMULATOR` | Terminal identity in system prompt/context |\n| `TMUX_PANE`, `CMUX_SURFACE_ID`, `KITTY_WINDOW_ID`, `TERM_SESSION_ID`, `WT_SESSION` | Stable per-terminal session breadcrumb IDs |\n| `SHELL`, `ComSpec`, `TERM_PROGRAM`, `TERM` | System info diagnostics |\n| `APPDATA`, `XDG_CONFIG_HOME` | lspmux config path resolution |\n| `HOME` | Path shortening in MCP command UI |\n\n---\n\n## 9) TUI runtime flags (shared package, affects coding-agent UX)\n\n| Variable | Behavior |\n| ------------------------- | ------------------------------------------------------------------------------------- |\n| `PI_NOTIFICATIONS` | `off` / `0` / `false` suppress desktop notifications |\n| `PI_TUI_WRITE_LOG` | If set, logs TUI writes to file |\n| `PI_HARDWARE_CURSOR` | If `1`, enables hardware cursor mode |\n| `PI_NO_SYNC_OUTPUT` | If set (any non-empty value), disables DEC 2026 synchronized-output wrappers while keeping TUI autowrap guards |\n| `PI_NO_DECCARA` | If set (truthy), disables Kitty DECCARA rectangular-SGR background fills (forces padded-string rendering) |\n| `PI_DEBUG_REDRAW` | If `1`, enables redraw debug logging |\n| `PI_FORCE_IMAGE_PROTOCOL` | Forces terminal image protocol detection (`kitty`, `iterm2`/`iterm`, `sixel`, `none`) |\n\n---\n\n## 10) Commit generation controls\n\n| Variable | Behavior |\n| ------------------------- | ------------------------------------------------------------------- |\n| `PI_COMMIT_TEST_FALLBACK` | If `true` (case-insensitive), force commit fallback generation path |\n| `PI_COMMIT_NO_FALLBACK` | If `true`, disables fallback when agent returns no proposal |\n| `PI_COMMIT_MAP_REDUCE` | If `false`, disables map-reduce commit analysis path |\n| `DEBUG` | If set, commit agent error stack traces are printed |\n\n---\n\n## Security-sensitive variables\n\nTreat these as secrets; do not log or commit them:\n\n- Provider/API keys and OAuth/bearer credentials (all `*_API_KEY`, `*_TOKEN`, OAuth access/refresh tokens)\n- Cloud credentials (`AWS_*`, `GOOGLE_APPLICATION_CREDENTIALS` path may expose service-account material)\n- Search/provider auth vars (`EXA_API_KEY`, `BRAVE_API_KEY`, `PERPLEXITY_API_KEY`, Anthropic search keys)\n- Foundry mTLS material (`CLAUDE_CODE_CLIENT_CERT`, `CLAUDE_CODE_CLIENT_KEY`, `NODE_EXTRA_CA_CERTS` when it points to private CA bundles)\n\nPython runtime also explicitly strips many common key vars before spawning kernel subprocesses (`packages/coding-agent/src/eval/py/runtime.ts`).\n",
|
|
19
|
+
"environment-variables.md": "# Environment Variables (Current Runtime Reference)\n\nThis reference is derived from current code paths in:\n\n- `packages/coding-agent/src/**`\n- `packages/ai/src/**` (provider/auth resolution used by coding-agent)\n- `packages/utils/src/**` and `packages/tui/src/**` where those vars directly affect coding-agent runtime\n\nIt documents only active behavior.\n\n## Resolution model and precedence\n\nMost runtime lookups use `$env` from `@oh-my-pi/pi-utils` (`packages/utils/src/env.ts`).\n\n`$env` loading order:\n\n1. Existing process environment (`Bun.env`)\n2. Project `.env` (`$PWD/.env`) for keys not already set\n3. Agent `.env` (`~/.omp/agent/.env`, respecting `PI_CONFIG_DIR` / `PI_CODING_AGENT_DIR`) for keys not already set\n4. Config-root `.env` (`~/.omp/.env`, respecting `PI_CONFIG_DIR`) for keys not already set\n5. Home `.env` (`~/.env`) for keys not already set\n\nAdditional rule inside each `.env` file: `OMP_*` keys are mirrored to `PI_*` keys in that parsed file.\n\n---\n\n## 1) Model/provider authentication\n\nThese are consumed via `getEnvApiKey()` (`packages/ai/src/stream.ts`) unless noted otherwise.\n\n### Core provider credentials\n\n| Variable | Used for | Required when | Notes / precedence |\n| ------------------------------- | ------------------------------------------------ | -------------------------------------------------------------- | --------------------------------------------------------------------------------------------------- |\n| `ANTHROPIC_OAUTH_TOKEN` | Anthropic API auth | Using Anthropic with OAuth token auth | Takes precedence over `ANTHROPIC_API_KEY` for provider auth resolution |\n| `ANTHROPIC_API_KEY` | Anthropic API auth | Using Anthropic without OAuth token | Fallback after `ANTHROPIC_OAUTH_TOKEN` |\n| `ANTHROPIC_FOUNDRY_API_KEY` | Anthropic via Azure Foundry / enterprise gateway | `CLAUDE_CODE_USE_FOUNDRY` enabled | Takes precedence over `ANTHROPIC_OAUTH_TOKEN` and `ANTHROPIC_API_KEY` when Foundry mode is enabled |\n| `OPENAI_API_KEY` | OpenAI auth | Using OpenAI-family providers without explicit apiKey argument | Used by OpenAI Completions/Responses providers |\n| `GEMINI_API_KEY` | Google Gemini auth | Using `google` provider models | Primary key for Gemini provider mapping |\n| `GOOGLE_API_KEY` | Gemini image tool auth fallback | Using `gemini_image` tool without `GEMINI_API_KEY` | Used by coding-agent image tool fallback path |\n| `GROQ_API_KEY` | Groq auth | Using Groq models | |\n| `CEREBRAS_API_KEY` | Cerebras auth | Using Cerebras models | |\n| `FIREWORKS_API_KEY` | Fireworks auth | Using Fireworks models | |\n| `FIREPASS_API_KEY` | Fire Pass auth | Using Fire Pass models | |\n| `TOGETHER_API_KEY` | Together auth | Using `together` provider | |\n| `AIMLAPI_API_KEY` | AIML API auth | Using `aimlapi` provider | OpenAI-compatible AIML API endpoint at `https://api.aimlapi.com/v1` |\n| `HUGGINGFACE_HUB_TOKEN` | Hugging Face auth | Using `huggingface` provider | Primary Hugging Face token env var |\n| `HF_TOKEN` | Hugging Face auth | Using `huggingface` provider | Fallback when `HUGGINGFACE_HUB_TOKEN` is unset |\n| `SYNTHETIC_API_KEY` | Synthetic auth | Using Synthetic models | |\n| `NVIDIA_API_KEY` | NVIDIA auth | Using `nvidia` provider | |\n| `NANO_GPT_API_KEY` | NanoGPT auth | Using `nanogpt` provider | |\n| `VENICE_API_KEY` | Venice auth | Using `venice` provider | |\n| `LITELLM_API_KEY` | LiteLLM auth | Using `litellm` provider | OpenAI-compatible LiteLLM proxy key |\n| `LM_STUDIO_API_KEY` | LM Studio auth (optional) | Using `lm-studio` provider with authenticated hosts | Local LM Studio usually runs without auth; any non-empty token works when a key is required |\n| `OLLAMA_API_KEY` | Ollama auth (optional) | Using `ollama` provider with authenticated hosts | Local Ollama usually runs without auth; any non-empty token works when a key is required |\n| `LLAMA_CPP_API_KEY` | llama.cpp auth (optional) | Using `llama.cpp` provider with authenticated hosts | Local llama.cpp usually runs without auth; any non-empty token works when a key is configured |\n| `XIAOMI_API_KEY` | Xiaomi MiMo auth | Using `xiaomi` provider | |\n| `MOONSHOT_API_KEY` | Moonshot auth | Using `moonshot` provider | |\n| `XAI_API_KEY` | xAI auth | Using xAI models or as fallback for `xai-oauth` | |\n| `XAI_OAUTH_TOKEN` | xAI OAuth/SuperGrok auth | Using `xai-oauth` provider | Takes precedence over `XAI_API_KEY` for `xai-oauth` |\n| `OPENROUTER_API_KEY` | OpenRouter auth | Using OpenRouter models | Also used by image tool when preferred/auto provider is OpenRouter |\n| `MISTRAL_API_KEY` | Mistral auth | Using Mistral models | |\n| `ZAI_API_KEY` | z.ai auth | Using z.ai models | Also used by z.ai web search provider |\n| `ZHIPU_API_KEY` | Zhipu Coding Plan auth | Using `zhipu-coding-plan` provider | |\n| `UMANS_AI_CODING_PLAN_API_KEY` | Umans AI Coding Plan auth | Using `umans` provider | |\n| `MINIMAX_API_KEY` | MiniMax auth | Using `minimax` provider | |\n| `MINIMAX_CODE_API_KEY` | MiniMax Code auth | Using `minimax-code` provider | |\n| `MINIMAX_CODE_CN_API_KEY` | MiniMax Code CN auth | Using `minimax-code-cn` provider | |\n| `OPENCODE_API_KEY` | OpenCode auth | Using `opencode-go` / `opencode-zen` models | |\n| `QIANFAN_API_KEY` | Qianfan auth | Using `qianfan` provider | |\n| `QWEN_OAUTH_TOKEN` | Qwen Portal auth | Using `qwen-portal` with OAuth token | Takes precedence over `QWEN_PORTAL_API_KEY` |\n| `QWEN_PORTAL_API_KEY` | Qwen Portal auth | Using `qwen-portal` with API key | Fallback after `QWEN_OAUTH_TOKEN` |\n| `ZENMUX_API_KEY` | ZenMux auth | Using `zenmux` provider | Used for ZenMux OpenAI and Anthropic-compatible routes |\n| `VLLM_API_KEY` | vLLM auth/discovery opt-in | Using `vllm` provider (local OpenAI-compatible servers) | Any non-empty value works for no-auth local servers |\n| `CURSOR_ACCESS_TOKEN` | Cursor provider auth | Using Cursor provider | |\n| `AI_GATEWAY_API_KEY` | Vercel AI Gateway auth | Using `vercel-ai-gateway` provider | |\n| `CLOUDFLARE_AI_GATEWAY_API_KEY` | Cloudflare AI Gateway auth | Using `cloudflare-ai-gateway` provider | Base URL must be configured as `https://gateway.ai.cloudflare.com/v1/<account>/<gateway>/anthropic` |\n| `ALIBABA_CODING_PLAN_API_KEY` | Alibaba Coding Plan auth | Using `alibaba-coding-plan` provider | |\n| `DEEPSEEK_API_KEY` | DeepSeek auth | Using DeepSeek models | |\n| `KILO_API_KEY` | Kilo auth | Using Kilo models | |\n| `OLLAMA_CLOUD_API_KEY` | Ollama Cloud auth | Using `ollama-cloud` provider | |\n| `WAFER_PASS_API_KEY` | Wafer Pass auth | Using `wafer-pass` provider | Flat-rate Wafer subscription; validated against `https://pass.wafer.ai/v1/models` |\n| `WAFER_SERVERLESS_API_KEY` | Wafer Serverless auth | Using `wafer-serverless` provider | Pay-as-you-go Wafer SKU; validated against `https://pass.wafer.ai/v1/models` |\n| `GITLAB_TOKEN` | GitLab Duo auth | Using `gitlab-duo` provider | |\n\n### GitHub/Copilot tokens\n\n| Variable | Used for | Notes |\n| ---------------------- | ------------------------------------------------ | ------------------------------------------ |\n| `COPILOT_GITHUB_TOKEN` | GitHub Copilot provider auth | Generic GitHub tokens are not used here |\n| `GH_TOKEN` | GitHub API auth in web scraper | Web scraper fallback after `GITHUB_TOKEN` |\n| `GITHUB_TOKEN` | GitHub API auth in web scraper | Web scraper checks this before `GH_TOKEN` |\n\n### Auth broker / auth gateway (remote credential vault)\n\nWhen the broker is enabled, the local SQLite credential store is bypassed and all OAuth refresh / access tokens live on the broker host. See [`auth-broker-gateway.md`](./auth-broker-gateway.md) for the full protocol, CLI surface, and 5-min/15-s usage cache layering.\n\n| Variable | Used for | Required when | Notes / precedence |\n| ----------------------- | -------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |\n| `OMP_AUTH_BROKER_URL` | Base URL of the remote auth-broker (e.g. `https://broker.tailnet:8765`); selects broker mode | Resolving credentials through a broker; also required by `omp auth-gateway serve` (the gateway is itself a broker client) | Wins over `auth.broker.url` in `config.yml`. When set with no resolvable token, `resolveAuthBrokerConfig()` hard-errors instead of falling back to local SQLite. |\n| `OMP_AUTH_BROKER_TOKEN` | Bearer token sent on every broker endpoint except `/v1/healthz` | `OMP_AUTH_BROKER_URL` is set and no token is available from `auth.broker.token` or `<config-dir>/auth-broker.token` | Resolution: this env → `auth.broker.token` (`$ENV_NAME` indirection supported) → `<config-dir>/auth-broker.token` (mode `0600`). `<config-dir>` is `~/.omp/` (respecting `PI_CONFIG_DIR`). |\n| `OMP_AUTH_BROKER_SNAPSHOT_TTL_MS` | Freshness window for the encrypted local broker snapshot cache | Optional in broker mode | Default `3600000` (1 h). Freshness is based on broker `snapshot.generatedAt`; `0` disables cache reads/writes and forces the old blocking fetch every startup. |\n| `OMP_AUTH_BROKER_SNAPSHOT_CACHE` | Path to the encrypted local broker snapshot cache | Optional in broker mode | Defaults to `~/.omp/cache/auth-broker-snapshot.enc` (or XDG cache equivalent). Useful for tests, ephemeral hosts, or relocating the `0600` cache file. |\n\nThe gateway has no dedicated env vars — it inherits `OMP_AUTH_BROKER_*`. Its own inbound bearer token lives at `<config-dir>/auth-gateway.token` and is managed via `omp auth-gateway token`.\n\n---\n\n## 2) Provider-specific runtime configuration\n\n### Anthropic Foundry Gateway (Azure / enterprise proxy)\n\nWhen `CLAUDE_CODE_USE_FOUNDRY` is enabled, Anthropic requests switch to Foundry mode:\n\n- Base URL resolves from `FOUNDRY_BASE_URL` (fallback remains model/default base URL if unset).\n- API key resolution for provider `anthropic` becomes:\n `ANTHROPIC_FOUNDRY_API_KEY` → `ANTHROPIC_OAUTH_TOKEN` → `ANTHROPIC_API_KEY`.\n- `ANTHROPIC_CUSTOM_HEADERS` is parsed as comma/newline-separated `key: value`\n pairs and merged into request headers. They are also forwarded when\n `ANTHROPIC_BASE_URL` points to a non-Anthropic host (e.g. a corporate API\n gateway), so enterprise gateways requiring proprietary auth headers work\n without enabling Foundry mode.\n- TLS client/server material can be injected from env values:\n `NODE_EXTRA_CA_CERTS`, `CLAUDE_CODE_CLIENT_CERT`, `CLAUDE_CODE_CLIENT_KEY`.\n Each accepts either:\n - a filesystem path to PEM content, or\n - inline PEM (including escaped `\\n` sequences).\n\n| Variable | Value type | Behavior |\n| --------------------------- | ---------------------------------------------- | ----------------------------------------------------------------------------- |\n| `CLAUDE_CODE_USE_FOUNDRY` | Boolean-like string (`1`, `true`, `yes`, `on`) | Enables Foundry mode for Anthropic provider |\n| `FOUNDRY_BASE_URL` | URL string | Anthropic endpoint base URL in Foundry mode |\n| `ANTHROPIC_FOUNDRY_API_KEY` | Token string | Used for `Authorization: Bearer <token>` |\n| `ANTHROPIC_CUSTOM_HEADERS` | Header list string | Extra headers; format `header-a: value, header-b: value` or newline-separated. Also forwarded outside Foundry whenever `ANTHROPIC_BASE_URL` is non-Anthropic. |\n| `NODE_EXTRA_CA_CERTS` | PEM path or inline PEM | Extra CA chain for server certificate validation |\n| `CLAUDE_CODE_CLIENT_CERT` | PEM path or inline PEM | mTLS client certificate |\n| `CLAUDE_CODE_CLIENT_KEY` | PEM path or inline PEM | mTLS client private key (must be paired with cert) |\n\n### Amazon Bedrock\n\n| Variable | Default / behavior |\n| ------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------- |\n| `AWS_REGION` | Primary region source |\n| `AWS_DEFAULT_REGION` | Fallback if `AWS_REGION` unset |\n| `AWS_PROFILE` | Enables named profile auth path |\n| `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY` | Enables IAM key auth path |\n| `AWS_BEARER_TOKEN_BEDROCK` | Highest-precedence bearer token auth path; skips AWS profile/credential-chain lookup when set |\n| `AWS_CONTAINER_CREDENTIALS_RELATIVE_URI` / `AWS_CONTAINER_CREDENTIALS_FULL_URI` | Marks Bedrock as available in provider detection (credential resolution itself covers env keys, profiles/SSO/`credential_process`, then IMDSv2) |\n| `AWS_WEB_IDENTITY_TOKEN_FILE` + `AWS_ROLE_ARN` | Marks Bedrock as available in provider detection (same caveat as the ECS variables above) |\n| `AWS_BEDROCK_SKIP_AUTH` | If `1`, injects dummy credentials (proxy/non-auth scenarios) |\n| `HTTPS_PROXY` / `HTTP_PROXY` | Honored via Bun's native fetch proxy support (the provider no longer ships an AWS SDK / proxy-agent transport) |\n| `NO_PROXY` | Excludes matching hosts from Bun's native proxy routing |\n\nRegion fallback in provider code: `options.region` → `AWS_REGION` → `AWS_DEFAULT_REGION` → `us-east-1`.\n\n### Azure OpenAI Responses\n\n| Variable | Default / behavior |\n| ---------------------------------- | --------------------------------------------------------------------------- |\n| `AZURE_OPENAI_API_KEY` | Required unless API key passed as option |\n| `AZURE_OPENAI_API_VERSION` | Default `v1` |\n| `AZURE_OPENAI_BASE_URL` | Direct base URL override |\n| `AZURE_OPENAI_RESOURCE_NAME` | Used to construct base URL: `https://<resource>.openai.azure.com/openai/v1` |\n| `AZURE_OPENAI_DEPLOYMENT_NAME_MAP` | Optional mapping string: `modelId=deploymentName,model2=deployment2` |\n\nBase URL resolution: option `azureBaseUrl` → env `AZURE_OPENAI_BASE_URL` → option/env resource name → `model.baseUrl`.\n\n### Google Vertex AI\n\n| Variable | Required? | Notes |\n| -------------------------------- | ------------------------------ | ------------------------------------------------------------------------------------------------------------------------- |\n| `GOOGLE_CLOUD_PROJECT` | Yes (unless passed in options) | Primary project ID source |\n| `GCP_PROJECT` | Fallback | Alternate project ID source |\n| `GCLOUD_PROJECT` | Fallback | Alternate project ID source |\n| `GOOGLE_CLOUD_PROJECT_ID` | OAuth login helper only | Used by Gemini CLI OAuth project discovery |\n| `GOOGLE_VERTEX_LOCATION` | Yes (unless passed in options) | Primary Vertex location source |\n| `GOOGLE_CLOUD_LOCATION` | Fallback | Alternate Vertex location source |\n| `VERTEX_LOCATION` | Fallback | Alternate Vertex location source |\n| `GOOGLE_CLOUD_API_KEY` | Conditional | Direct Vertex API-key auth; otherwise ADC fallback can authenticate when project and location are set |\n| `GOOGLE_APPLICATION_CREDENTIALS` | Conditional | If set, file must exist; otherwise ADC fallback path is checked (`~/.config/gcloud/application_default_credentials.json`) |\n\n### Kimi\n\n| Variable | Default / behavior |\n| ---------------------- | -------------------------------------------------------- |\n| `KIMI_CODE_OAUTH_HOST` | Primary OAuth host override |\n| `KIMI_OAUTH_HOST` | Fallback OAuth host override |\n| `KIMI_CODE_BASE_URL` | Overrides Kimi usage endpoint base URL (`usage/kimi.ts`) |\n\nOAuth host chain: `KIMI_CODE_OAUTH_HOST` → `KIMI_OAUTH_HOST` → `https://auth.kimi.com`.\n\n### Gemini CLI compatibility\n\n| Variable | Default / behavior |\n| -------------------------- | --------------------------------------------------------------- |\n| `PI_AI_GEMINI_CLI_VERSION` | Overrides Gemini CLI user-agent version tag (`0.35.3` if unset) |\n\n### OpenAI Codex responses (feature/debug controls)\n\n| Variable | Behavior |\n| ------------------------------------------ | ---------------------------------------------------- |\n| `PI_CODEX_DEBUG` | `1`/`true` enables Codex provider debug logging |\n| `PI_CODEX_WEBSOCKET` | `1`/`true` enables websocket transport preference |\n| `PI_OPENAI_STATEFUL` | Overrides the stateful-chaining default for the platform OpenAI Responses API (`previous_response_id`, forces `store: true`): on by default against api.openai.com, off elsewhere |\n| `PI_CODEX_WEBSOCKET_IDLE_TIMEOUT_MS` | Positive integer override (default 300000) |\n| `PI_CODEX_WEBSOCKET_RETRY_BUDGET` | Non-negative integer override (default 5) |\n| `PI_CODEX_WEBSOCKET_RETRY_DELAY_MS` | Positive integer base backoff override (default 500) |\n| `PI_OPENAI_STREAM_FIRST_EVENT_TIMEOUT_MS` | Positive integer OpenAI first-event timeout override |\n| `PI_OPENAI_STREAM_IDLE_TIMEOUT_MS` | Positive integer OpenAI stream idle timeout override |\n\n### Cursor provider debug\n\n| Variable | Behavior |\n| ------------------ | ------------------------------------------------------------------------ |\n| `DEBUG_CURSOR` | Enables provider debug logs; `2`/`verbose` for detailed payload snippets |\n| `DEBUG_CURSOR_LOG` | Optional file path for JSONL debug log output |\n\n### Prompt cache compatibility switch\n\n| Variable | Behavior |\n| -------------------- | ----------------------------------------------------------------------------------------------------------------- |\n| `PI_CACHE_RETENTION` | If `long`, enables long retention where supported (`anthropic`, `openai-responses`, Bedrock retention resolution) |\n\n---\n\n## 3) Web search subsystem\n\n### Search provider credentials\n\n| Variable | Used by |\n| --------------------------------------------------- | ------------------------------------------------------------- |\n| `EXA_API_KEY` | Exa search provider and Exa MCP tools |\n| `BRAVE_API_KEY` | Brave search provider |\n| `PERPLEXITY_API_KEY` | Perplexity search provider API-key mode |\n| `PERPLEXITY_COOKIES` | Perplexity cookie-auth search mode |\n| `TAVILY_API_KEY` | Tavily search provider |\n| `ZAI_API_KEY` | z.ai search provider (also checks stored OAuth in `agent.db`) |\n| `OPENAI_API_KEY` / Codex OAuth in DB | Codex search provider availability/auth |\n| `PI_CODEX_WEB_SEARCH_MODEL` | Codex search provider model override |\n| `MOONSHOT_SEARCH_API_KEY` / `KIMI_SEARCH_API_KEY` | Kimi/Moonshot search provider env auth |\n| `MOONSHOT_SEARCH_BASE_URL` / `KIMI_SEARCH_BASE_URL` | Kimi/Moonshot search endpoint override |\n| `KAGI_API_KEY` | Kagi search provider |\n| `JINA_API_KEY` | Jina search provider |\n| `PARALLEL_API_KEY` | Parallel search provider |\n| `SEARXNG_ENDPOINT`, `SEARXNG_TOKEN` | SearXNG endpoint and optional bearer token |\n| `SEARXNG_BASIC_USERNAME`, `SEARXNG_BASIC_PASSWORD` | SearXNG HTTP Basic Auth credentials |\n\nSearXNG also reads the equivalent `searxng.endpoint`, `searxng.token`, `searxng.basicUsername`, and `searxng.basicPassword` settings from `~/.omp/agent/config.yml`; environment variables are fallbacks.\n\n### Anthropic web search auth chain\n\n`searchAnthropic()` resolves credentials in this order:\n\n1. `ANTHROPIC_SEARCH_API_KEY`\n2. `authStorage.getApiKey(\"anthropic\")` fallback credentials (runtime/config overrides, stored API-key credentials, stored OAuth credentials, then generic Anthropic env fallback: `ANTHROPIC_FOUNDRY_API_KEY` in Foundry mode, otherwise `ANTHROPIC_OAUTH_TOKEN` / `ANTHROPIC_API_KEY`)\n\nFor either credential path, base URL resolution is:\n\n1. `ANTHROPIC_SEARCH_BASE_URL`\n2. `FOUNDRY_BASE_URL` when `CLAUDE_CODE_USE_FOUNDRY` is enabled\n3. `ANTHROPIC_BASE_URL`\n4. `https://api.anthropic.com`\n\nRelated vars:\n\n| Variable | Default / behavior |\n| --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| `ANTHROPIC_SEARCH_API_KEY` | API key used exclusively for the Anthropic web search provider. Highest-priority search auth; overrides `ANTHROPIC_API_KEY` / OAuth / Foundry for search calls without affecting chat completions. |\n| `ANTHROPIC_SEARCH_BASE_URL` | Base URL used exclusively for the Anthropic web search provider. Applied to either `ANTHROPIC_SEARCH_API_KEY` or fallback Anthropic credentials; overrides `ANTHROPIC_BASE_URL` (and `FOUNDRY_BASE_URL` in Foundry mode) for search calls. |\n| `ANTHROPIC_SEARCH_MODEL` | Search model override. Defaults to `claude-haiku-4-5`. |\n| `ANTHROPIC_BASE_URL` | Generic fallback base URL for Anthropic requests when no search-specific base URL is set. |\n\nUse `ANTHROPIC_SEARCH_BASE_URL` (optionally with `ANTHROPIC_SEARCH_API_KEY`) to keep chat routed through an enterprise gateway (`ANTHROPIC_BASE_URL` or `CLAUDE_CODE_USE_FOUNDRY=true`) while pointing web search at a direct Anthropic endpoint, or vice versa.\n\n### Perplexity OAuth flow behavior flag\n\n| Variable | Behavior |\n| ------------------- | ------------------------------------------------------------------------------- |\n| `PI_AUTH_NO_BORROW` | If set, disables macOS native-app token borrowing path in Perplexity login flow |\n\n---\n\n## 4) Python tooling and kernel runtime\n\n| Variable | Default / behavior |\n| ----------------------- | ------------------------------------------------------------------------------------------------------------------- |\n| `PI_PY` | Boolean-like override for the Python eval backend: truthy (`1`/`true`/`yes`/`on`) enables, any other value disables; unset defers to the `eval.py` setting (default enabled) |\n| `PI_JS` | Same boolean-like override for the JavaScript eval backend; unset defers to the `eval.js` setting (default enabled) |\n| `PI_PYTHON_SKIP_CHECK` | If `1`, skips Python interpreter availability checks (subprocess runner still starts on demand) |\n| `PI_PYTHON_INTEGRATION` | If `1`, opts gated integration tests in (e.g. `python-runner.integration.test.ts`) into running against real Python |\n| `PI_PYTHON_IPC_TRACE` | If `1`, logs NDJSON frames exchanged with the Python runner subprocess |\n| `VIRTUAL_ENV` | Highest-priority venv path for Python runtime resolution |\n\nExtra conditional behavior:\n\n- If `BUN_ENV=test` or `NODE_ENV=test`, Python availability checks are treated as OK and warming is skipped.\n- Python env filtering denies common API keys and allows safe base vars + `LC_`, `XDG_`, `PI_` prefixes.\n\n---\n\n## 5) Agent/runtime behavior toggles\n\n| Variable | Default / behavior |\n| ---------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| `PI_SMOL_MODEL` | Ephemeral model-role override for `smol` (CLI `--smol` takes precedence) |\n| `PI_SLOW_MODEL` | Ephemeral model-role override for `slow` (CLI `--slow` takes precedence) |\n| `PI_PLAN_MODEL` | Ephemeral model-role override for `plan` (CLI `--plan` takes precedence) |\n| `PI_NO_TITLE` | If set (any non-empty value), disables auto session title generation on first user message |\n| `PI_TINY_DEVICE` | ONNX execution provider for local tiny models; overrides the `providers.tinyModelDevice` setting (default: CPU; supports `cpu`, `gpu`, `metal`/`webgpu`, `auto`, `cuda`, `dml`, `coreml`, `wasm`, `webnn`, `webnn-gpu`, `webnn-cpu`, `webnn-npu`) |\n| `PI_TINY_DTYPE` | ONNX quantization/precision for local tiny models; overrides the `providers.tinyModelDtype` setting (default: each model's shipped dtype, currently `q4`; supports `auto`, `fp32`, `fp16`, `q8`, `int8`, `uint8`, `q4`, `bnb4`, `q4f16`, `q2`, `q2f16`, `q1`, `q1f16`) |\n| `PI_NO_INTERLEAVED_THINKING` | If `1`, disables Anthropic interleaved thinking budget behavior and uses output-token inflation for older thinking mode |\n| `NULL_PROMPT` | If `true`, system prompt builder returns empty string |\n| `PI_BLOCKED_AGENT` | Blocks a specific subagent type in task tool |\n| `PI_SUBPROCESS_CMD` | Overrides subagent spawn command (`omp` / `omp.cmd` resolution bypass) |\n| `PI_TASK_MAX_OUTPUT_BYTES` | Max captured output bytes per subagent (default `500000`) |\n| `PI_TASK_MAX_OUTPUT_LINES` | Max captured output lines per subagent (default `5000`) |\n| `PI_TIMING` | If set (any non-empty value), prints a hierarchical timing-span tree to **stderr** via `logger.printTimings()`. In interactive mode the tree prints once the agent is ready (before the TUI starts); in print mode it prints after the whole prompt batch completes. Print-mode prompts are wrapped in `print:prompt:initial` / `print:prompt:next` spans so each user message shows up as its own row. `PI_TIMING=x` exits the process with code 0 right after printing in interactive mode (use to measure cold startup only). `PI_TIMING=full` lists every module-load entry instead of just the top N. |\n| `PI_DEBUG_STARTUP` | If set (any non-empty value), streams one synchronous `[startup] <phase>:start` / `:done` marker line to **stderr** as each startup phase begins/ends — including command-module imports (`cli:load:<name>`) and the native addon extraction/`dlopen` (`native:*`). Unlike `PI_TIMING` (which prints only once startup completes), the markers survive a hard hang: the last line on stderr names the phase the process is stuck in. Combine with `PI_TIMING` freely; markers and the span tree share the same phase names. |\n| `PI_PACKAGE_DIR` | Overrides package asset base dir resolution (`docs/`, `examples/`, `CHANGELOG.md`) |\n| `PI_DISABLE_LSPMUX` | If `1`, disables lspmux detection/integration and forces direct LSP server spawning |\n| `PI_RPC_EMIT_TITLE` | Boolean-like flag enabling title events in RPC mode |\n| `SMITHERY_URL` | Smithery web URL override (default `https://smithery.ai`) |\n| `SMITHERY_API_URL` | Smithery API base URL override (default `https://api.smithery.ai`) |\n| `SMITHERY_API_KEY` | Smithery API key for managed MCP auth lookup |\n| `PUPPETEER_EXECUTABLE_PATH` | Browser tool Chromium executable override |\n| `LM_STUDIO_BASE_URL` | Default implicit LM Studio discovery base URL override (`http://127.0.0.1:1234/v1` if unset) |\n| `OLLAMA_BASE_URL` | Default implicit Ollama discovery base URL override (`OLLAMA_HOST` if unset, then `http://127.0.0.1:11434`) |\n| `OLLAMA_HOST` | Ollama host used for implicit Ollama discovery when `OLLAMA_BASE_URL` is unset; accepts Ollama-style values such as `127.0.0.1:11434` or `http://host:11434` |\n| `OLLAMA_CONTEXT_LENGTH` | Positive integer context-window override for implicit Ollama discovery; affects OMP context budgeting only and does not change Ollama's runtime `num_ctx` |\n| `LLAMA_CPP_BASE_URL` | Default implicit Llama.cpp discovery base URL override (`http://127.0.0.1:8080` if unset) |\n| `PI_EDIT_VARIANT` | Forces edit tool variant when valid (`patch`, `replace`, `hashline`, `apply_patch`) |\n| `PI_FORCE_IMAGE_PROTOCOL` | Forces supported image protocol (`kitty`, `iterm2`/`iterm`, `sixel`, `none`) where used |\n| `PI_ALLOW_SIXEL_PASSTHROUGH` | Allows SIXEL passthrough when `PI_FORCE_IMAGE_PROTOCOL=sixel` |\n| `PI_NO_PTY` | If `1`, disables interactive PTY path for bash tool |\n| `OMP_MCP_TIMEOUT_MS` | Overrides MCP client request timeout (ms) for every MCP server. `0` disables client-side timeouts (`AbortSignal` never fires). Invalid (negative or non-numeric) values are ignored with a warning and the per-server config or default (`30000`) is used. |\n\n`PI_NO_PTY` is also set internally when CLI `--no-pty` is used.\n\n---\n\n## 6) Storage and config root paths\n\nThese are consumed via `@oh-my-pi/pi-utils/dirs` and affect where coding-agent stores data.\n\n| Variable | Default / behavior |\n| --------------------- | ----------------------------------------------------------------------------- |\n| `PI_CONFIG_DIR` | Config root dirname under home (default `.omp`) |\n| `PI_CODING_AGENT_DIR` | Full override for agent directory (default `~/<PI_CONFIG_DIR or .omp>/agent`) |\n| `PWD` | Used when matching canonical current working directory in path helpers |\n\n---\n\n## 7) Shell/tool execution environment\n\n(From `packages/utils/src/procmgr.ts` and coding-agent bash tool integration.)\n\n| Variable | Behavior |\n| -------------------------- | ------------------------------------------------------------------------------ |\n| `PI_BASH_NO_CI` | Suppresses automatic `CI=true` injection into spawned shell env |\n| `CLAUDE_BASH_NO_CI` | Legacy alias fallback for `PI_BASH_NO_CI` |\n| `PI_BASH_NO_LOGIN` | Disables login-shell mode; shell args become `['-c']` instead of `['-l','-c']` |\n| `CLAUDE_BASH_NO_LOGIN` | Legacy alias fallback for `PI_BASH_NO_LOGIN` |\n| `PI_SHELL_PREFIX` | Optional command prefix wrapper |\n| `CLAUDE_CODE_SHELL_PREFIX` | Legacy alias fallback for `PI_SHELL_PREFIX` |\n| `VISUAL` | Preferred external editor command |\n| `EDITOR` | Fallback external editor command |\n\nCurrent implementation: `PI_BASH_NO_LOGIN`/`CLAUDE_BASH_NO_LOGIN` are active; when either is set, `getShellArgs()` returns `['-c']`.\n\n---\n\n## 8) UI/theme/session detection (auto-detected env)\n\nThese are read as runtime signals; they are usually set by the terminal/OS rather than manually configured.\n\n| Variable | Used for |\n| ------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------- |\n| `COLORTERM`, `TERM`, `WT_SESSION` | Color capability detection (theme color mode) |\n| `COLORFGBG` | Terminal background light/dark auto-detection |\n| `TERM_PROGRAM`, `TERM_PROGRAM_VERSION`, `TERMINAL_EMULATOR` | Terminal identity in system prompt/context |\n| `TMUX_PANE`, `CMUX_SURFACE_ID`, `KITTY_WINDOW_ID`, `TERM_SESSION_ID`, `WT_SESSION` | Stable per-terminal session breadcrumb IDs |\n| `SHELL`, `ComSpec`, `TERM_PROGRAM`, `TERM` | System info diagnostics |\n| `APPDATA`, `XDG_CONFIG_HOME` | lspmux config path resolution |\n| `HOME` | Path shortening in MCP command UI |\n\n---\n\n## 9) TUI runtime flags (shared package, affects coding-agent UX)\n\n| Variable | Behavior |\n| ------------------------- | ------------------------------------------------------------------------------------- |\n| `PI_NOTIFICATIONS` | `off` / `0` / `false` suppress desktop notifications |\n| `PI_TUI_WRITE_LOG` | If set, logs TUI writes to file |\n| `PI_HARDWARE_CURSOR` | If `1`, enables hardware cursor mode |\n| `PI_NO_SYNC_OUTPUT` | If set (any non-empty value), disables DEC 2026 synchronized-output wrappers while keeping TUI autowrap guards |\n| `PI_NO_DECCARA` | If set (truthy), disables Kitty DECCARA rectangular-SGR background fills (forces padded-string rendering) |\n| `PI_DEBUG_REDRAW` | If `1`, enables redraw debug logging |\n| `PI_FORCE_IMAGE_PROTOCOL` | Forces terminal image protocol detection (`kitty`, `iterm2`/`iterm`, `sixel`, `none`) |\n| `PI_TUI_RESIZE_IN_PLACE` | `1`/`true` force in-place resize (no alt-screen borrow, no ED3 rewrap); `0`/`false` force the alt-screen fast path. Default-on for Warp, which re-reports its size on alt-screen toggles |\n\n---\n\n## 10) Commit generation controls\n\n| Variable | Behavior |\n| ------------------------- | ------------------------------------------------------------------- |\n| `PI_COMMIT_TEST_FALLBACK` | If `true` (case-insensitive), force commit fallback generation path |\n| `PI_COMMIT_NO_FALLBACK` | If `true`, disables fallback when agent returns no proposal |\n| `PI_COMMIT_MAP_REDUCE` | If `false`, disables map-reduce commit analysis path |\n| `DEBUG` | If set, commit agent error stack traces are printed |\n\n---\n\n## Security-sensitive variables\n\nTreat these as secrets; do not log or commit them:\n\n- Provider/API keys and OAuth/bearer credentials (all `*_API_KEY`, `*_TOKEN`, OAuth access/refresh tokens)\n- Cloud credentials (`AWS_*`, `GOOGLE_APPLICATION_CREDENTIALS` path may expose service-account material)\n- Search/provider auth vars (`EXA_API_KEY`, `BRAVE_API_KEY`, `PERPLEXITY_API_KEY`, Anthropic search keys)\n- Foundry mTLS material (`CLAUDE_CODE_CLIENT_CERT`, `CLAUDE_CODE_CLIENT_KEY`, `NODE_EXTRA_CA_CERTS` when it points to private CA bundles)\n\nPython runtime also explicitly strips many common key vars before spawning kernel subprocesses (`packages/coding-agent/src/eval/py/runtime.ts`).\n",
|
|
20
20
|
"extension-loading.md": "# Extension Loading (TypeScript/JavaScript Modules)\n\nThis document covers how the coding agent discovers and loads **extension modules** (`.ts`/`.js`) at startup.\n\nIt does **not** cover `gemini-extension.json` manifest extensions (documented separately).\n\n## What this subsystem does\n\nExtension loading builds a list of module entry files, imports each module with Bun, executes its factory, and returns:\n\n- loaded extension definitions\n- per-path load errors (without aborting the whole load)\n- a shared extension runtime object used later by `ExtensionRunner`\n\n## Primary implementation files\n\n- `src/extensibility/extensions/loader.ts` — path discovery + import/execution\n- `src/extensibility/extensions/index.ts` — public exports\n- `src/extensibility/extensions/runner.ts` — runtime/event execution after load\n- `src/discovery/builtin.ts` — native auto-discovery provider for extension modules\n- `src/config/settings.ts` — loads merged `extensions` / `disabledExtensions` settings\n\n---\n\n## Inputs to extension loading\n\n### 1) Auto-discovered native extension modules\n\n`discoverAndLoadExtensions()` first asks discovery providers for `extension-module` capability items, then keeps only provider `native` items.\n\nNative `extension-module` discovery comes from:\n\n- Project directory: `<cwd>/.omp/extensions`\n- User directory: `~/.omp/agent/extensions`\n- Native legacy/settings JSON entries: `<cwd>/.omp/settings.json#extensions` and `~/.omp/agent/settings.json#extensions`\n\nThe project root is the native provider's `.omp` directory (`SOURCE_PATHS.native.projectDir`), cwd-only; it does not walk ancestors. The user root is the active profile's agent directory via `getAgentDir()`, so under `omp --profile <name>` it becomes `~/.omp/profiles/<name>/agent/extensions` (and it honors `PI_CODING_AGENT_DIR`). See [Profiles](./config-usage.md#profiles).\n\nNotes:\n\n- Native auto-discovery is currently `.omp` based.\n- Legacy `.pi` is still accepted in package manifests (`pi.extensions`) and project override lookup, but `.pi/extensions` is not a native root here.\n\n### 2) Installed plugin extension entries\n\nAfter native auto-discovery, `discoverAndLoadExtensions()` appends extension entry points from enabled installed plugins via `getAllPluginExtensionPaths(cwd)`.\n\nPlugin extension entries come from package `omp.extensions` / `pi.extensions` manifests, including enabled feature entries.\n\n### 3) Explicitly configured paths\n\nAfter plugin extension entries, configured paths are appended and resolved.\n\nConfigured path sources in the main session startup path (`sdk.ts`):\n\n1. CLI-provided paths (`--extension/-e`, and `--hook` is also treated as an extension path)\n2. Merged settings `extensions` array\n\nSettings files:\n\n- User: `~/.omp/agent/config.yml` (or custom agent dir via `PI_CODING_AGENT_DIR`)\n- Project/native settings capability: `<cwd>/.omp/config.yml` and `<cwd>/.omp/settings.json`\n\nNative extension-module discovery also reads legacy JSON extension lists from:\n\n- `~/.omp/agent/settings.json`\n- `<cwd>/.omp/settings.json`\n\nExamples:\n\n```yaml\n# ~/.omp/agent/config.yml\nextensions:\n - ~/my-exts/safety.ts\n - ./local/ext-pack\n```\n\n```json\n{\n \"extensions\": [\"./.omp/extensions/my-extra\"]\n}\n```\n\n---\n\n## Enable/disable controls\n\n### Disable discovery\n\n- CLI: `--no-extensions`\n- SDK option: `disableExtensionDiscovery`\n\nBehavior split:\n\n- SDK: when `disableExtensionDiscovery=true`, it still loads `additionalExtensionPaths` via `loadExtensions()`.\n- CLI path building (`main.ts`) currently clears CLI extension paths when `--no-extensions` is set, so explicit `-e/--hook` are not forwarded in that mode.\n\n### Disable specific extension modules\n\n`disabledExtensions` setting filters by extension id format:\n\n- `extension-module:<derivedName>`\n\n`derivedName` is based on entry path (`getExtensionNameFromPath`), for example:\n\n- `/x/foo.ts` -> `foo`\n- `/x/bar/index.ts` -> `bar`\n\nExample:\n\n```yaml\ndisabledExtensions:\n - extension-module:foo\n```\n\n---\n\n## Path and entry resolution\n\n### Path normalization\n\nFor configured paths:\n\n1. Normalize unicode spaces\n2. Expand `~`\n3. If relative, resolve against current `cwd`\n\n### If configured path is a file\n\nIt is used directly as a module entry candidate.\n\n### If configured path is a directory\n\nResolution order:\n\n1. `package.json` in that directory with `omp.extensions` (or legacy `pi.extensions`) -> use declared entries\n2. `index.ts`\n3. `index.js`\n4. Otherwise scan one level for extension entries:\n - direct `*.ts` / `*.js`\n - subdir `index.ts` / `index.js`\n - subdir `package.json` with `omp.extensions` / `pi.extensions`\n\nRules and constraints:\n\n- no recursive discovery beyond one subdirectory level\n- declared `extensions` manifest entries are resolved relative to that package directory\n- declared entries are included only if file exists/access is allowed\n- in `*/index.{ts,js}` pairs, TypeScript is preferred over JavaScript\n- symlinks are treated as eligible files/directories\n\n### Ignore behavior differs by source\n\n- Native auto-discovery (`discoverExtensionModulePaths` in discovery helpers) uses native glob with `gitignore: true` and `hidden: false`.\n- Explicit configured directory scanning in `loader.ts` uses `readdir` rules and does **not** apply gitignore filtering.\n\n---\n\n## Load order and precedence\n\n`discoverAndLoadExtensions()` builds one ordered list and then calls `loadExtensions()`.\n\nOrder:\n\n1. Native auto-discovered modules\n2. Installed plugin extension entries\n3. Explicit configured paths (in provided order)\n\nIn `sdk.ts`, configured order is:\n\n1. CLI additional paths\n2. Settings `extensions`\n\nDe-duplication:\n\n- absolute path based\n- first seen path wins\n- later duplicates are ignored\n\nImplication: if the same module path is both auto-discovered and explicitly configured, it is loaded once at the first position (auto-discovered stage).\n\n---\n\n## Module import and factory contract\n\nEach candidate path is loaded with dynamic import:\n\n- `await import(resolvedPath)`\n- factory is `module.default ?? module`\n- factory must be a function (`ExtensionFactory`)\n\nIf export is not a function, that path fails with a structured error and loading continues.\n\n---\n\n## Failure handling and isolation\n\n### During loading\n\nPer extension path, failures are captured as `{ path, error }` and do not stop other paths from loading.\n\nCommon cases:\n\n- import failure / missing file\n- invalid factory export (non-function)\n- exception thrown while executing factory\n\n### Runtime isolation model\n\n- Extensions are **not sandboxed** (same process/runtime).\n- They share one `EventBus` and one `ExtensionRuntime` instance.\n- During load, runtime action methods intentionally throw `ExtensionRuntimeNotInitializedError`; action wiring happens later in `ExtensionRunner.initialize()`.\n\n### After loading\n\nWhen events run through `ExtensionRunner`, handler exceptions are caught and emitted as extension errors instead of crashing the runner loop.\n\n---\n\n## Minimal user/project layout examples\n\n### User-level\n\n```text\n~/.omp/agent/\n config.yml\n extensions/\n guardrails.ts\n audit/\n index.ts\n```\n\n### Project-level\n\n```text\n<repo>/\n .omp/\n settings.json\n extensions/\n checks/\n package.json\n lint-gates.ts\n```\n\n`checks/package.json`:\n\n```json\n{\n \"omp\": {\n \"extensions\": [\"./src/check-a.ts\", \"./src/check-b.js\"]\n }\n}\n```\n\nLegacy manifest key still accepted:\n\n```json\n{\n \"pi\": {\n \"extensions\": [\"./index.ts\"]\n }\n}\n```\n",
|
|
21
21
|
"extensions.md": "# Extensions\n\nPrimary guide for authoring runtime extensions in `packages/coding-agent`.\n\nThis document covers the current extension runtime in:\n\n- `src/extensibility/extensions/types.ts`\n- `src/extensibility/extensions/runner.ts`\n- `src/extensibility/extensions/wrapper.ts`\n- `src/extensibility/extensions/index.ts`\n- `src/modes/controllers/extension-ui-controller.ts`\n\nFor discovery paths and filesystem loading rules, see [`extension-loading.md`](./extension-loading.md).\n\n## What an extension is\n\nAn extension is a TS/JS module exporting a default factory:\n\n```ts\nimport type { ExtensionAPI } from \"@oh-my-pi/pi-coding-agent\";\n\nexport default function myExtension(pi: ExtensionAPI) {\n // register handlers/tools/commands/renderers\n}\n```\n\nExtensions can combine all of the following in one module:\n\n- event handlers (`pi.on(...)`)\n- LLM-callable tools (`pi.registerTool(...)`)\n- slash commands (`pi.registerCommand(...)`)\n- keyboard shortcuts and flags\n- custom message rendering\n- session/message injection APIs (`sendMessage`, `sendUserMessage`, `appendEntry`)\n\n## Runtime model\n\n1. Extensions are imported and their factory functions run.\n2. During that load phase, registration methods are valid; runtime action methods are not yet initialized.\n3. `ExtensionRunner.initialize(...)` wires live actions/contexts for the active mode.\n4. Session/agent/tool lifecycle events are emitted to handlers.\n5. Every tool execution is wrapped with extension interception (`tool_call` / `tool_result`).\n\n```text\nExtension lifecycle (simplified)\n\nload paths\n │\n ▼\nimport module + run factory (registration only)\n │\n ▼\nExtensionRunner.initialize(mode/session/tool registry)\n │\n ├─ emit session/agent events to handlers\n ├─ wrap tool execution (tool_call/tool_result)\n └─ expose runtime actions (sendMessage, setActiveTools, ...)\n```\n\nImportant constraint from `loader.ts`:\n\n- calling action methods like `pi.sendMessage()` during extension load throws `ExtensionRuntimeNotInitializedError`\n- register first; perform runtime behavior from events/commands/tools\n\n## Quick start\n\n```ts\nimport type { ExtensionAPI } from \"@oh-my-pi/pi-coding-agent\";\n\nexport default function (pi: ExtensionAPI) {\n const { z } = pi.zod;\n\n pi.setLabel(\"Safety + Utilities\");\n\n pi.on(\"session_start\", async (_event, ctx) => {\n ctx.ui.notify(`Extension loaded in ${ctx.cwd}`, \"info\");\n });\n\n pi.on(\"tool_call\", async (event) => {\n if (event.toolName === \"bash\" && event.input.command?.includes(\"rm -rf\")) {\n return { block: true, reason: \"Blocked by extension policy\" };\n }\n });\n\n pi.registerTool({\n name: \"hello_extension\",\n label: \"Hello Extension\",\n description: \"Return a greeting\",\n parameters: z.object({ name: z.string() }),\n async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {\n return {\n content: [{ type: \"text\", text: `Hello, ${params.name}` }],\n details: { greeted: params.name },\n };\n },\n });\n\n pi.registerCommand(\"hello-ext\", {\n description: \"Show queue state\",\n handler: async (_args, ctx) => {\n ctx.ui.notify(`pending=${ctx.hasPendingMessages()}`, \"info\");\n },\n });\n}\n```\n\n## Extension API surfaces\n\n## 1) Registration and actions (`ExtensionAPI`)\n\nCore methods:\n\n- `on(event, handler)`\n- `registerTool`, `registerCommand`, `registerShortcut`, `registerFlag`\n- `registerMessageRenderer`, `registerAssistantThinkingRenderer`\n- `setLabel`, `getFlag`\n- `sendMessage`, `sendUserMessage`, `appendEntry`, `exec`\n- `getActiveTools`, `getAllTools`, `setActiveTools`\n- `getCommands`\n- `getSessionName`, `setSessionName`\n- `setModel`, `getThinkingLevel`, `setThinkingLevel`\n- `registerProvider`\n- `events` (shared event bus)\n\nIn interactive mode, `input` handlers run before the built-in first-message auto-title check. Extensions that call `await pi.setSessionName(...)` from `input` can set the persisted session name and prevent the default auto-generated title from running for that session.\n\nAlso exposed:\n\n- `pi.logger`\n- `pi.typebox` (zod-backed compatibility shim for legacy TypeBox-style schemas)\n- `pi.zod` (injected `zod/v4` module — canonical for tool parameter schemas)\n- `pi.pi` (package exports)\n\n### Message delivery semantics\n\n`pi.sendMessage(message, options)` supports:\n\n- `deliverAs: \"steer\"` (default) — interrupts current run\n- `deliverAs: \"followUp\"` — queued to run after current run\n- `deliverAs: \"nextTurn\"` — stored and injected on the next user prompt\n- `triggerTurn: true` — starts a turn when idle (also honored with `deliverAs: \"nextTurn\"`: idle prompts immediately; while streaming the queued message schedules an internal continuation)\n\n`pi.sendUserMessage(content, { deliverAs })` always goes through prompt flow; while streaming it queues as steer/follow-up.\n\n## 2) Handler context (`ExtensionContext`)\n\nHandlers and tool `execute` receive `ctx` with:\n\n- `ui`\n- `hasUI`\n- `cwd`\n- `sessionManager` (read-only)\n- `modelRegistry`, `model`\n- `models` (read-only model query — see below)\n- `getContextUsage()`\n- `compact(...)`\n- `isIdle()`, `hasPendingMessages()`, `abort()`\n- `shutdown()`\n- `getSystemPrompt()`\n\n### Model selection (`ctx.models`)\n\n`ctx.models` is a read-only facade for picking and comparing models the same way core does:\n\n- `list()` — authenticated models available this session.\n- `current()` — the live session model (read lazily, so it reflects `/model` switches).\n- `resolve(spec)` — a model string (`provider/id`, bare id) or role alias (`pi/slow`, a configured role) → `Model`, honoring the same settings-backed aliases and match preferences as `--model`. Returns `undefined` when nothing matches.\n- `family(model)` — an opaque lineage token for \"same family?\" checks (Claude point releases share a token; Claude and GPT differ). Compare it; don't persist it (the vocabulary tracks new releases).\n\n```ts\n// Pick a model from a different family than the current one (e.g. a cross-family reviewer).\nconst current = ctx.models.current();\nconst contrasting = ctx.models\n .list()\n .find(m => current && ctx.models.family(m) !== ctx.models.family(current));\n```\n\n## 3) Command context (`ExtensionCommandContext`)\n\nCommand handlers additionally get:\n\n- `waitForIdle()`\n- `newSession(...)`\n- `switchSession(...)`\n- `branch(entryId)`\n- `navigateTree(targetId, { summarize })`\n- `reload()`\n\nUse command context for session-control flows; these methods are intentionally separated from general event handlers.\n\n## Event surface (current names and behavior)\n\nCanonical event unions and payload types are in `types.ts`.\n\n### Session lifecycle\n\n- `session_start`\n- `session_before_switch` / `session_switch`\n- `session_before_branch` / `session_branch`\n- `session_before_compact` / `session.compacting` / `session_compact`\n- `session_before_tree` / `session_tree`\n- `session_shutdown`\n\nCancelable pre-events:\n\n- `session_before_switch` → `{ cancel?: boolean }`\n- `session_before_branch` → `{ cancel?: boolean; skipConversationRestore?: boolean }`\n- `session_before_compact` → `{ cancel?: boolean; compaction?: CompactionResult }`\n- `session_before_tree` → `{ cancel?: boolean; summary?: { summary: string; details?: unknown } }`\n\n### Prompt and turn lifecycle\n\n- `input`\n- `before_agent_start`\n- `before_provider_request` (may replace provider request payload)\n- `after_provider_response`\n- `context`\n- `agent_start` / `agent_end`\n- `turn_start` / `turn_end`\n- `message_start` / `message_update` / `message_end`\n\n### Tool lifecycle\n\n- `tool_call` (pre-exec, may block)\n- `tool_result` (post-exec, may patch content/details/isError)\n- `tool_execution_start` / `tool_execution_update` / `tool_execution_end` (observability)\n\n`tool_result` is middleware-style: handlers run in extension order and each sees prior modifications.\n\n### Reliability/runtime signals\n\n- `auto_compaction_start` / `auto_compaction_end`\n- `auto_retry_start` / `auto_retry_end`\n- `ttsr_triggered`\n- `todo_reminder`\n- `goal_updated`\n- `credential_disabled`\n\n### User command interception\n\n- `user_bash` (override with `{ result }`)\n- `user_python` (override with `{ result }`)\n\n### `resources_discover`\n\n`resources_discover` exists in extension types and `ExtensionRunner`.\nCurrent runtime note: `ExtensionRunner.emitResourcesDiscover(...)` is implemented, but there are no `AgentSession` callsites invoking it in the current codebase.\n\n## Tool authoring details\n\n`registerTool` uses `ToolDefinition` from `types.ts`.\n\nCurrent `execute` signature:\n\n```ts\nexecute(\n\ttoolCallId,\n\tparams,\n\tsignal,\n\tonUpdate,\n\tctx,\n): Promise<AgentToolResult>\n```\n\nTemplate:\n\n```ts\nconst { z } = pi.zod;\n\npi.registerTool({\n name: \"my_tool\",\n label: \"My Tool\",\n description: \"...\",\n parameters: z.object({}),\n hidden: false,\n defaultInactive: false,\n deferrable: false,\n async execute(_id, _params, signal, onUpdate, ctx) {\n if (signal?.aborted) {\n return { content: [{ type: \"text\", text: \"Cancelled\" }] };\n }\n onUpdate?.({ content: [{ type: \"text\", text: \"Working...\" }] });\n return { content: [{ type: \"text\", text: \"Done\" }], details: {} };\n },\n onSession(event, ctx) {\n // reason: start|switch|branch|tree|shutdown\n },\n renderCall(args, options, theme) {\n // optional TUI render\n },\n renderResult(result, options, theme, args) {\n // optional TUI render\n },\n});\n```\n\n`tool_call`/`tool_result` intercept all tools once the registry is wrapped in `sdk.ts`, including built-ins and extension/custom tools. `ToolDefinition` also supports optional `hidden`, `defaultInactive`, `deferrable`, `approval`, `mcpServerName`, `mcpToolName`, `renderCall`, and `renderResult` fields.\n\n## UI integration points\n\n`ctx.ui` implements the `ExtensionUIContext` interface. Support differs by mode.\n\n### Interactive mode (`extension-ui-controller.ts`)\n\nSupported:\n\n- dialogs: `select`, `confirm`, `input`, `editor`\n- input editing: `setEditorText`, `getEditorText`, `pasteToEditor`, `editor`\n- terminal title and working message (`setTitle`, `setWorkingMessage`)\n- notifications/status/editor text/terminal input/custom overlays\n- theme listing/loading by name (`setTheme` supports string names)\n- tools expanded toggle\n\nCurrent no-op methods in this controller:\n\n- `setFooter`\n- `setHeader`\n\n`setEditorComponent` is wired to the live editor (`ctx.setEditorComponent(factory)`). `setWidget` renders real widget components above or below the editor via `setHookWidget(...)` (`placement: \"aboveEditor\" | \"belowEditor\"`; string-array content capped at 10 lines).\n\n### RPC mode (`rpc-mode.ts`)\n\n`ctx.ui` is backed by RPC `extension_ui_request` events:\n\n- dialog methods (`select`, `confirm`, `input`, `editor`) round-trip to client responses\n- fire-and-forget methods emit requests (`notify`, `setStatus`, `setWidget` for string arrays, `setEditorText`; `setTitle` emits only when `PI_RPC_EMIT_TITLE=1`)\n\nUnsupported/no-op in RPC implementation:\n\n- `onTerminalInput`\n- `custom`\n- `setFooter`, `setHeader`, `setEditorComponent`\n- `setWorkingMessage`\n- theme switching/loading (`setTheme` returns failure)\n- tool expansion controls are inert\n\n### Print/headless/subagent paths\n\nWhen no UI context is supplied to runner init, `ctx.hasUI` is `false` and methods are no-op/default-returning.\n\n### ACP mode\n\nACP installs an elicitation-bridged UI context (`createAcpExtensionUiContext` in `acp-agent.ts`). `ctx.hasUI` is `true` while only `select`/`confirm`/`input` round-trip (as ACP elicitations; defaults are returned when the client lacks the `elicitation.form` capability). The non-elicitation surface (widgets, editor, theming, terminal input) is stubbed no-op.\n\n## Session and state patterns\n\nFor durable extension state:\n\n1. Persist with `pi.appendEntry(customType, data)`.\n2. Rebuild state from `ctx.sessionManager.getBranch()` on `session_start`, `session_branch`, `session_tree`.\n3. Keep tool result `details` structured when state should be visible/reconstructible from tool result history.\n\nExample reconstruction pattern:\n\n```ts\npi.on(\"session_start\", async (_event, ctx) => {\n let latest;\n for (const entry of ctx.sessionManager.getBranch()) {\n if (entry.type === \"custom\" && entry.customType === \"my-state\") {\n latest = entry.data;\n }\n }\n // restore from latest\n});\n```\n\n## Rendering extension points\n\n## Custom message renderer\n\n```ts\npi.registerMessageRenderer(\"my-type\", (message, { expanded }, theme) => {\n // return pi-tui Component\n});\n```\n\nUsed by interactive rendering when custom messages are displayed.\n\n## Assistant thinking renderer\n\n```ts\nimport { Container, Text } from \"@oh-my-pi/pi-tui\";\n\npi.registerAssistantThinkingRenderer((context, theme) => {\n const container = new Container();\n container.addChild(new Text(theme.fg(\"dim\", `thinking chars: ${context.text.length}`), 1, 0));\n return container;\n});\n```\n\nUsed by interactive rendering to add display-only supplemental UI below each visible assistant thinking block. The renderer receives the already-visible thinking text, content/thinking indexes, theme, and a `requestRender()` callback for async renderers. All registered renderers that return a component are appended in registration order. Renderers must not mutate messages; the original thinking block remains the provider/session source of truth.\n\n## Tool call/result renderer\n\nProvide `renderCall` / `renderResult` on `registerTool` definitions for custom tool visualization in TUI.\n\n## Constraints and pitfalls\n\n- Runtime actions are unavailable during extension load.\n- `tool_call` errors block execution (fail-closed).\n- Command name conflicts with built-ins are skipped with diagnostics.\n- Reserved shortcuts are ignored (`ctrl+c`, `ctrl+d`, `ctrl+z`, `ctrl+k`, `ctrl+p`, `ctrl+l`, `ctrl+o`, `ctrl+t`, `ctrl+g`, `ctrl+q`, `alt+m`, `shift+tab`, `shift+ctrl+p`, `alt+enter`, `escape`, `enter`).\n- Treat `ctx.reload()` as terminal for the current command handler frame.\n\n## Extensions vs hooks vs custom-tools\n\nUse the right surface:\n\n- **Extensions** (`src/extensibility/extensions/*`): unified system (events + tools + commands + renderers + provider registration).\n- **Hooks** (`src/extensibility/hooks/*`): separate legacy event API.\n- **Custom-tools** (`src/extensibility/custom-tools/*`): tool-focused modules; when loaded alongside extensions they are adapted and still pass through extension interception wrappers.\n\nIf you need one package that owns policy, tools, command UX, and rendering together, use extensions.\n",
|
|
22
22
|
"fs-scan-cache-architecture.md": "# Filesystem Scan Cache Architecture Contract\n\nThis document defines the current contract for the shared filesystem scan cache implemented in Rust (`crates/pi-natives/src/fs_cache.rs`) and consumed by native discovery/search APIs exposed to `packages/coding-agent`.\n\n## What this cache is\n\nThe cache stores full directory-scan entry lists (`GlobMatch[]`) keyed by scan scope, traversal policy, and requested metadata detail. Higher-level operations (`glob` filtering, `fuzzyFind` scoring, and cached `grep` candidate selection) run against those cached entries.\n\nPrimary goals:\n\n- avoid repeated filesystem walks for repeated discovery/search calls\n- keep consistency across native discovery/search flows when they share the same scan policy\n- allow explicit staleness recovery for empty results and explicit invalidation after file mutations\n\n## Ownership and public surface\n\n- Cache implementation and policy: `crates/pi-natives/src/fs_cache.rs`\n- Native consumers:\n - `crates/pi-natives/src/glob.rs`\n - `crates/pi-natives/src/fd.rs` (`fuzzyFind`)\n - `crates/pi-natives/src/grep.rs` (cached directory mode only)\n- JS binding/export:\n - `packages/natives/native/index.d.ts` (`invalidateFsScanCache`)\n - `packages/natives/native/index.js`\n- Coding-agent mutation invalidation helpers:\n - `packages/coding-agent/src/tools/fs-cache-invalidation.ts`\n\n## Cache key partitioning (hard contract)\n\nEach entry is keyed by:\n\n- canonicalized `root` directory path\n- `include_hidden` boolean\n- `use_gitignore` boolean\n- `skip_node_modules` boolean\n- `detail` (`ScanDetail::Minimal` or `ScanDetail::Full`)\n\nImplications:\n\n- Hidden and non-hidden scans do **not** share entries.\n- Gitignore-respecting and ignore-disabled scans do **not** share entries.\n- Scans that prune `node_modules` do **not** share entries with scans that include it.\n- Minimal scans (path + file type only) do **not** share entries with full scans (mtime + regular-file size metadata).\n- `follow_links` is part of `ScanOptions` used to build the walker, but is not currently part of `CacheKey`; calls that differ only by `follow_links` can share a cache entry.\n\nConsumers must pass stable semantics for hidden/gitignore/node_modules/detail behavior; changing any keyed flag creates a different cache partition.\n\n## Scan collection behavior\n\nCache population uses `ignore::WalkBuilder` configured by `include_hidden`, `use_gitignore`, `skip_node_modules`, and `follow_links`:\n\n- sorted by file path\n- `.git` is always pruned\n- `node_modules` is pruned at traversal time when `skip_node_modules=true`\n- cancellation is checked before the walk and every 128 visited entries per parallel visitor\n- `ScanDetail::Minimal` records normalized relative path and file type only\n- `ScanDetail::Full` also records mtime and regular-file size\n\nSearch roots for cache scans are resolved by `fs_cache::resolve_search_path`:\n\n- relative paths are resolved against current cwd\n- target must be an existing directory\n- root is canonicalized when possible\n\n## Freshness and eviction policy\n\nGlobal policy (environment-overridable):\n\n- `FS_SCAN_CACHE_TTL_MS` (default `1000`)\n- `FS_SCAN_EMPTY_RECHECK_MS` (default `200`)\n- `FS_SCAN_CACHE_MAX_ENTRIES` (default `16`)\n\nBehavior:\n\n- `get_or_scan(...)`\n - if TTL is `0`: bypass cache entirely, always fresh scan (`cache_age_ms = 0`)\n - on cache hit within TTL: return cloned cached entries + non-zero `cache_age_ms`\n - on expired hit: evict key, rescan, store fresh entry\n- `force_rescan(..., store=false)`: remove any matching key, scan fresh, and do not repopulate cache\n- `force_rescan(..., store=true)`: remove any matching key, scan fresh, then store the new entry\n- max entry enforcement is oldest-first eviction by `created_at` after insert\n\n## Empty-result fast recheck (separate from normal hits)\n\nNormal cache hit:\n\n- a cache hit inside TTL returns cached entries and does nothing else.\n\nEmpty-result fast recheck:\n\n- this is a **caller-side** policy using `ScanResult.cache_age_ms`\n- if filtered/query result is empty and cached scan age is at least `empty_recheck_ms()`, caller performs one `force_rescan(..., store=true)` and retries\n- intended to reduce stale-negative results when files were added while the cache is still inside TTL\n\nCurrent consumers:\n\n- `glob`: rechecks when filtered matches are empty and scan age exceeds threshold\n- `fuzzyFind` (`fd.rs`): rechecks only when query is non-empty and scored matches are empty\n- `grep`: rechecks when cached directory candidate file list is empty\n\n## Consumer defaults and cache usage\n\nCache is opt-in on exposed scan/search APIs (`cache?: boolean`, default `false`).\n\nCurrent defaults in native APIs:\n\n- `glob`: `hidden=false`, `gitignore=true`, `cache=false`; `node_modules` is included only when `includeNodeModules=true` or the pattern mentions `node_modules`; full detail is used only when `sortByMtime=true`\n- `fuzzyFind`: `hidden=false`, `gitignore=true`, `cache=false`, `node_modules` is skipped, `follow_links=true`, minimal detail\n- `grep`: `hidden=true`, `gitignore=true`, `cache=false`; cached directory mode skips `node_modules` unless the glob mentions `node_modules`; minimal detail\n\nCurrent callers:\n\n- `@`-mention fuzzy file autocomplete enables cache (`fuzzyFind` with `cache: true`):\n - `packages/tui/src/autocomplete.ts`\n- Mutation flows invalidate through `packages/coding-agent/src/tools/fs-cache-invalidation.ts`.\n- Tool-level search integration (`packages/coding-agent/src/tools/search.ts`) currently calls native `grep` with `cache: false`.\n\n## Invalidation contract\n\nNative invalidation entrypoint:\n\n- `invalidateFsScanCache(path?: string)`\n - with `path`: remove cache entries whose root is a prefix of the target path\n - without path: clear all scan cache entries\n\nPath handling details:\n\n- relative invalidation paths are resolved against cwd\n- invalidation attempts canonicalization\n- if target does not exist (for example after delete), fallback canonicalizes the parent and reattaches the filename when possible\n- this preserves invalidation behavior for create/delete/rename where one side may not exist\n\n## Coding-agent mutation flow responsibilities\n\nCoding-agent code must invalidate after successful filesystem mutations.\n\nCentral helpers:\n\n- `invalidateFsScanAfterWrite(path)`\n- `invalidateFsScanAfterDelete(path)`\n- `invalidateFsScanAfterRename(oldPath, newPath)` (invalidates both sides when paths differ)\n\nCurrent mutation callsites include:\n\n- `packages/coding-agent/src/tools/write.ts`\n- `packages/coding-agent/src/edit/hashline/filesystem.ts`\n- `packages/coding-agent/src/edit/modes/patch.ts`\n- `packages/coding-agent/src/edit/modes/replace.ts`\n\nRule: if a flow mutates filesystem content or location and bypasses these helpers, cache staleness bugs are expected.\n\n## Adding a new cache consumer safely\n\nWhen introducing cache use in a new scanner/search path:\n\n1. **Use stable scan policy inputs**\n - decide hidden/gitignore/node_modules/detail semantics first\n - pass them consistently to `get_or_scan`/`force_rescan` so cache partitions are intentional\n\n2. **Treat cache data as pre-filtered only by traversal policy**\n - apply tool-specific filtering (glob patterns, type filters, scoring) after retrieval\n - never assume cached entries already reflect your higher-level filters\n\n3. **Implement empty-result fast recheck only for stale-negative risk**\n - use `scan.cache_age_ms >= empty_recheck_ms()`\n - retry once with `force_rescan(..., store=true, ...)`\n - keep this path separate from normal cache-hit logic\n\n4. **Respect no-cache mode explicitly**\n - when caller disables cache, call `force_rescan(..., store=false, ...)` or use an uncached streaming walker\n - do not populate shared cache in a no-cache request path\n\n5. **Wire mutation invalidation for any new write path**\n - after successful write/edit/delete/rename, call the coding-agent invalidation helper\n - for rename/move, invalidate both old and new paths\n\n6. **Do not add per-call TTL knobs**\n - current contract is global policy only (env-configured), no per-request TTL override\n\n## Known boundaries\n\n- Cache scope is process-local in-memory (`DashMap`), not persisted across process restarts.\n- Cache stores scan entries, not final tool results.\n- `glob`/`fuzzyFind`/cached `grep` share scan entries only when key dimensions (`root`, `hidden`, `gitignore`, `skip_node_modules`, `detail`) match.\n- `.git` is always excluded at scan collection time regardless of caller options.\n",
|
|
@@ -28,7 +28,7 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
|
|
|
28
28
|
"local-models.md": "# Embedded Local Tiny-Model Experiments\n\nThis document summarizes the experiments behind the optional **local** tiny-model paths for\nsession-title generation (`providers.tinyModel`), Mnemopi memory extraction/consolidation\n(`providers.memoryModel`), and the `auto` thinking-level difficulty classifier\n(`providers.autoThinkingModel`, which reuses the memory-model registry). It is a factual engineering\nrecord for maintainers: what we measured, which recipes won, and which models we shipped. All three\nsettings default to `online`, so existing users incur no downloads or on-device inference cost unless\nthey opt in.\n\n## Runtime / environment findings\n\n- **Stack**: `@huggingface/transformers` (transformers.js) v4 running under Bun. In Bun the library\n loads the **native `onnxruntime-node` backend** (not the WASM build).\n- **Device policy**: local tiny models default to CPU-only inference and retry once on CPU if an\n explicit accelerated provider cannot initialize.\n - Pick a provider persistently with the `providers.tinyModelDevice` setting (`default` keeps CPU),\n or per-run with the `PI_TINY_DEVICE` env var (which overrides the setting).\n - Accepted values are `cpu`, `gpu`, `metal`/`webgpu`, `auto`, `cuda`, `dml`, `coreml`, `wasm`,\n `webnn`, `webnn-gpu`, `webnn-cpu`, and `webnn-npu`.\n - Direct `coreml` remains opt-in via `PI_TINY_DEVICE=coreml`; it is not part of the default because\n cached decoder-LLM ONNX loads can fail during session initialization.\n - WebGPU/Metal works for the single-process eval harness, but the production worker forces\n Darwin `gpu`/`webgpu`/`auto` requests back to CPU because ONNX Runtime/Bun currently\n hard-crashes on worker teardown after WebGPU inference.\n - Use `providers.tinyModelDevice` or `PI_TINY_DEVICE` only when explicitly opting out of the CPU\n default.\n- **Quantization: q4 is the sweet spot** — smaller on disk, faster to load, and fast at inference.\n q8/int8 loads slower _and_ infers slower on CPU. Every shipped model defaults to `q4`; override the\n precision persistently with the `providers.tinyModelDtype` setting (`default` keeps `q4`, e.g. `fp16`\n for higher fidelity), or per-run with `PI_TINY_DTYPE` (which overrides the setting). Accepts `auto`,\n `fp32`, `fp16`, `q8`, `int8`, `uint8`, `q4`, `bnb4`, `q4f16`, `q2`, `q2f16`, `q1`, `q1f16`; an\n unrecognized value fails loudly at worker startup.\n- **Load-time correction (important).** An earlier belief that \"q4 >=1B models take minutes to load\"\n was a **measurement artifact** caused by running ~5 multi-GB HuggingFace downloads in parallel\n (I/O saturation). Clean, isolated **warm** loads are all sub-3s:\n - TinyLlama-1.1B q4: ~0.5s\n - Llama-3.2-1B q4: ~2.8s (`graphOpt=all`) / ~0.5s (`disabled`)\n - LFM2-1.2B q4: ~0.36s\n - Qwen2.5-1.5B q4: ~1.5s\n - Qwen3-1.7B q4: ~1.6s\n - gemma-3-1b q4: ~1.1s\n - Conclusion: **1B–1.7B models are viable on CPU.**\n- **`session_options.graphOptimizationLevel`** trades load vs inference speed: `disabled` = fastest\n load, slightly slower inference; `all` = default.\n- **First run** downloads weights from the HF Hub to a cache dir (q4 weights ~200MB–1.1GB depending\n on model); subsequent **warm** loads are sub-second to ~3s. Inference is async and\n background-friendly for memory tasks; titles are semi-interactive.\n\n## Task 1: Session title generation (`providers.tinyModel`)\n\n**Task**: turn the first user message into a 3–6 word title. Tiny models (sub-1B) suffice.\n\n**Winning recipe**:\n\n- Plain system prompt (no few-shot).\n- **Prefill** the assistant turn with `<title>` and **stop at `</title>`**, then take the first line.\n- Greedy decoding (`do_sample:false`), `enable_thinking:false` in the chat template.\n\n**What we learned**:\n\n- **Few-shot examples HURT sub-0.6B models** for titles; the tag-prefill rescues even 270M models.\n- **Token biasing (`bad_words_ids`) is a confirmed no-op** here — the prefill already controls the\n opener.\n\n**Leaderboard** (tag trick, CPU, warm):\n\n| Model | Verdict |\n| ------------- | ----------------------------------- |\n| LFM2-350M | Best speed/quality balance (~212MB) |\n| Qwen3-0.6B | Most robust |\n| gemma-3-270m | Smallest viable |\n| Qwen2.5-0.5B | Acceptable |\n| SmolLM2-135M | Too small |\n| flan-t5-small | Rejected — just echoes the input |\n\n**Shipped local options**: `lfm2-350m`, `qwen3-0.6b`, `gemma-270m`, `qwen2.5-0.5b`, `lfm2-700m`.\n**Default**: `online` (pi/smol).\n\n## Task 2: Mnemopi memory (`providers.memoryModel`)\n\nMnemopi runs two small-LLM tasks:\n\n1. **Extraction** — pull durable, structured items from a single message.\n2. **Consolidation** — summarize a list of memories into 1–3 faithful sentences.\n\nThese need **bigger models than titles: 1B–1.7B**. We tested LFM2-1.2B, Qwen2.5-1.5B, Qwen3-1.7B,\nand gemma-3-1b (q4, CPU) via four parallel agents each running 27–31 experiments.\n\n### Extraction findings\n\nThe stock 5-category JSON prompt fails on small models in two ways:\n\n1. The all-empty example `{\"facts\":[],...}` gets **copied verbatim** → 0 facts extracted.\n2. Capable models emit **JSON objects inside arrays**, which Mnemopi's `String(item)` coerces into\n the literal string `[object Object]`.\n\nThe robust fix is a **one-item-per-line output format** (consumed by Mnemopi's parser line-fallback)\nor a **flat JSON array of strings**. Every model also over-extracts pure small talk; an explicit\nchit-chat → NONE example is the best mitigation.\n\n### Technique polarity flips vs titles\n\n- At 1B+, **few-shot is the dominant quality lever**: e.g. Qwen2.5-1.5B extraction F1 0.52 → 0.83\n going 1 → 3 shots; gemma recall 0.65 → 0.92 with 2 shots.\n- **Prefill HURTS extraction** — it forces output on small talk, producing false positives.\n- **System-split** (instructions in the system role) helps models that have a system role.\n- **Greedy >= temperature** for both tasks.\n- **Token biasing** is again a no-op.\n\n### Per-model verdicts (head-to-head, 16-fixture set)\n\n- **Qwen3-1.7B** — most disciplined extraction: returns empty on small talk, no buried-fact leak,\n preserves language, clean flat JSON. Weaknesses: coarse granularity, missed a multi-turn value\n update.\n- **Qwen2.5-1.5B** — best extraction granularity (atomic facts), caught the value update, zero\n small-talk leakage. Weaknesses: weakest consolidation (run-on, no dedup) and one degenerate\n buried-fact output.\n- **gemma-3-1b** — best consolidation (dedup works, faithful, clean single-memory). Weaknesses: leaks\n small talk and translated German.\n- **LFM2-1.2B** — solid and fastest to load. Weaknesses: `Label: value` noise, small-talk + buried\n leaks, a fluffy single-memory summary.\n\n### Recommendation\n\nExtraction favors **precision** (do not pollute long-term memory) → **Qwen3-1.7B is the best single\npick** (its consolidation is good enough). If running a second model for consolidation, **gemma-3-1b**\nwins that task.\n\n**Shipped local options**: `qwen3-1.7b` (recommended), `gemma-3-1b`, `qwen2.5-1.5b`, `lfm2-1.2b`.\n**Default**: `online` (the configured smol model).\n\n### Known Mnemopi parser bugs (surfaced by these experiments)\n\n- `String(item)` produces `[object Object]` on object array items.\n- The line-fallback drops items `<=10` chars, so a correct short fact like `Name: Can` is discarded.\n\n\n## Integration notes\n\n- `providers.tinyModel`, `providers.memoryModel`, and `providers.autoThinkingModel` default to\n `online`, so existing users get **no downloads or on-device inference cost** unless they opt in.\n- Local inference runs **in a worker** (off the main thread); models are cached on disk and\n downloaded on first use.\n- The memory local path applies the refined recipes (line-format + small-talk-guarded extraction\n prompt, hardened consolidation prompt) via Mnemopi prompt overrides; the **online path is\n unchanged**.\n- `providers.autoThinkingModel` uses the same shipped local options as `providers.memoryModel`.\n",
|
|
29
29
|
"lsp-config.md": "# LSP configuration in OMP\n\nThis guide explains how to configure language servers for the OMP coding agent.\n\nSource of truth in code:\n\n- Server config type: `packages/coding-agent/src/lsp/types.ts` (`ServerConfig`)\n- Config loader: `packages/coding-agent/src/lsp/config.ts`\n- Built-in server definitions: `packages/coding-agent/src/lsp/defaults.json`\n\n## Auto-detection\n\nWhen no LSP config file is present, OMP auto-detects servers by intersecting two conditions:\n\n1. The project directory contains at least one of the server's `rootMarkers`.\n2. The server binary is available — checked in project-local bin directories first (e.g., `node_modules/.bin/`, `.venv/bin/`), then `$PATH`.\n\nNo configuration is required for common setups. The built-in server list covers most popular languages; see [`defaults.json`](../packages/coding-agent/src/lsp/defaults.json) for the full set.\n\n## Config file locations\n\nOMP merges LSP config from multiple files, lowest to highest priority:\n\n| Priority | Location |\n| ----------- | --------------------------------------------------------------------------------------------------------------------------- |\n| 5 (lowest) | `~/lsp.json`, `~/.lsp.json`, `~/lsp.yaml`, `~/.lsp.yaml`, `~/lsp.yml`, `~/.lsp.yml` |\n| 4 | Plugin LSP configs (marketplace / `--plugin-dir` roots) |\n| 3 | User config dirs: `~/.omp/agent/lsp.*`, `~/.claude/lsp.*`, `~/.codex/lsp.*`, `~/.gemini/lsp.*` |\n| 2 | Project config dirs: `<project>/.omp/lsp.*`, `<project>/.claude/lsp.*`, `<project>/.codex/lsp.*`, `<project>/.gemini/lsp.*` |\n| 1 (highest) | Project root: `<project>/lsp.*` and `<project>/.lsp.*` |\n\nEach location accepts `.json`, `.yaml`, and `.yml` variants, including hidden-file versions (`.lsp.json`, `.lsp.yaml`, `.lsp.yml`). Files are merged in order: higher-priority files override lower-priority fields for the same server. Servers not mentioned in any override file remain at their built-in defaults.\n\n**Recommended locations:**\n\n- User-wide preferences → `~/.omp/agent/lsp.json`\n- Project-specific overrides → `<project>/.omp/lsp.json`\n\n> **Note:** Auto-detection is skipped only when at least one config file contributes server overrides. A config file that only sets `idleTimeoutMs` still lets OMP auto-detect built-in servers. When server overrides exist, OMP merges them with defaults and then loads servers that have matching `rootMarkers`, an available binary, and are not explicitly `disabled`.\n\n## File shape\n\nBoth JSON and YAML are accepted. The top-level object can use either a `servers` wrapper key or a flat map directly:\n\n```json\n{\n \"servers\": {\n \"server-name\": { ... }\n },\n \"idleTimeoutMs\": 300000\n}\n```\n\nor (flat, without the `servers` wrapper):\n\n```json\n{\n \"server-name\": { ... },\n \"idleTimeoutMs\": 300000\n}\n```\n\nTop-level keys:\n\n- `servers` — map of server name to `ServerConfig` (optional wrapper; flat form is equivalent)\n- `idleTimeoutMs` — shut down idle language servers after this many milliseconds; disabled by default\n\n## ServerConfig fields\n\n| Field | Type | Required | Description |\n| ----------------- | ---------- | -------- | ---------------------------------------------------------------------------------------------------------------- |\n| `command` | `string` | yes | Binary name (resolved via PATH/local bins) or absolute path |\n| `args` | `string[]` | no | Arguments passed to the binary |\n| `fileTypes` | `string[]` | yes | File extensions this server handles, e.g. `[\".ts\", \".tsx\"]` |\n| `rootMarkers` | `string[]` | yes | Files/dirs that indicate a project root; glob patterns (e.g. `*.cabal`) are supported |\n| `initOptions` | `object` | no | Sent as `initializationOptions` during LSP handshake |\n| `settings` | `object` | no | Workspace settings pushed via `workspace/didChangeConfiguration` |\n| `disabled` | `boolean` | no | Set to `true` to disable this server entirely |\n| `warmupTimeoutMs` | `number` | no | Startup timeout in ms for this server (overrides the global default) |\n| `isLinter` | `boolean` | no | Mark server as linter/formatter only; excluded from type-intelligence operations (hover, go-to-definition, etc.) |\n| `capabilities` | `object` | no | Opt-in server-specific features; see [Capabilities](#capabilities) |\n\n`resolvedCommand` is populated automatically at runtime — do not set it manually.\n\n### Capabilities\n\nThe `capabilities` object enables optional server-specific features that OMP supports on a per-server basis:\n\n```json\n{\n \"capabilities\": {\n \"flycheck\": true,\n \"ssr\": true,\n \"expandMacro\": true,\n \"runnables\": true,\n \"relatedTests\": true\n }\n}\n```\n\nAll fields are boolean and optional. They are currently used by `rust-analyzer`.\n\n## Common recipes\n\n### Override a built-in server's settings\n\nPartial overrides are merged onto the built-in defaults. You only need to specify the fields you want to change.\n\n```json\n{\n \"servers\": {\n \"typescript-language-server\": {\n \"args\": [\"--stdio\", \"--log-level\", \"4\"]\n }\n }\n}\n```\n\n```yaml\nservers:\n gopls:\n settings:\n gopls:\n gofumpt: false\n staticcheck: false\n```\n\n### Disable a built-in server\n\n```json\n{\n \"servers\": {\n \"eslint\": {\n \"disabled\": true\n }\n }\n}\n```\n\n### Register a custom server\n\nNew servers require `command`, `fileTypes`, and `rootMarkers`. All other fields are optional.\n\n```json\n{\n \"servers\": {\n \"my-lsp\": {\n \"command\": \"my-lsp-server\",\n \"args\": [\"--stdio\"],\n \"fileTypes\": [\".xyz\"],\n \"rootMarkers\": [\".xyz-project\", \".git\"]\n }\n }\n}\n```\n\n### Set a global idle timeout\n\nShut down language servers that have been inactive for more than five minutes:\n\n```json\n{\n \"idleTimeoutMs\": 300000\n}\n```\n\n### Disable a server for one project, keep it globally\n\nPlace the override in `<project>/.omp/lsp.json`:\n\n```json\n{\n \"servers\": {\n \"pylsp\": {\n \"disabled\": true\n }\n }\n}\n```\n\nThe user-level config in `~/.omp/agent/lsp.json` is unaffected; pylsp is only suppressed in this project.\n\n## Built-in server list\n\nThe following servers ship in `defaults.json` and are eligible for auto-detection:\n\n| Server key | Language(s) | Binary |\n| ----------------------------- | ----------------------------- | --------------------------------- |\n| `rust-analyzer` | Rust | `rust-analyzer` |\n| `clangd` | C, C++, ObjC | `clangd` |\n| `zls` | Zig | `zls` |\n| `gopls` | Go | `gopls` |\n| `typescript-language-server` | TypeScript, JavaScript | `typescript-language-server` |\n| `denols` | TypeScript, JavaScript (Deno) | `deno` |\n| `biome` | TS/JS/JSON (linter) | `biome` |\n| `eslint` | TS/JS/Vue/Svelte (linter) | `vscode-eslint-language-server` |\n| `vscode-html-language-server` | HTML | `vscode-html-language-server` |\n| `vscode-css-language-server` | CSS, SCSS, Less | `vscode-css-language-server` |\n| `vscode-json-language-server` | JSON | `vscode-json-language-server` |\n| `tailwindcss` | HTML, CSS, TS/JS | `tailwindcss-language-server` |\n| `svelte` | Svelte | `svelteserver` |\n| `vue-language-server` | Vue | `vue-language-server` |\n| `astro` | Astro | `astro-ls` |\n| `pyright` | Python | `pyright-langserver` |\n| `basedpyright` | Python | `basedpyright-langserver` |\n| `pylsp` | Python | `pylsp` |\n| `ruff` | Python (linter) | `ruff` |\n| `jdtls` | Java | `jdtls` |\n| `kotlin-lsp` | Kotlin | `kotlin-lsp` |\n| `metals` | Scala | `metals` |\n| `hls` | Haskell | `haskell-language-server-wrapper` |\n| `ocamllsp` | OCaml | `ocamllsp` |\n| `elixirls` | Elixir | `elixir-ls` |\n| `expert` | Elixir | `expert` |\n| `erlangls` | Erlang | `erlang_ls` |\n| `gleam` | Gleam | `gleam` |\n| `solargraph` | Ruby | `solargraph` |\n| `ruby-lsp` | Ruby | `ruby-lsp` |\n| `rubocop` | Ruby (linter) | `rubocop` |\n| `bashls` | Bash, Zsh | `bash-language-server` |\n| `lua-language-server` | Lua | `lua-language-server` |\n| `intelephense` | PHP | `intelephense` |\n| `phpactor` | PHP | `phpactor` |\n| `omnisharp` | C# | `omnisharp` |\n| `yamlls` | YAML | `yaml-language-server` |\n| `terraformls` | Terraform | `terraform-ls` |\n| `dockerls` | Dockerfile | `docker-langserver` |\n| `helm-ls` | Helm | `helm_ls` |\n| `nixd` | Nix | `nixd` |\n| `nil` | Nix | `nil` |\n| `ols` | Odin | `ols` |\n| `dartls` | Dart | `dart` |\n| `marksman` | Markdown | `marksman` |\n| `texlab` | LaTeX | `texlab` |\n| `graphql` | GraphQL | `graphql-lsp` |\n| `prismals` | Prisma | `prisma-language-server` |\n| `vimls` | Vim script | `vim-language-server` |\n| `emmet-language-server` | HTML, CSS, JSX | `emmet-language-server` |\n| `sourcekit-lsp` | Swift | `sourcekit-lsp` |\n| `swiftlint` | Swift (linter) | `swiftlint` |\n| `tlaplus` | TLA+ | `tlapm_lsp` |\n",
|
|
30
30
|
"macos-signing-notarization.md": "# macOS signing & notarization\n\nThe compiled macOS `omp` binaries shipped on GitHub Releases are signed with a\n**Developer ID Application** certificate and **notarized** by Apple. This makes\nthem Gatekeeper-acceptable and is the prerequisite for an official Homebrew\nsubmission (see [#776](https://github.com/can1357/oh-my-pi/issues/776)).\n\nSigning happens in CI, in the `release_binary` job's darwin matrix legs\n(`.github/workflows/ci.yml`), via `scripts/ci-macos-sign.sh`. It **auto-skips**\nuntil the `APPLE_*` repository secrets below are configured, so releases keep\nworking (ad-hoc signed, as before) in the meantime.\n\n## How it works\n\n1. `ci:release:build-binaries` builds and **ad-hoc** signs the binary (so it can\n run on the build runner).\n2. `scripts/ci-macos-sign.sh` then:\n - imports the Developer ID cert into a throwaway keychain;\n - re-signs with `--options runtime --timestamp` (hardened runtime + secure\n timestamp) and `--entitlements scripts/macos-entitlements.plist`;\n - runs `--version` and `--smoke-test` under the new signature to fail fast;\n - notarizes the binary via `notarytool submit --wait`.\n3. `release_github_verify` re-downloads the published arm64 asset and asserts it\n is **not** ad-hoc, passes `codesign --verify --strict`, and boots cleanly.\n\n### Why the entitlements are mandatory\n\nThe binary is a Bun single-file executable, so the hardened runtime needs:\n\n| Entitlement | Reason |\n| --- | --- |\n| `com.apple.security.cs.allow-jit` | JavaScriptCore JITs at runtime. |\n| `com.apple.security.cs.allow-unsigned-executable-memory` | JSC executable memory pages. |\n| `com.apple.security.cs.disable-library-validation` | omp extracts its native addon (`pi_natives.<triple>.node`) and other optional dylibs to a runtime cache and `dlopen()`s them. They do not share the main binary's Team ID, so without this the hardened runtime aborts with *\"mapping process and mapped file have different Team IDs\"* — breaking effectively every command. |\n\nWithout `disable-library-validation`, a signed+notarized binary signs and\nnotarizes fine but **fails at first real use**. `scripts/ci-macos-sign.sh` runs\n`--smoke-test` after signing specifically to catch this before notarizing.\n\n### Stapling limitation (important)\n\nA bare Mach-O executable **cannot be stapled** (`stapler` only supports\n`.app`/`.pkg`/`.dmg`). The binary is genuinely notarized — `notarytool` returns\n`Accepted` and the ticket exists on Apple's servers keyed to its cdhash — but\nbecause there is no *stapled* ticket, a direct `spctl -a -t exec` assessment\nreports `rejected / source=Unnotarized Developer ID`. This is expected and is\n**not** a signing or credential failure.\n\nWhat this means in practice:\n\n- `curl https://omp.sh/install | sh` — `curl` sets no quarantine bit, so\n Gatekeeper is never consulted; the binary just runs. ✅\n- Homebrew **formula** installs — Homebrew does not quarantine formula files, so\n Gatekeeper is never consulted. ✅\n- Anything that **quarantines** the binary (a browser download, or a Homebrew\n **cask**) and is assessed offline will be blocked, because there is no stapled\n ticket. For that route, wrap the binary in a stapleable, notarized **`.pkg` or\n `.dmg`** (`xcrun stapler staple` works on those). That is a follow-up and is\n **not** required for the `curl`/formula paths.\n\n## Required GitHub secrets\n\nAdd these under **Settings → Secrets and variables → Actions** (repo secrets).\nAll five secrets (cert, password, and API key trio) must be present for\nsigning to engage.\n\n| Secret | What it is |\n| --- | --- |\n| `APPLE_CERTIFICATE_P12` | base64 of the exported Developer ID Application `.p12` (cert + private key). |\n| `APPLE_CERTIFICATE_PASSWORD` | password you set when exporting the `.p12`. |\n| `APPLE_API_KEY_ID` | App Store Connect API **Key ID**. |\n| `APPLE_API_ISSUER_ID` | App Store Connect API **Issuer ID** (UUID). |\n| `APPLE_API_KEY` | base64 of the App Store Connect `.p8` private key. |\n\n### Producing the credential files\n\nDrop these into a working directory (default `~/omp-signing`):\n\n| File | How |\n| --- | --- |\n| `*.p12` | **Keychain Access** → right-click your *Developer ID Application: …* identity (the entry that expands to a cert **with** a private key) → **Export…** → save as `.p12` and set a password. |\n| `p12-password.txt` | the password you just set on the `.p12`. |\n| `AuthKey_<KEYID>.p8` | App Store Connect → **Users and Access → Integrations → App Store Connect API** → create a key (**Account Holder** role also allows API cert creation; **Developer** is enough for notarization) → **download once** (non-recoverable). |\n| `issuer-id.txt` | the **Issuer ID** (UUID) shown above the keys table. |\n| `key-id.txt` | *optional* — the Key ID; otherwise read from the `.p8` filename. |\n\nThe App Store Connect API key is the one credential that **cannot** be minted\nfrom a CLI — it is the bootstrap credential for the API itself, and the `.p8`\ndownloads exactly once. Everything else is local.\n\n### Uploading (no value leaves disk)\n\n`scripts/ci-macos-upload-secrets.sh` validates the files (opens the `.p12` with\nyour password, sanity-checks the `.p8`) and pipes each value to `gh secret set`\nover stdin — no secret is ever printed to the terminal, argv, or shell history:\n\n```sh\nscripts/ci-macos-upload-secrets.sh ~/omp-signing --dry-run # validate first\nscripts/ci-macos-upload-secrets.sh ~/omp-signing # upload all five\ngh secret list --repo can1357/oh-my-pi # confirm\n```\n\nRe-run it whenever the certificate is renewed.\n\n### Finding your signing identity / Team ID (sanity check)\n\n```sh\nsecurity find-identity -v -p codesigning\n# e.g. \"Developer ID Application: Your Name (TEAMID1234)\"\n```\n\nThe script selects the first `Developer ID Application` identity automatically;\nyou do not need to store the identity string or Team ID as a secret.\n\n## Local dry run\n\nYou can exercise the full sign+notarize path locally (real cert + API key) by\nexporting the five env vars and running:\n\n```sh\nRELEASE_TARGETS=darwin-arm64 bun run ci:release:build-binaries\nAPPLE_CERTIFICATE_P12=… APPLE_CERTIFICATE_PASSWORD=… \\\nAPPLE_API_KEY_ID=… APPLE_API_ISSUER_ID=… APPLE_API_KEY=… \\\n bash scripts/ci-macos-sign.sh packages/coding-agent/binaries/omp-darwin-arm64\n```\n",
|
|
31
|
-
"marketplace.md": "# Marketplace plugin system\n\nThe marketplace system lets you discover, install, and manage plugins from Git, local, or direct-catalog sources. It is compatible with the Claude Code plugin registry format.\n\n## Quick start\n\n```\n/marketplace add anthropics/claude-plugins-official\n/marketplace install wordpress.com@claude-plugins-official\n```\n\nIn the TUI, `/marketplace` with no arguments opens the interactive plugin browser. In non-TUI command handling, `/marketplace` lists configured marketplaces; use `/marketplace discover` to browse.\n\n## Concepts\n\nA **marketplace** is a Git repository (or local directory) containing a catalog file at `.claude-plugin/marketplace.json`. The catalog lists available plugins with their sources, descriptions, and metadata.\n\nA **plugin** is a directory containing Claude/OMP plugin content such as skills, commands, agents, hooks, tools, MCP servers, or LSP servers. Extension modules (`package.json` `omp.extensions` entry points) are not loaded from marketplace installs — they only load for npm-installed or `omp plugin link`ed plugins. Plugins are identified by `name@marketplace` (e.g. `code-review@claude-plugins-official`).\n\n**Scopes**: marketplace plugins can be installed at two scopes:\n\n- **user** (default) -- available in all projects, stored in `~/.omp/plugins/installed_plugins.json`\n- **project** -- available only in the active project, stored in the nearest project `.omp/plugins/installed_plugins.json`\n\nEnabled project-scoped installs shadow enabled user-scoped installs of the same plugin. A disabled project install does not shadow the user install.\n\n## Commands\n\n### Interactive mode\n\n| Command | Effect |\n| -------------- | ----------------------------------------- |\n| `/marketplace` | Open interactive plugin browser (install) |\n\n### Marketplace management\n\n| Command | Effect |\n| ---------------------------- | -------------------------------------------- |\n| `/marketplace add <source>` | Add a marketplace source |\n| `/marketplace remove <name>` | Remove a marketplace |\n| `/marketplace update [name]` | Re-fetch catalog(s); omit name to update all |\n| `/marketplace list` | List configured marketplaces |\n\n### Plugin operations\n\n| Command | Effect |\n| ------------------------------------------------------------------------- | -------------------------------------------------- |\n| `/marketplace discover [marketplace]` | Browse available plugins |\n| `/marketplace install [--force] [--scope user\\|project] name@marketplace` | Install a plugin |\n| `/marketplace uninstall [--scope user\\|project] name@marketplace` | Uninstall a plugin; no args opens the TUI selector |\n| `/marketplace installed` | List installed marketplace plugins |\n| `/marketplace upgrade [--scope user\\|project] [name@marketplace]` | Upgrade one or all plugins |\n| `/plugins list` | List npm/link and marketplace plugins |\n| `/plugins enable [--scope user\\|project] name@marketplace` | Enable a marketplace plugin |\n| `/plugins disable [--scope user\\|project] name@marketplace` | Disable a marketplace plugin |\n\n### CLI equivalents\n\nThe same operations are available from the command line:\n\n```\nomp plugin marketplace add <source>\nomp plugin marketplace remove <name>\nomp plugin marketplace update [name]\nomp plugin marketplace list\nomp plugin discover [marketplace]\nomp plugin install [--force] [--scope user|project] name@marketplace\nomp plugin uninstall [--scope user|project] name@marketplace\nomp plugin upgrade [--scope user|project] [name@marketplace]\nomp plugin enable [--scope user|project] name@marketplace\nomp plugin disable [--scope user|project] name@marketplace\n```\n\n## Marketplace sources\n\nWhen you run `/marketplace add <source>`, the system classifies the source:\n\n| Source format | Type | Example |\n| ------------------------------- | -------------------------------------------------- | -------------------------------------- |\n| `owner/repo` | GitHub shorthand | `anthropics/claude-plugins-official` |\n| `https://...*.json` | Direct catalog URL | `https://example.com/marketplace.json` |\n| `https://...` / `http://...` | Git repository unless the URL path ends in `.json` | `https://github.com/org/repo` |\n| `git@...` / `ssh://...` | Git repository | `git@github.com:org/repo.git` |\n| `./path` or `~/path` or `/path` | Local directory | `./my-marketplace` |\n\nGit and local sources must contain `.claude-plugin/marketplace.json`. Direct catalog URLs cache only the JSON catalog; plugins in URL-sourced catalogs cannot use relative string sources like `\"./plugins/foo\"`.\n\n## Catalog format (marketplace.json)\n\nA marketplace catalog lives at `.claude-plugin/marketplace.json` in the repository root:\n\n```json\n{\n \"$schema\": \"https://anthropic.com/claude-code/marketplace.schema.json\",\n \"name\": \"my-marketplace\",\n \"owner\": {\n \"name\": \"Your Name\",\n \"email\": \"you@example.com\"\n },\n \"metadata\": {\n \"description\": \"A collection of plugins\",\n \"version\": \"1.0.0\",\n \"pluginRoot\": \"plugins\"\n },\n \"plugins\": [\n {\n \"name\": \"my-plugin\",\n \"description\": \"What this plugin does\",\n \"source\": \"./my-plugin\",\n \"category\": \"development\",\n \"homepage\": \"https://github.com/you/my-plugin\"\n }\n ]\n}\n```\n\n### Required fields\n\n| Field | Description |\n| ------------ | ---------------------------------------------------------------------------------------------------------------- |\n| `name` | Marketplace name. Lowercase alphanumeric, hyphens, and dots. Must start and end with alphanumeric. Max 64 chars. |\n| `owner.name` | Marketplace owner name |\n| `plugins` | Array of plugin entries |\n\nTop-level `metadata.description`, `metadata.version`, and `metadata.pluginRoot` are optional. When `metadata.pluginRoot` is set, it is prepended to relative plugin `source` paths.\n\n### Plugin entry fields\n\n| Field | Required | Description |\n| ------------- | -------- | --------------------------------------------------------------------------------------- |\n| `name` | yes | Plugin name (same rules as marketplace name) |\n| `source` | yes | Where to find the plugin (see below) |\n| `description` | no | Short description |\n| `version` | no | Version string; install version falls back to plugin manifest, source SHA, then `0.0.0` |\n| `author` | no | `{ name, email? }` |\n| `homepage` | no | URL |\n| `repository` | no | Repository URL/string |\n| `license` | no | License string |\n| `keywords` | no | Array of string keywords |\n| `category` | no | Category string (e.g. `development`, `productivity`, `security`) |\n| `tags` | no | Array of string tags |\n| `strict` | no | Boolean |\n| `commands` | no | Slash commands provided |\n| `agents` | no | Agents provided |\n| `hooks` | no | Hook definitions |\n| `mcpServers` | no | MCP server definitions |\n| `lspServers` | no | LSP server definitions or path; copied to `.lsp.json` on install |\n\n### Plugin source formats\n\nThe `source` field supports these formats. String sources must start with `./` and are resolved inside the marketplace root, after optional `metadata.pluginRoot` is prepended:\n\n**Relative path** (within the marketplace repo):\n\n```json\n\"source\": \"./my-plugin\"\n```\n\n**Git repository URL**:\n\n```json\n\"source\": {\n \"source\": \"url\",\n \"url\": \"https://github.com/org/repo.git\",\n \"sha\": \"abc123...\"\n}\n```\n\n**GitHub shorthand**:\n\n```json\n\"source\": {\n \"source\": \"github\",\n \"repo\": \"org/repo\",\n \"ref\": \"main\",\n \"sha\": \"abc123...\"\n}\n```\n\n**Git subdirectory** (monorepo):\n\n```json\n\"source\": {\n \"source\": \"git-subdir\",\n \"url\": \"https://github.com/org/monorepo.git\",\n \"path\": \"plugins/my-plugin\",\n \"ref\": \"main\",\n \"sha\": \"abc123...\"\n}\n```\n\n**npm package** (parsed but not installable yet):\n\n```json\n\"source\": {\n \"source\": \"npm\",\n \"package\": \"@scope/my-plugin\",\n \"version\": \"1.0.0\"\n}\n```\n\nCurrent installer behavior rejects npm marketplace sources with `npm plugin sources are not yet supported`; use relative, GitHub, URL, or git-subdir sources.\n\n## On-disk layout\n\n```\n~/.omp/\n marketplaces.json # Registry of added marketplaces\n plugins/\n installed_plugins.json # User-scoped marketplace plugins (version: 2)\n cache/\n marketplaces/<name>/ # Cached marketplace clone/catalog\n plugins/<marketplace>___<plugin>___<version>/ # Cached plugin directories\n\n<project>/.omp/\n plugins/\n installed_plugins.json # Project-scoped marketplace plugins (version: 2)\n```\n\n## Naming rules\n\nMarketplace and plugin names must:\n\n- Start and end with a lowercase letter or digit\n- Contain only lowercase letters, digits, hyphens, and dots\n- Be at most 64 characters\n\nPlugin IDs (`name@marketplace`) must be at most 128 characters total.\n\nValid examples: `my-plugin`, `code-review`, `wordpress.com`, `ai-firstify`\nInvalid examples: `-bad`, `bad-`, `.bad`, `Bad`, `under_score`\n",
|
|
31
|
+
"marketplace.md": "# Marketplace plugin system\n\nThe marketplace system lets you discover, install, and manage plugins from Git, local, or direct-catalog sources. It is compatible with the Claude Code plugin registry format.\n\n## Quick start\n\n```\n/marketplace add anthropics/claude-plugins-official\n/marketplace install wordpress.com@claude-plugins-official\n```\n\nIn the TUI, `/marketplace` with no arguments opens the interactive plugin browser. In non-TUI command handling, `/marketplace` lists configured marketplaces; use `/marketplace discover` to browse.\n\n## Concepts\n\nA **marketplace** is a Git repository (or local directory) containing a catalog file at `.omp-plugin/marketplace.json` (preferred) or `.claude-plugin/marketplace.json` (Claude Code-compatible fallback). The catalog lists available plugins with their sources, descriptions, and metadata.\n\nA **plugin** is a directory containing Claude/OMP plugin content such as skills, commands, agents, hooks, tools, MCP servers, or LSP servers. Extension modules (`package.json` `omp.extensions` entry points) are not loaded from marketplace installs — they only load for npm-installed or `omp plugin link`ed plugins. Plugins are identified by `name@marketplace` (e.g. `code-review@claude-plugins-official`).\n\n**Scopes**: marketplace plugins can be installed at two scopes:\n\n- **user** (default) -- available in all projects, stored in `~/.omp/plugins/installed_plugins.json`\n- **project** -- available only in the active project, stored in the nearest project `.omp/plugins/installed_plugins.json`\n\nEnabled project-scoped installs shadow enabled user-scoped installs of the same plugin. A disabled project install does not shadow the user install.\n\n## Commands\n\n### Interactive mode\n\n| Command | Effect |\n| -------------- | ----------------------------------------- |\n| `/marketplace` | Open interactive plugin browser (install) |\n\n### Marketplace management\n\n| Command | Effect |\n| ---------------------------- | -------------------------------------------- |\n| `/marketplace add <source>` | Add a marketplace source |\n| `/marketplace remove <name>` | Remove a marketplace |\n| `/marketplace update [name]` | Re-fetch catalog(s); omit name to update all |\n| `/marketplace list` | List configured marketplaces |\n\n### Plugin operations\n\n| Command | Effect |\n| ------------------------------------------------------------------------- | -------------------------------------------------- |\n| `/marketplace discover [marketplace]` | Browse available plugins |\n| `/marketplace install [--force] [--scope user\\|project] name@marketplace` | Install a plugin |\n| `/marketplace uninstall [--scope user\\|project] name@marketplace` | Uninstall a plugin; no args opens the TUI selector |\n| `/marketplace installed` | List installed marketplace plugins |\n| `/marketplace upgrade [--scope user\\|project] [name@marketplace]` | Upgrade one or all plugins |\n| `/plugins list` | List npm/link and marketplace plugins |\n| `/plugins enable [--scope user\\|project] name@marketplace` | Enable a marketplace plugin |\n| `/plugins disable [--scope user\\|project] name@marketplace` | Disable a marketplace plugin |\n\n### CLI equivalents\n\nThe same operations are available from the command line:\n\n```\nomp plugin marketplace add <source>\nomp plugin marketplace remove <name>\nomp plugin marketplace update [name]\nomp plugin marketplace list\nomp plugin discover [marketplace]\nomp plugin install [--force] [--scope user|project] name@marketplace\nomp plugin uninstall [--scope user|project] name@marketplace\nomp plugin upgrade [--scope user|project] [name@marketplace]\nomp plugin enable [--scope user|project] name@marketplace\nomp plugin disable [--scope user|project] name@marketplace\n```\n\n## Marketplace sources\n\nWhen you run `/marketplace add <source>`, the system classifies the source:\n\n| Source format | Type | Example |\n| ------------------------------- | -------------------------------------------------- | -------------------------------------- |\n| `owner/repo` | GitHub shorthand | `anthropics/claude-plugins-official` |\n| `https://...*.json` | Direct catalog URL | `https://example.com/marketplace.json` |\n| `https://...` / `http://...` | Git repository unless the URL path ends in `.json` | `https://github.com/org/repo` |\n| `git@...` / `ssh://...` | Git repository | `git@github.com:org/repo.git` |\n| `./path` or `~/path` or `/path` | Local directory | `./my-marketplace` |\n\nGit and local sources must contain a catalog at `.omp-plugin/marketplace.json` (preferred) or `.claude-plugin/marketplace.json` (Claude Code-compatible fallback). Direct catalog URLs cache only the JSON catalog; plugins in URL-sourced catalogs cannot use relative string sources like `\"./plugins/foo\"`.\n\n## Catalog format (marketplace.json)\n\nA marketplace catalog lives at `.omp-plugin/marketplace.json` in the repository root. When omp is the only intended consumer, prefer this path. To remain Claude Code-compatible (omp loads the same shape from either path), publish at `.claude-plugin/marketplace.json` instead — omp uses it as a fallback when `.omp-plugin/marketplace.json` is absent. A repository may ship both: omp reads the `.omp-plugin/` copy, Claude Code reads the `.claude-plugin/` copy. Same catalog format either way:\n\n```json\n{\n \"$schema\": \"https://anthropic.com/claude-code/marketplace.schema.json\",\n \"name\": \"my-marketplace\",\n \"owner\": {\n \"name\": \"Your Name\",\n \"email\": \"you@example.com\"\n },\n \"metadata\": {\n \"description\": \"A collection of plugins\",\n \"version\": \"1.0.0\",\n \"pluginRoot\": \"plugins\"\n },\n \"plugins\": [\n {\n \"name\": \"my-plugin\",\n \"description\": \"What this plugin does\",\n \"source\": \"./my-plugin\",\n \"category\": \"development\",\n \"homepage\": \"https://github.com/you/my-plugin\"\n }\n ]\n}\n```\n\n### Required fields\n\n| Field | Description |\n| ------------ | ---------------------------------------------------------------------------------------------------------------- |\n| `name` | Marketplace name. Lowercase alphanumeric, hyphens, and dots. Must start and end with alphanumeric. Max 64 chars. |\n| `owner.name` | Marketplace owner name |\n| `plugins` | Array of plugin entries |\n\nTop-level `metadata.description`, `metadata.version`, and `metadata.pluginRoot` are optional. When `metadata.pluginRoot` is set, it is prepended to relative plugin `source` paths.\n\n### Plugin entry fields\n\n| Field | Required | Description |\n| ------------- | -------- | --------------------------------------------------------------------------------------- |\n| `name` | yes | Plugin name (same rules as marketplace name) |\n| `source` | yes | Where to find the plugin (see below) |\n| `description` | no | Short description |\n| `version` | no | Version string; install version falls back to plugin manifest, source SHA, then `0.0.0` |\n| `author` | no | `{ name, email? }` |\n| `homepage` | no | URL |\n| `repository` | no | Repository URL/string |\n| `license` | no | License string |\n| `keywords` | no | Array of string keywords |\n| `category` | no | Category string (e.g. `development`, `productivity`, `security`) |\n| `tags` | no | Array of string tags |\n| `strict` | no | Boolean |\n| `commands` | no | Slash commands provided |\n| `agents` | no | Agents provided |\n| `hooks` | no | Hook definitions |\n| `mcpServers` | no | MCP server definitions |\n| `lspServers` | no | LSP server definitions or path; copied to `.lsp.json` on install |\n\n### Plugin source formats\n\nThe `source` field supports these formats. String sources must start with `./` and are resolved inside the marketplace root, after optional `metadata.pluginRoot` is prepended:\n\n**Relative path** (within the marketplace repo):\n\n```json\n\"source\": \"./my-plugin\"\n```\n\n**Git repository URL**:\n\n```json\n\"source\": {\n \"source\": \"url\",\n \"url\": \"https://github.com/org/repo.git\",\n \"sha\": \"abc123...\"\n}\n```\n\n**GitHub shorthand**:\n\n```json\n\"source\": {\n \"source\": \"github\",\n \"repo\": \"org/repo\",\n \"ref\": \"main\",\n \"sha\": \"abc123...\"\n}\n```\n\n**Git subdirectory** (monorepo):\n\n```json\n\"source\": {\n \"source\": \"git-subdir\",\n \"url\": \"https://github.com/org/monorepo.git\",\n \"path\": \"plugins/my-plugin\",\n \"ref\": \"main\",\n \"sha\": \"abc123...\"\n}\n```\n\n**npm package** (parsed but not installable yet):\n\n```json\n\"source\": {\n \"source\": \"npm\",\n \"package\": \"@scope/my-plugin\",\n \"version\": \"1.0.0\"\n}\n```\n\nCurrent installer behavior rejects npm marketplace sources with `npm plugin sources are not yet supported`; use relative, GitHub, URL, or git-subdir sources.\n\n## On-disk layout\n\n```\n~/.omp/\n marketplaces.json # Registry of added marketplaces\n plugins/\n installed_plugins.json # User-scoped marketplace plugins (version: 2)\n cache/\n marketplaces/<name>/ # Cached marketplace clone/catalog\n plugins/<marketplace>___<plugin>___<version>/ # Cached plugin directories\n\n<project>/.omp/\n plugins/\n installed_plugins.json # Project-scoped marketplace plugins (version: 2)\n```\n\n## Naming rules\n\nMarketplace and plugin names must:\n\n- Start and end with a lowercase letter or digit\n- Contain only lowercase letters, digits, hyphens, and dots\n- Be at most 64 characters\n\nPlugin IDs (`name@marketplace`) must be at most 128 characters total.\n\nValid examples: `my-plugin`, `code-review`, `wordpress.com`, `ai-firstify`\nInvalid examples: `-bad`, `bad-`, `.bad`, `Bad`, `under_score`\n",
|
|
32
32
|
"mcp-config.md": "# MCP configuration in OMP\n\nThis guide explains how to add, edit, and validate MCP servers for the OMP coding agent.\n\nSource of truth in code:\n\n- Runtime config types: `packages/coding-agent/src/mcp/types.ts`\n- Config writer: `packages/coding-agent/src/mcp/config-writer.ts`\n- Loader + validation: `packages/coding-agent/src/mcp/config.ts`\n- Standalone `mcp.json` discovery: `packages/coding-agent/src/discovery/mcp-json.ts`\n- Schema: `packages/coding-agent/src/config/mcp-schema.json`\n\n## Preferred config locations\n\nOMP can discover MCP servers from multiple tools (`.claude/`, `.cursor/`, `.vscode/`, `opencode.json`, and more), but for OMP-native configuration you should usually use one of these primary files:\n\n- Project: `.omp/mcp.json`\n- User: `~/.omp/agent/mcp.json` (or `~/.omp/profiles/<name>/agent/mcp.json` when a named profile is active — see [Profiles](#profiles))\n\nThe native provider also reads `.omp/.mcp.json` and `~/.omp/agent/.mcp.json` for compatibility, but OMP writes to the primary `mcp.json` paths above.\n\nOMP also accepts fallback standalone files in the project root:\n\n- `mcp.json`\n- `.mcp.json`\n\nUse `.omp/mcp.json` or `~/.omp/agent/mcp.json` when you want OMP to own the configuration. Use root `mcp.json` / `.mcp.json` only when you want a portable fallback file that other MCP clients may also read.\n\n### Profiles\n\nNamed profiles (`omp --profile <name>`, the `--alias` shortcut, or `OMP_PROFILE`/`PI_PROFILE`) isolate user-level MCP config. When a profile is active, the **user** scope resolves to the profile's agent directory instead of the default one:\n\n- Default profile: `~/.omp/agent/mcp.json`\n- Profile `<name>`: `~/.omp/profiles/<name>/agent/mcp.json`\n\nDiscovery, the `/mcp` commands, and the config writer all follow the active profile, so a profile sees **only** its own user-level servers — never the default profile's `~/.omp/agent/mcp.json`. Add a server to a profile by launching under it (`omp --profile <name>`) and running `/mcp add` → User level, or by editing `~/.omp/profiles/<name>/agent/mcp.json` directly.\n\nProject-scoped MCP config (`.omp/mcp.json`) is keyed to the working directory, not the profile, so it applies under every profile. External-tool configs (`.claude/`, `.cursor/`, etc.) are also profile-independent because they belong to those tools rather than to an OMP profile.\n\nMCP follows the same profile rules as the rest of OMP-native config; see [Configuration Discovery → Profiles](./config-usage.md#profiles).\n\n## Add a schema reference\n\nAdd this line at the top of the file for editor autocomplete and validation:\n\n```json\n{\n \"$schema\": \"https://raw.githubusercontent.com/can1357/oh-my-pi/main/packages/coding-agent/src/config/mcp-schema.json\",\n \"mcpServers\": {}\n}\n```\n\nOMP now writes this automatically when `/mcp add`, `/mcp enable`, `/mcp disable`, `/mcp reauth`, or other config-writing flows create or update an OMP-managed MCP file.\n\n## File shape\n\nOMP supports this top-level structure:\n\n```json\n{\n \"$schema\": \"https://raw.githubusercontent.com/can1357/oh-my-pi/main/packages/coding-agent/src/config/mcp-schema.json\",\n \"mcpServers\": {\n \"server-name\": {\n \"type\": \"stdio\",\n \"command\": \"npx\",\n \"args\": [\"-y\", \"some-mcp-server\"]\n }\n },\n \"disabledServers\": [\"server-name\"]\n}\n```\n\nTop-level keys:\n\n- `$schema` — optional JSON Schema URL for tooling\n- `mcpServers` — map of server name to server config\n- `disabledServers` — user-level denylist used to turn off discovered servers by name; runtime loading reads this list from the active profile's user MCP file (`~/.omp/agent/mcp.json`, or `~/.omp/profiles/<name>/agent/mcp.json` under a named profile)\n\nServer names must match `^[a-zA-Z0-9_.-]{1,100}$`.\n\n## Supported server fields\n\nShared fields for every transport:\n\n- `enabled?: boolean` — skip this server when `false`\n- `timeout?: number` — MCP request timeout in milliseconds; `0` disables client-side MCP timeouts\n- `auth?: { ... }` — auth metadata used by OMP for OAuth/API-key flows\n- `oauth?: { ... }` — explicit OAuth client settings used during auth/reauth\n\nSet `OMP_MCP_TIMEOUT_MS=0` to disable the client-side timeout for every MCP server in the current process. Set it to a positive millisecond value, such as `OMP_MCP_TIMEOUT_MS=120000`, to apply one global timeout without editing each server entry.\n\n### `stdio` transport\n\n`stdio` is the default when `type` is omitted.\n\nRequired:\n\n- `command: string`\n\nOptional:\n\n- `type?: \"stdio\"`\n- `args?: string[]`\n- `env?: Record<string, string>`\n- `cwd?: string`\n\nExample:\n\n```json\n{\n \"$schema\": \"https://raw.githubusercontent.com/can1357/oh-my-pi/main/packages/coding-agent/src/config/mcp-schema.json\",\n \"mcpServers\": {\n \"filesystem\": {\n \"command\": \"npx\",\n \"args\": [\n \"-y\",\n \"@modelcontextprotocol/server-filesystem\",\n \"/Users/alice/projects\",\n \"/Users/alice/Documents\"\n ]\n }\n }\n}\n```\n\nThis follows the official Filesystem MCP server package (`@modelcontextprotocol/server-filesystem`).\n\n### `http` transport\n\nRequired:\n\n- `type: \"http\"`\n- `url: string`\n\nOptional:\n\n- `headers?: Record<string, string>`\n\nExample:\n\n```json\n{\n \"$schema\": \"https://raw.githubusercontent.com/can1357/oh-my-pi/main/packages/coding-agent/src/config/mcp-schema.json\",\n \"mcpServers\": {\n \"github\": {\n \"type\": \"http\",\n \"url\": \"https://api.githubcopilot.com/mcp/\"\n }\n }\n}\n```\n\nThis matches GitHub's hosted GitHub MCP server endpoint.\n\n### `sse` transport\n\nRequired:\n\n- `type: \"sse\"`\n- `url: string`\n\nOptional:\n\n- `headers?: Record<string, string>`\n\nExample:\n\n```json\n{\n \"$schema\": \"https://raw.githubusercontent.com/can1357/oh-my-pi/main/packages/coding-agent/src/config/mcp-schema.json\",\n \"mcpServers\": {\n \"legacy-remote\": {\n \"type\": \"sse\",\n \"url\": \"https://example.com/mcp/sse\"\n }\n }\n}\n```\n\n`sse` is still supported for compatibility, but the MCP spec now prefers Streamable HTTP (`type: \"http\"`) for new servers.\n\n## Auth fields\n\nOMP understands two auth-related objects.\n\n### `auth`\n\n```json\n{\n \"type\": \"oauth\" | \"apikey\",\n \"credentialId\": \"optional-stored-credential-id\",\n \"tokenUrl\": \"optional-token-endpoint\",\n \"clientId\": \"optional-client-id\",\n \"clientSecret\": \"optional-client-secret\",\n \"resource\": \"optional-mcp-resource-uri\"\n}\n```\n\nUse this when OMP should remember how to rehydrate credentials for a server.\n\nYou normally do not need to write this block: when OMP completes an OAuth flow\nfor an `http`/`sse` server it stores the credential under a deterministic id\nderived from the active profile and server URL\n(`mcp_oauth:profile:<profile>:<url>`), with the refresh material embedded. Any\nconfig that points at the same URL — including a *definition-only* entry in a\nshared project `mcp.json` with no `auth` block at all — resolves the active\nprofile's own credential automatically, including when auth storage is backed by\na shared auth broker. This is what makes project-scoped servers safe across\nprofiles: commit the definition, and each profile authorizes (and stays signed\nin as) its own account via `/mcp reauth <name>`. An explicit `credentialId` is\nstill honored when it resolves; if it points at another profile's row, OMP falls\nback to the profile-scoped url-keyed binding.\n\n`/mcp reauth` on a definition-only entry leaves the file untouched — the\ncredential (refresh material included) lives entirely in the active profile's\nauth storage (local `agent.db` or broker), so a committed project config never\npicks up local auth state. An explicitly\nconfigured `Authorization` header always wins over the url-keyed binding.\n\nThe binding is per profile but not per project: once a profile has authorized\na URL, *any* checkout whose `mcp.json` defines a server at that URL connects\nwith that profile's credential automatically. Committed MCP definitions are\ntrusted input — the same already applies to `stdio` entries, which run\narbitrary commands — so review a repository's `mcp.json` before opening it\nwith a profile that holds credentials you care about, or use a dedicated\nprofile for untrusted checkouts.\n\n### `oauth`\n\n```json\n{\n \"clientId\": \"...\",\n \"clientSecret\": \"...\",\n \"redirectUri\": \"...\",\n \"callbackPort\": 3334,\n \"callbackPath\": \"/oauth/callback\",\n \"prompt\": \"consent\"\n}\n```\n\nUse this when the MCP server requires explicit OAuth client settings.\n\n`prompt` controls the OAuth `prompt` parameter sent with the authorization request. It defaults to `\"consent\"` so the provider always shows its consent/account screen — without it, a provider with an active browser session silently re-approves the same account, making it impossible to switch accounts or workspaces when reauthorizing (e.g. to use a different Linear workspace per OMP profile). Set it to `\"\"` to omit the parameter for providers that reject it, or to another value the provider understands (e.g. `\"select_account\"`).\n\nSlack is the clearest current example. Slack's MCP server is hosted at `https://mcp.slack.com/mcp`, uses Streamable HTTP, and requires confidential OAuth with your Slack app's client credentials.\n\nExample:\n\n```json\n{\n \"$schema\": \"https://raw.githubusercontent.com/can1357/oh-my-pi/main/packages/coding-agent/src/config/mcp-schema.json\",\n \"mcpServers\": {\n \"slack\": {\n \"type\": \"http\",\n \"url\": \"https://mcp.slack.com/mcp\",\n \"oauth\": {\n \"clientId\": \"YOUR_SLACK_CLIENT_ID\",\n \"clientSecret\": \"YOUR_SLACK_CLIENT_SECRET\"\n },\n \"auth\": {\n \"type\": \"oauth\",\n \"tokenUrl\": \"https://slack.com/api/oauth.v2.user.access\",\n \"clientId\": \"YOUR_SLACK_CLIENT_ID\",\n \"clientSecret\": \"YOUR_SLACK_CLIENT_SECRET\"\n }\n }\n }\n}\n```\n\nRelevant Slack endpoints from Slack's docs:\n\n- MCP endpoint: `https://mcp.slack.com/mcp`\n- Authorization endpoint: `https://slack.com/oauth/v2_user/authorize`\n- Token endpoint: `https://slack.com/api/oauth.v2.user.access`\n\n## Common copy-paste examples\n\n### Filesystem server via stdio\n\n```json\n{\n \"$schema\": \"https://raw.githubusercontent.com/can1357/oh-my-pi/main/packages/coding-agent/src/config/mcp-schema.json\",\n \"mcpServers\": {\n \"filesystem\": {\n \"command\": \"npx\",\n \"args\": [\n \"-y\",\n \"@modelcontextprotocol/server-filesystem\",\n \"/absolute/path/one\",\n \"/absolute/path/two\"\n ]\n }\n }\n}\n```\n\n### GitHub hosted server via HTTP\n\n```json\n{\n \"$schema\": \"https://raw.githubusercontent.com/can1357/oh-my-pi/main/packages/coding-agent/src/config/mcp-schema.json\",\n \"mcpServers\": {\n \"github\": {\n \"type\": \"http\",\n \"url\": \"https://api.githubcopilot.com/mcp/\"\n }\n }\n}\n```\n\n### GitHub local server via Docker\n\n```json\n{\n \"$schema\": \"https://raw.githubusercontent.com/can1357/oh-my-pi/main/packages/coding-agent/src/config/mcp-schema.json\",\n \"mcpServers\": {\n \"github\": {\n \"command\": \"docker\",\n \"args\": [\n \"run\",\n \"-i\",\n \"--rm\",\n \"-e\",\n \"GITHUB_PERSONAL_ACCESS_TOKEN\",\n \"ghcr.io/github/github-mcp-server\"\n ],\n \"env\": {\n \"GITHUB_PERSONAL_ACCESS_TOKEN\": \"GITHUB_PERSONAL_ACCESS_TOKEN\"\n }\n }\n }\n}\n```\n\nThis matches GitHub's official local Docker image `ghcr.io/github/github-mcp-server`.\n\n### Slack hosted server via OAuth\n\n```json\n{\n \"$schema\": \"https://raw.githubusercontent.com/can1357/oh-my-pi/main/packages/coding-agent/src/config/mcp-schema.json\",\n \"mcpServers\": {\n \"slack\": {\n \"type\": \"http\",\n \"url\": \"https://mcp.slack.com/mcp\",\n \"oauth\": {\n \"clientId\": \"YOUR_SLACK_CLIENT_ID\",\n \"clientSecret\": \"YOUR_SLACK_CLIENT_SECRET\"\n },\n \"auth\": {\n \"type\": \"oauth\",\n \"tokenUrl\": \"https://slack.com/api/oauth.v2.user.access\",\n \"clientId\": \"YOUR_SLACK_CLIENT_ID\",\n \"clientSecret\": \"YOUR_SLACK_CLIENT_SECRET\"\n }\n }\n }\n}\n```\n\n## Secrets and variable resolution\n\nThis is the part that usually trips people up.\n\n### Discovery-time `${...}` expansion\n\nOMP expands `${VAR}` and `${VAR:-default}` placeholders while discovering MCP configs from OMP-native files and standalone fallback files. Expansion applies recursively to string values in `command`, `args`, `env`, `cwd`, `url`, `headers`, `auth`, and `oauth`; unresolved placeholders remain literal strings.\n\nExample:\n\n```json\n{\n \"mcpServers\": {\n \"github\": {\n \"type\": \"http\",\n \"url\": \"https://api.githubcopilot.com/mcp/\",\n \"headers\": {\n \"Authorization\": \"Bearer ${GITHUB_TOKEN}\"\n }\n }\n }\n}\n```\n\n### Pre-connect env/header resolution\n\nBefore OMP launches a stdio server or makes an HTTP/SSE request, it resolves stdio `env` values and HTTP/SSE `headers` values like this:\n\n1. If a value starts with `!`, OMP runs the rest as a shell command with a 10s timeout and uses trimmed stdout.\n2. If the command fails, times out, or prints only whitespace, that `env`/`headers` entry is omitted.\n3. Otherwise OMP checks whether the value names an environment variable.\n4. If that environment variable is set to a non-empty value, OMP uses the environment value; otherwise it uses the string literally.\n\nExamples:\n\n```json\n{\n \"env\": {\n \"GITHUB_PERSONAL_ACCESS_TOKEN\": \"GITHUB_PERSONAL_ACCESS_TOKEN\"\n },\n \"headers\": {\n \"X-MCP-Insiders\": \"true\"\n }\n}\n```\n\nThat means this is valid and convenient for local secrets:\n\n- `\"GITHUB_PERSONAL_ACCESS_TOKEN\": \"GITHUB_PERSONAL_ACCESS_TOKEN\"` → copy from the current shell environment\n- `\"Authorization\": \"Bearer hardcoded-token\"` → use the literal value\n- `\"Authorization\": \"!printf 'Bearer %s' \\\"$GITHUB_TOKEN\\\"\"` → build the header from a command\n\n## `disabledServers`\n\n`disabledServers` is read from the user config file (`~/.omp/agent/mcp.json`) when a server is discovered from any source and you want OMP to ignore it without editing that other tool's config.\n\nExample:\n\n```json\n{\n \"$schema\": \"https://raw.githubusercontent.com/can1357/oh-my-pi/main/packages/coding-agent/src/config/mcp-schema.json\",\n \"disabledServers\": [\"github\", \"slack\"]\n}\n```\n\n## `/mcp add` vs editing JSON directly\n\nUse `/mcp add` when you want guided setup.\n\nUse direct JSON editing when:\n\n- you need a transport or auth option the wizard does not prompt for yet\n- you want to paste a server definition from another MCP client\n- you want schema-backed validation in your editor\n\nAfter editing, use:\n\n- `/mcp reload` to rediscover and reconnect servers in the current session\n- `/mcp list` to see which config file a server came from\n- `/mcp test <name>` to test a single server\n- `/mcp reconnect <name>` to reconnect one server without rediscovering all configs\n- `/mcp resources`, `/mcp prompts`, and `/mcp notifications` to inspect non-tool MCP capabilities\n\n## Validation rules OMP enforces\n\nFrom `validateServerConfig()` in `packages/coding-agent/src/mcp/config.ts`:\n\n- `stdio` requires `command`\n- `http` and `sse` require `url`\n- a server cannot set both `command` and `url`\n- unknown `type` values are rejected\n\nPractical implications:\n\n- Omitting `type` means `stdio`\n- If you paste a remote server config and forget `\"type\": \"http\"`, OMP will treat it as `stdio` and complain that `command` is missing\n- `sse` remains valid for compatibility, but new hosted servers should usually be configured as `http`\n\n## Discovery and precedence\n\nOMP does not merge duplicate server definitions across files. Discovery providers are prioritized, and the higher-priority definition wins. Separately, `disabledServers` from `~/.omp/agent/mcp.json` can suppress a discovered server by name.\n\nIn practice:\n\n- prefer `.omp/mcp.json` or `~/.omp/agent/mcp.json` when you want an OMP-specific override\n- keep server names unique across tools when possible\n- use `disabledServers` in the user config when a third-party config keeps reintroducing a server you do not want\n\n## Troubleshooting\n\n### `Server \"name\": stdio server requires \"command\" field`\n\nYou probably omitted `type: \"http\"` on a remote server.\n\n### `Server \"name\": both \"command\" and \"url\" are set`\n\nPick one transport. OMP treats `command` as stdio and `url` as http/sse.\n\n### `/mcp add` worked but the server still does not connect\n\nThe JSON is valid, but the server may still be unreachable. Use `/mcp test <name>` and check whether:\n\n- the binary or Docker image exists\n- required environment variables are set\n- the remote URL is reachable\n- the OAuth or API token is valid\n\n### The server exists in another tool's config but not in OMP\n\nRun `/mcp list`. OMP discovers many third-party MCP files, but project-level loading can also be disabled via the `mcp.enableProjectConfig` setting, and a user-level `disabledServers` entry can suppress a server by name.\n\n## References\n\n- MCP transport spec: https://modelcontextprotocol.io/specification/2025-03-26/basic/transports\n- Filesystem server package: https://www.npmjs.com/package/@modelcontextprotocol/server-filesystem\n- GitHub MCP server: https://github.com/github/github-mcp-server\n- Slack MCP server docs: https://docs.slack.dev/ai/slack-mcp-server/\n",
|
|
33
33
|
"mcp-protocol-transports.md": "# MCP Protocol and Transport Internals\n\nThis document describes how coding-agent implements MCP JSON-RPC messaging and how protocol concerns are split from transport concerns.\n\n## Scope\n\nCovers:\n\n- JSON-RPC request/response and notification flow\n- Server-to-client request handling (`ping`, `roots/list`)\n- Request correlation and lifecycle for stdio and HTTP/SSE transports\n- Timeout, cancellation, and auth-refresh behavior\n- Error propagation and malformed payload handling\n- Transport selection boundaries (`stdio` vs `http`/`sse`)\n- Which reconnect/retry responsibilities are transport-level vs manager/tool-bridge-level\n\nDoes not cover extension authoring UX or command UI.\n\n## Implementation files\n\n- [`src/mcp/types.ts`](../packages/coding-agent/src/mcp/types.ts)\n- [`src/mcp/transports/stdio.ts`](../packages/coding-agent/src/mcp/transports/stdio.ts)\n- [`src/mcp/transports/http.ts`](../packages/coding-agent/src/mcp/transports/http.ts)\n- [`src/mcp/transports/index.ts`](../packages/coding-agent/src/mcp/transports/index.ts)\n- [`src/mcp/json-rpc.ts`](../packages/coding-agent/src/mcp/json-rpc.ts)\n- [`src/mcp/client.ts`](../packages/coding-agent/src/mcp/client.ts)\n- [`src/mcp/manager.ts`](../packages/coding-agent/src/mcp/manager.ts)\n\n## Layer boundaries\n\n### Protocol layer (JSON-RPC + MCP methods)\n\n- Message shapes are defined in `types.ts` (`JsonRpcRequest`, `JsonRpcNotification`, `JsonRpcResponse`, `JsonRpcMessage`).\n- MCP client logic (`client.ts`) decides method order and session handshake:\n 1. `initialize` request\n 2. for HTTP/SSE transports, start the optional background SSE listener after the initialize response has established any session id\n 3. `notifications/initialized` notification\n 4. method calls like `tools/list`, `tools/call`\n\n### Transport layer (`MCPTransport`)\n\n`MCPTransport` abstracts delivery and lifecycle:\n\n- `request(method, params, options?) -> Promise<T>`\n- `notify(method, params?) -> Promise<void>`\n- `close()`\n- `connected`\n- optional callbacks: `onClose`, `onError`, `onNotification`, `onRequest`\n\nTransport implementations own framing and I/O details:\n\n- `StdioTransport`: newline-delimited JSON over subprocess stdio\n- `HttpTransport`: JSON-RPC over HTTP POST, with optional SSE responses/listening\n\n### Manager/client wiring\n\n`connectToServer()` always installs an `onRequest` handler for standard server-to-client requests. `MCPManager` installs notification handlers, OAuth refresh hooks for HTTP OAuth servers, and `onClose` reconnect handling for managed connections.\n\n## Transport selection\n\n`client.ts:createTransport()` chooses transport from config:\n\n- `type` omitted or `\"stdio\"` -> `createStdioTransport`\n- `\"http\"` or `\"sse\"` -> `createHttpTransport`\n\n`\"sse\"` is treated as an HTTP transport variant (same class), not a separate transport implementation.\n\n## JSON-RPC message flow and correlation\n\n## Request IDs\n\nEach transport generates per-request IDs with `Snowflake.next()`. IDs are transport-local correlation tokens.\n\n## Stdio correlation path\n\n- Outbound request is serialized as one JSON object + `\\n`.\n- `#pendingRequests: Map<id, {resolve,reject}>` stores in-flight requests.\n- Read loop parses JSONL from stdout and calls `#handleMessage`.\n- If inbound message has matching `id`, request resolves/rejects.\n- If inbound message has `method` and no `id`, treated as notification and sent to `onNotification`.\n- If inbound message has both `method` and `id`, treated as a server-to-client request and answered through `onRequest`; without a handler the transport replies with JSON-RPC `-32601 Method not found`.\n\nUnknown response IDs are ignored (no rejection, no error callback).\n\n## HTTP correlation path\n\n- Outbound request is HTTP `POST` with JSON body and generated `id`.\n- Non-SSE response path: parse one JSON-RPC response and return `result`/throw on `error`.\n- SSE response path (`Content-Type: text/event-stream`): stream events, return first message whose `id` matches expected request ID and has `result` or `error`.\n- SSE messages with `method` and no `id` are treated as notifications.\n- SSE messages with both `method` and `id` are treated as server-to-client requests and answered with a POSTed JSON-RPC response.\n\nIf SSE stream ends before matching response, request fails with `No response received for request ID ...`. After the matching response is captured, the transport drains remaining SSE messages in the background.\n\n## Notifications\n\nClient emits JSON-RPC notifications via `transport.notify(...)`.\n\n- Stdio: writes notification frame to stdin (`jsonrpc`, `method`, `params`) plus newline via `writeFrame()`; a failed write closes the transport and throws.\n- HTTP: sends POST body without `id`; success accepts `2xx` or `202 Accepted`.\n\nServer-initiated notifications are surfaced through transport `onNotification`; `MCPManager` consumes known MCP list/update notifications and can forward all notifications through its own callback.\n\n## Stdio transport internals\n\n## Lifecycle and state transitions\n\n- Initial: `connected=false`, `process=null`, pending map empty\n- `connect()`:\n - spawn subprocess with configured command/args/env/cwd\n - mark connected\n - start stdout read loop (`readJsonl`)\n - start stderr loop (read/discard; currently silent)\n- `close()`:\n - `#handleClose()`: mark disconnected, reject all pending requests (`Transport closed`), emit `onClose`\n - kill subprocess\n - await read loop shutdown\n\nIf read loop exits unexpectedly, `finally` triggers `#handleClose()` which performs the same pending-request rejection and close callback.\n\n## Timeout and cancellation\n\nPer request:\n\n- timeout from `resolveMCPTimeoutMs`: `OMP_MCP_TIMEOUT_MS` env override, else `config.timeout ?? 30000`; `0` disables\n- optional `AbortSignal` from caller\n- abort and timeout both reject the pending promise and clean map entry\n\nCancellation is local only: transport does not send protocol-level cancellation notification to the server.\n\n## Malformed payload handling\n\nIn read loop:\n\n- each parsed JSONL line is passed to `#handleMessage` in `try/catch`\n- malformed/invalid message handling exceptions are dropped (`Skip malformed lines` comment)\n- loop continues, so one bad message does not kill the connection\n\nIf the underlying stream parser throws, `onError` is invoked (when still connected), then connection closes.\n\n## Disconnect/failure behavior\n\nWhen process exits or stream closes:\n\n- all in-flight requests are rejected with `Transport closed`\n- no automatic restart or reconnect\n- higher layers must reconnect by creating a new transport\n\n## Backpressure/streaming notes\n\n- `request()` awaits `stdin.write()` + `flush()` so broken-pipe failures reject the request; `notify()` writes through `writeFrame()`, which does not await and neutralizes async EPIPE rejections.\n- There is no explicit queue or high-watermark management in transport.\n- Inbound processing is stream-driven (`for await` over `readJsonl`), one parsed message at a time.\n\n## HTTP/SSE transport internals\n\n## Lifecycle and connection semantics\n\nHTTP transport has logical connection state, but request path is stateless per HTTP call:\n\n- `connect()` sets `connected=true` (no socket/session handshake)\n- optional server session tracking via `Mcp-Session-Id` header\n- `close()` optionally sends `DELETE` with `Mcp-Session-Id`, aborts SSE listener, emits `onClose`\n\nSo `connected` means \"transport usable\", not \"persistent stream established\".\n\n## Session header behavior\n\n- On POST response, if `Mcp-Session-Id` header is present, transport stores it.\n- Subsequent requests/notifications include `Mcp-Session-Id`.\n- `close()` tries to terminate server session with HTTP DELETE; termination failures are ignored.\n\n## Timeout, cancellation, and auth refresh\n\nFor `request()`:\n\n- timeout uses `AbortController` via `createMCPTimeout` (`OMP_MCP_TIMEOUT_MS` override, else `config.timeout ?? 30000`; `0` disables)\n- external signal, if provided, is merged via `AbortSignal.any([...])`\n- AbortError handling distinguishes caller abort vs timeout\n\nFor `notify()`:\n\n- timeout uses an internal `AbortController` with the same resolved timeout\n- there is no external abort option on the transport interface\n\nFor HTTP OAuth configs managed by `MCPManager`, outbound requests and best-effort server-request responses retry once on `HTTP 401`/`403` if token refresh returns replacement headers.\n\n## HTTP error propagation\n\nOn non-OK response:\n\n- response text is included in thrown error (`HTTP <status>: <text>`)\n- if present, auth hints from `WWW-Authenticate` and `Mcp-Auth-Server` are appended\n\nOn JSON-RPC error object:\n\n- throws `MCP error <code>: <message>`\n\nMalformed JSON body (`response.json()` failure) propagates as parse exception.\n\n## SSE behavior and modes\n\nTwo SSE paths exist:\n\n1. **Per-request SSE response** (`#parseSSEResponse`)\n - used when POST response content type is `text/event-stream`\n - consumes stream until matching response id found\n - can process interleaved notifications during same stream\n\n2. **Background SSE listener** (`startSSEListener()`)\n - optional GET listener for server-initiated notifications and server-to-client requests\n - `connectToServer()` starts it for HTTP/SSE transports after `initialize` and before `notifications/initialized`\n - listener startup waits up to one second, or less for very small request timeouts; `timeout: 0` / `OMP_MCP_TIMEOUT_MS=0` disables that startup deadline\n - if GET returns `405`, another non-OK status, no body, or times out, listener silently disables itself\n\n## Malformed payload and disconnect handling\n\nSSE JSON parsing errors bubble out of `readSseJson` and reject request/listener.\n\n- Request SSE parse errors reject the active request.\n- Background listener errors trigger `onError` (except AbortError), and an established listener ending while still connected triggers `onClose` so the manager can reconnect.\n- Transport does not restart the listener itself; managed connections may reconnect through manager `onClose` handling.\n\n## `json-rpc.ts` utility vs transport abstraction\n\n`src/mcp/json-rpc.ts` provides `callMCP()` and `parseSSE()` helpers for direct HTTP MCP calls (used by Exa integration), not the `MCPTransport` abstraction used by `MCPClient`/`MCPManager`.\n\nNotable differences from `HttpTransport`:\n\n- parses entire response text first, then extracts first `data: ` line (`parseSSE`), with JSON fallback\n- optional caller `AbortSignal` (`CallMcpOptions`), with a hard 60s `AbortSignal.timeout` default when none is given; no session-id handling, no transport lifecycle\n- returns raw JSON-RPC envelope object\n\nThis path is lightweight but less robust than full transport implementation.\n\n## Retry/reconnect responsibilities\n\n## Transport-level\n\nCurrent transport implementations do **not**:\n\n- retry ordinary failed requests, except the HTTP transport's single OAuth-refresh retry when `onAuthError` is wired\n- reconnect after stdio process exit\n- reconnect SSE listeners by themselves\n- resend in-flight requests after disconnect\n\nThey fail fast and propagate errors.\n\n## Manager/tool-bridge level\n\n`MCPManager` wires `transport.onClose` for managed connections and runs `reconnectServer(name)` when a transport closes unexpectedly. Reconnect tears down the stale connection, re-resolves auth/config values, retries with backoff (`500`, `1000`, `2000`, `4000` ms), reloads tools, and preserves stale tools while reconnecting.\n\n`MCPTool` and `DeferredMCPTool` also attempt one reconnect + retry for retriable connection errors during a tool call. This is tool availability recovery, not transport-level retry.\n\n## Failure scenarios summary\n\n- **Malformed stdio message line**: dropped; stream continues.\n- **Stdio stream/process ends**: transport closes; pending requests rejected as `Transport closed`; manager-managed connections trigger reconnect.\n- **HTTP non-2xx**: request/notify throws HTTP error; managed OAuth requests can refresh auth and retry once on 401/403.\n- **Invalid JSON response**: parse exception propagated.\n- **SSE ends without matching id**: request fails with `No response received for request ID ...`.\n- **Timeout**: transport-specific timeout error.\n- **Caller abort**: AbortError/reason propagated from caller signal where the method accepts one.\n\n## Practical boundary rule\n\nIf the concern is message shape, id correlation, or MCP method ordering, it belongs to protocol/client logic.\n\nIf the concern is framing (JSONL vs HTTP/SSE), stream parsing, fetch/spawn lifecycle, timeout clocks, or connection teardown, it belongs to transport implementation.\n",
|
|
34
34
|
"mcp-runtime-lifecycle.md": "# MCP runtime lifecycle\n\nThis document describes how MCP servers are discovered, connected, exposed as tools, refreshed, and torn down in the coding-agent runtime.\n\n## Lifecycle at a glance\n\n1. **SDK startup** kicks off MCP discovery (unless MCP is disabled): headless/SDK sessions await `discoverAndLoadMCPTools()`; interactive sessions (`hasUI: true`) create the manager up front and defer `discoverAndConnect()` until the session is live.\n2. **Discovery** (`loadAllMCPConfigs`) resolves MCP server configs from capability sources, filters disabled/project/Exa entries and browser MCP servers when the built-in browser tool is enabled, and preserves source metadata.\n3. **Manager connect phase** (`MCPManager.connectServers`) starts per-server connect + `tools/list` in parallel.\n4. **Fast startup gate** waits up to 250ms, then may return:\n - fully loaded `MCPTool`s,\n - failures per server,\n - or cached `DeferredMCPTool`s for still-pending servers.\n5. **SDK wiring** merges MCP tools into runtime tool registry for the session.\n6. **Post-connect enrichment** best-effort loads resources, resource templates, prompts, and optional resource subscriptions.\n7. **Live session** can refresh MCP tools via `/mcp` flows (`disconnectAll` + rediscover + `session.refreshMCPTools`) and can reconnect individual servers on transport close or `/mcp reconnect`.\n8. **Teardown** happens when callers invoke `disconnectServer`/`disconnectAll`; manager also clears MCP tool/resource/prompt registrations for disconnected servers.\n\n## Discovery and load phase\n\n### Entry path from SDK\n\n`createAgentSession()` in `src/sdk.ts` performs MCP startup when `enableMCP` is true (default). There are two paths:\n\n- **Headless/SDK** (no UI, no provided manager): awaits `discoverAndLoadMCPTools(cwd, { ... })` and merges the returned tools into the startup `customTools` set.\n- **Interactive/TUI** (`hasUI: true`, no provided manager): constructs `MCPManager` immediately (with cache + auth storage), defers `discoverAndConnect()` to a background task started after the session exists, then binds tools via `session.refreshMCPTools(...)` (disposing the manager if the session was torn down mid-connect).\n\nBoth paths:\n\n- pass `authStorage`, cache storage, `mcp.enableProjectConfig`, and browser-MCP filtering based on the `browser.enabled` setting,\n- always set `filterExa: true`,\n- log per-server load/connect errors,\n- store the manager in `toolSession.mcpManager` and the session result.\n\nIf `enableMCP` is false, MCP discovery is skipped entirely.\n\n### Config discovery and filtering\n\n`loadAllMCPConfigs()` (`src/mcp/config.ts`) loads canonical MCP server items through capability discovery, then converts to legacy `MCPServerConfig`.\n\nFiltering behavior:\n\n- `enableProjectConfig: false` removes project-level entries (`_source.level === \"project\"`).\n- `enabled: false` servers are skipped before connect attempts.\n- Exa servers are filtered out by default and API keys are extracted for native Exa tool integration; browser automation MCP servers are filtered when `filterBrowser` is true.\n\nResult includes both `configs` and `sources` (metadata used later for provider labeling).\n\n### Discovery-level failure behavior\n\n`discoverAndLoadMCPTools()` distinguishes two failure classes:\n\n- **Discovery hard failure** (exception from `manager.discoverAndConnect`, typically from config discovery): returns an empty tool set and one synthetic error `{ path: \".mcp.json\", error }`.\n- **Per-server runtime/connect failure**: manager returns partial success with `errors` map; other servers continue.\n\nSo startup does not fail the whole agent session when individual MCP servers fail.\n\n## Manager state model\n\n`MCPManager` tracks runtime lifecycle with separate registries:\n\n- `#connections: Map<string, MCPServerConnection>` — fully connected servers.\n- `#pendingConnections: Map<string, Promise<MCPServerConnection>>` — handshake in progress.\n- `#pendingToolLoads: Map<string, Promise<{ connection, serverTools }>>` — connected but tools still loading.\n- `#tools: CustomTool[]` — current MCP tool view exposed to callers.\n- `#sources: Map<string, SourceMeta>` — provider/source metadata even before connect completes.\n- `#pendingReconnections: Map<string, Promise<MCPServerConnection | null>>` — reconnects in progress after a dropped transport or explicit reconnect.\n- `#serverConfigs: Map<string, MCPServerConfig>` — original unresolved configs preserved so reconnect can re-resolve credentials without leaking resolved tokens.\n\n`getConnectionStatus(name)` derives status from these maps:\n\n- `connected` if in `#connections`,\n- `connecting` if pending connect, pending tool load, or pending reconnect,\n- `disconnected` otherwise.\n\n## Connection establishment and startup timing\n\n## Per-server connect pipeline\n\nFor each discovered server in `connectServers()`:\n\n1. store/update source metadata,\n2. skip if already connected/pending/reconnecting,\n3. validate transport fields (`validateServerConfig`),\n4. resolve auth/shell substitutions (`#resolveAuthConfig`),\n5. call `connectToServer(name, resolvedConfig)` with manager notification/request handlers,\n6. wire HTTP OAuth refresh and transport `onClose` reconnect handling,\n7. call `listTools(connection)`,\n8. cache tool definitions (`MCPToolCache.set`) best-effort,\n9. best-effort load resources, resource templates, prompts, and subscriptions after tools load.\n\n`connectToServer()` behavior (`src/mcp/client.ts`):\n\n- creates stdio or HTTP/SSE transport,\n- performs MCP `initialize`,\n- for HTTP/SSE, starts the optional background SSE listener before `notifications/initialized`,\n- sends `notifications/initialized`,\n- uses timeout (`OMP_MCP_TIMEOUT_MS`, `config.timeout`, or 30s default; `0` disables the client-side timeout),\n- closes transport on init failure.\n\n### Fast startup gate + deferred fallback\n\n`connectServers()` waits on a race between:\n\n- all connect/tool-load tasks settled, and\n- `STARTUP_TIMEOUT_MS = 250`.\n\nAfter 250ms:\n\n- fulfilled tasks become live `MCPTool`s,\n- rejected tasks produce per-server errors,\n- still-pending tasks:\n - use cached tool definitions if available (`MCPToolCache.get`) to create `DeferredMCPTool`s,\n - otherwise contribute no tools at startup; they stay in flight, and the background continuation registers their tools via `#onToolsChanged` once connect/list finishes (a slow server no longer blocks startup — issue #2100).\n\nThis is a hybrid startup model: fast return with deferred handles when cache is available, late background registration when it is not.\n\n### Background completion behavior\n\nEach pending `toolsPromise` also has a background continuation that eventually:\n\n- replaces that server’s tool slice in manager state via `#replaceServerTools`,\n- writes cache,\n- logs late failures only after startup (`allowBackgroundLogging`).\n\n## Tool exposure and live-session availability\n\n### Startup registration\n\n`discoverAndLoadMCPTools()` converts manager tools into `LoadedCustomTool[]` and decorates paths (`mcp:<server> via <providerName>` when known).\n\n`createAgentSession()` then pushes these tools into `customTools`, which are wrapped and added to the runtime tool registry with names like `mcp__<server>_<tool>`.\n\n### Tool calls\n\n- `MCPTool` calls tools through an already connected `MCPServerConnection`.\n- `DeferredMCPTool` waits for `waitForConnection(server)` before calling; this allows cached tools to exist before connection is ready.\n- Both attempt a reconnect + single retry for retriable connection failures.\n\nBoth return structured tool output and convert remaining transport/tool errors into `MCP error: ...` tool content (abort remains abort).\n\n## Refresh/reload paths (startup vs live reload)\n\n### Initial startup path\n\n- one-time discovery/load in `sdk.ts`,\n- tools are registered in initial session tool registry.\n\n### Interactive reload path\n\n`/mcp reload` path (`src/modes/controllers/mcp-command-controller.ts`) does:\n\n1. `mcpManager.disconnectAll()`,\n2. `mcpManager.discoverAndConnect()`,\n3. `session.refreshMCPTools(mcpManager.getTools())`.\n\n`session.refreshMCPTools()` (`src/session/agent-session.ts`) removes all `mcp__` tools, re-wraps latest MCP tools, and re-activates tool set so MCP changes apply without restarting session.\n\nThere is also a follow-up path for late connections: after waiting for a specific server, if status becomes `connected`, it re-runs `session.refreshMCPTools(...)` so newly available tools are rebound in-session.\n\n## Health, reconnect, and partial failure behavior\n\nCurrent runtime behavior is connection-event driven:\n\n- **No autonomous polling health monitor** in manager/client.\n- **Automatic reconnect is wired to `transport.onClose`** for managed connections.\n- Reconnect retries with backoff (`500`, `1000`, `2000`, `4000` ms), reloads tools, and notifies consumers on success. A crash-storm circuit breaker suspends automatic reconnects for a server after more than 5 reconnect attempts within 30s; manual `/mcp reconnect` resets that history.\n- Tool calls that see retriable connection errors also attempt one reconnect + retry.\n- Reconnect is also explicit via `/mcp reconnect <name>` or broader `/mcp reload`.\n\nOperationally:\n\n- one server failing does not remove tools from healthy servers,\n- connect/list failures are isolated per server,\n- stale tools may remain visible while reconnect is attempted; calls report MCP errors if recovery fails,\n- tool cache, resource/prompt loading, subscriptions, and background updates are best-effort (warnings/errors logged, no hard stop).\n\n## Teardown semantics\n\n### Server-level teardown\n\n`disconnectServer(name)`:\n\n- removes pending entries, source metadata, saved config, resource refresh/subscription state,\n- detaches `onClose` so explicit close does not trigger reconnect,\n- closes transport if connected,\n- removes manager tool entries using the current raw-name prefix filter (`mcp__${name}_`); generated tool names are sanitized by `tool-bridge.ts`.\n\n### Global teardown\n\n`disconnectAll()`:\n\n- detaches `onClose` for all active transports, then closes them with `Promise.allSettled`,\n- clears pending maps, sources, saved configs, connections, subscriptions, resource refreshes, and manager tool list.\n\nIn current wiring, explicit teardown is used in MCP command flows (for reload/remove/disable). Startup stores the manager on the session; callers that need deterministic MCP shutdown should invoke manager disconnect methods.\n\n## Failure modes and guarantees\n\n| Scenario | Behavior | Hard fail vs best-effort |\n| ---------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------- | ------------------------------ |\n| Discovery throws (capability/config load path) | Loader returns empty tools + synthetic `.mcp.json` error | Best-effort session startup |\n| Invalid server config | Server skipped with validation error entry | Best-effort per server |\n| Connect timeout/init failure | Server error recorded; others continue | Best-effort per server |\n| `tools/list` still pending at startup with cache hit | Deferred tools returned immediately | Best-effort fast startup |\n| `tools/list` still pending at startup without cache | No tools at startup; background continuation registers them via `#onToolsChanged` when ready | Best-effort late registration |\n| Late background tool-load failure | Logged after startup gate | Best-effort logging |\n| Runtime dropped transport | Manager attempts reconnect; stale tools remain while reconnecting and future calls may retry once or fail with MCP errors | Best-effort automatic recovery |\n\n## Public API surface\n\n`src/mcp/index.ts` re-exports loader/manager/client APIs for external callers. `src/sdk.ts` exposes `discoverMCPServers()` as a convenience wrapper returning the same loader result shape.\n\n## Implementation files\n\n- [`src/mcp/loader.ts`](../packages/coding-agent/src/mcp/loader.ts) — loader facade, discovery error normalization, `LoadedCustomTool` conversion.\n- [`src/mcp/manager.ts`](../packages/coding-agent/src/mcp/manager.ts) — lifecycle state registries, parallel connect/list flow, refresh/disconnect.\n- [`src/mcp/client.ts`](../packages/coding-agent/src/mcp/client.ts) — transport setup, initialize handshake, list/call/disconnect.\n- [`src/mcp/index.ts`](../packages/coding-agent/src/mcp/index.ts) — MCP module API exports.\n- [`src/sdk.ts`](../packages/coding-agent/src/sdk.ts) — startup wiring into session/tool registry.\n- [`src/mcp/config.ts`](../packages/coding-agent/src/mcp/config.ts) — config discovery/filtering/validation used by manager.\n- [`src/mcp/tool-bridge.ts`](../packages/coding-agent/src/mcp/tool-bridge.ts) — `MCPTool` and `DeferredMCPTool` runtime behavior.\n- [`src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts) — `refreshMCPTools` live rebinding.\n- [`src/modes/controllers/mcp-command-controller.ts`](../packages/coding-agent/src/modes/controllers/mcp-command-controller.ts) — interactive reload/reconnect flows.\n- [`src/task/executor.ts`](../packages/coding-agent/src/task/executor.ts) — subagent MCP proxying via parent manager connections.\n",
|
|
@@ -50,7 +50,7 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
|
|
|
50
50
|
"porting-from-pi-mono.md": "# Porting From pi-mono: A Practical Merge Guide\n\nThis guide is a repeatable checklist for porting changes from pi-mono into this repo.\nUse it for any merge: single file, feature branch, or full release sync.\n\n## Last Sync Point (historical upstream marker)\n\n**Commit:** `b21b42d032919de2f2e6920a76fa9a37c3920c0a`\n**Date:** 2026-03-22\n\nUpdate this section after each sync; do not reuse the previous range. This commit is an upstream pi-mono marker and may not exist in this repo's local object database.\n\nWhen starting a new sync, generate patches from this commit forward in a pi-mono checkout or remote that contains the commit:\n\n```bash\ngit format-patch b21b42d032919de2f2e6920a76fa9a37c3920c0a..HEAD --stdout > changes.patch\n```\n\n## 0) Define the scope\n\n- Identify the upstream reference (commit, tag, or PR).\n- List the packages or folders you plan to touch.\n- Decide which features are in-scope and which are intentionally skipped.\n\n## 1) Bring code over safely\n\n- Prefer a clean, focused diff rather than a wholesale copy.\n- Avoid copying built artifacts or generated files.\n- If upstream added new files, add them explicitly and review contents.\n\n## 2) Match import extension conventions\n\nMost runtime TypeScript sources omit `.js` in internal imports, but several current entrypoints and tool modules keep `.js` for ESM/runtime compatibility. Follow the surrounding file and package export style; do not blanket-strip or blanket-add extensions.\n\n- In `packages/coding-agent` runtime sources, prefer extensionless internal imports when the surrounding module does, but preserve existing `.js` imports in files that already require them.\n- In `packages/tui/test` and `packages/natives/bench`, keep `.js` where surrounding files already use it.\n- Keep real file extensions when required by tooling or import assertions (e.g., `.json`, `.css`, `.md` text embeds).\n- Example: `import { x } from \"./foo.js\";` → `import { x } from \"./foo\";` only when that package/file convention is extensionless.\n\n## 3) Replace import scopes\n\nUpstream uses different package scopes. Replace them consistently.\n\n- Replace old scopes with the local scope used here.\n- Examples (adjust to match the actual packages you are porting):\n - `@mariozechner/pi-coding-agent` → `@oh-my-pi/pi-coding-agent`\n - `@mariozechner/pi-agent-core` → `@oh-my-pi/pi-agent-core`\n - `@mariozechner/pi-tui` → `@oh-my-pi/pi-tui`\n - `@mariozechner/pi-ai` → `@oh-my-pi/pi-ai`\n - `@mariozechner/pi-utils` → `@oh-my-pi/pi-utils`\n- Some upstream packages publish under the `@earendil-works/*` scope instead of `@mariozechner/*`. Map it the same way (`@earendil-works/pi-coding-agent` → `@oh-my-pi/pi-coding-agent`, and so on).\n- The bare `typebox` package is not an `@oh-my-pi/*` scope; do not rewrite it as one. See the Extensions divergence in section 15 for how tool-parameter schemas map.\n\n## 4) Use Bun APIs where they improve on Node\n\nWe run on Bun, but the current source intentionally mixes Bun APIs with small Node standard-library APIs. Replace Node APIs only when Bun provides a clearer, safer, or simpler implementation; do not mechanically rewrite every Node import.\n\n**Prefer replacing when porting new code:**\n\n- Process spawning: prefer Bun Shell `$` for simple commands; use `Bun.spawn`/`Bun.spawnSync` for streaming or process control. Keep existing `child_process` only where its exact semantics are needed.\n- HTTP clients: `node-fetch`, `axios` → native `fetch`\n- SQLite: `better-sqlite3` → `bun:sqlite`\n- Env loading: `dotenv` → Bun loads `.env` automatically\n- Runtime text/assets: prefer Bun imports such as `with { type: \"text\" }` or `Bun.file()` over copy steps or bundled fallback file reads.\n\n**DO NOT replace (these work fine in Bun):**\n\n- `os.homedir()` — do NOT replace with `Bun.env.HOME` or literal `\"~\"`\n- `os.tmpdir()` — do NOT replace with `Bun.env.TMPDIR || \"/tmp\"` or hardcoded paths\n- `fs.mkdtempSync()` — do NOT replace with manual path construction\n- `path.join()`, `path.resolve()`, etc. — these are fine\n\n**Import style:** Use the `node:` prefix for Node standard-library imports. Namespace imports are common, but named imports are acceptable where the surrounding code already uses them.\n\n**Additional Bun conventions:**\n\n- Prefer Bun Shell `$` for short, non-streaming commands; use `Bun.spawn` only when you need streaming I/O or process control.\n- Use `Bun.file()`/`Bun.write()` for simple files and `node:fs/promises` for directory-oriented operations. Existing synchronous `node:fs` calls are acceptable when the calling flow is intentionally synchronous.\n- Avoid `Bun.file().exists()` checks; use `isEnoent` handling in try/catch.\n- Prefer `Bun.sleep(ms)` over `setTimeout` wrappers.\n\n**Wrong:**\n\n```typescript\n// BROKEN: env vars may be undefined, \"~\" is not expanded\nconst home = Bun.env.HOME || \"~\";\nconst tmp = Bun.env.TMPDIR || \"/tmp\";\n```\n\n**Correct:**\n\n```typescript\nimport * as os from \"node:os\";\nimport * as fs from \"node:fs\";\nimport * as path from \"node:path\";\n\nconst configDir = path.join(os.homedir(), \".config\", \"myapp\");\nconst tempDir = fs.mkdtempSync(path.join(os.tmpdir(), \"myapp-\"));\n```\n\n## 5) Prefer Bun embeds (no copying)\n\nDo not add new runtime asset copy steps. Keep assets in repo and prefer Bun embeds/imports; preserve existing explicit generation workflows such as `packages/coding-agent/src/export/html/tool-views.generated.js` (built from collab-web sources via `bun run build-tool-views`).\n\n- If upstream copies assets into a dist folder, replace with Bun-friendly embeds.\n- Prompts are static `.md` files; use Bun text imports (`with { type: \"text\" }`) and Handlebars instead of inline prompt strings.\n- Use `import.meta.dir` + `Bun.file` to load adjacent non-text resources.\n- Keep assets in-repo and let the bundler include them.\n- Eliminate copy scripts unless the user explicitly requests them or the package already has an intentional generation step.\n- If upstream reads a bundled fallback file at runtime, replace filesystem reads with a Bun text embed import unless the current package already uses a generated asset pipeline.\n - Example (Codex instructions fallback):\n - `const FALLBACK_PROMPT_PATH = join(import.meta.dir, \"codex-instructions.md\");` -> removed\n - `import FALLBACK_INSTRUCTIONS from \"./codex-instructions.md\" with { type: \"text\" };`\n - Use `return FALLBACK_INSTRUCTIONS;` instead of `readFileSync(FALLBACK_PROMPT_PATH, \"utf8\")`\n\n## 6) Port `package.json` carefully\n\nTreat `package.json` as a contract. Merge intentionally.\n\n- Keep existing `name`, `version`, `type`, `exports`, and `bin` unless the port requires changes.\n- Replace npm/node scripts with Bun equivalents (e.g., `bun check`, `bun test`).\n- Ensure dependencies use the correct scope.\n- Do not downgrade dependencies to fix type errors; upgrade instead.\n- Validate workspace package links and `peerDependencies`.\n\n## 7) Align code style and tooling\n\n- Keep existing formatting conventions.\n- Do not introduce `any` unless required.\n- Avoid dynamic imports unless they are required for optional dependencies, startup cost, or runtime-only modules; prefer top-level imports otherwise.\n- Never build prompts in code; prompts are static `.md` files rendered with Handlebars.\n- In `packages/coding-agent`, use `logger` from `@oh-my-pi/pi-utils` for internal/runtime logging; CLI command files may use `console.*` for intentional user-facing output.\n- Use `Promise.withResolvers()` instead of `new Promise((resolve, reject) => ...)`.\n- Prefer ES `#` private fields for new encapsulated state. Constructor parameter properties already exist in current code and are acceptable; do not churn unrelated access modifiers while porting.\n- Prefer existing helpers and utilities over new ad-hoc code.\n Preserve Bun-first infrastructure changes already made in this repo:\n - Runtime is Bun (no Node entry points for the main CLI).\n - Package manager is Bun (no npm lockfiles).\n - Heavy Node APIs should not be introduced casually; current source still uses selected Node APIs (`node:crypto`, `node:readline`, synchronous `node:fs`, and `child_process`) where they fit provider, CLI, or process-control semantics.\n - Lightweight Node APIs (`os.homedir`, `os.tmpdir`, `fs.mkdtempSync`, `path.*`) are kept.\n - CLI shebangs use `bun` (not `node`, not `tsx`).\n - TypeScript packages generally use source files directly; `@oh-my-pi/pi-natives` exports generated native bindings from `packages/natives/native`.\n - CI workflows run Bun for install/check/test.\n\n## 8) Remove old compatibility layers\n\nUnless requested, remove upstream compatibility shims.\n\n- Delete old APIs that were replaced.\n- Update all call sites to the new API directly.\n- Do not keep `*_v2` or parallel versions.\n\n## 9) Update docs and references\n\n- Replace pi-mono repo links where appropriate.\n- Update examples to use Bun and correct package scopes.\n- Ensure README instructions still match the current repo behavior.\n\n## 10) Validate the port\n\nRun the standard checks after changes:\n\n- `bun check`\n\nIf the repo already has failing checks unrelated to your changes, call that out.\nTests use Bun's runner (not Vitest), but only run `bun test` when explicitly requested.\n\n## 11) Protect improved features (regression trap list)\n\nIf you already improved behavior locally, treat those as **non‑negotiable**. Before porting, write down\nthe improvements and add explicit checks so they don’t get lost in the merge.\n\n- **Freeze the expected behavior**: add a short “before/after” note for each improvement (inputs, outputs,\n defaults, edge cases). This prevents silent rollback.\n- **Map old → new APIs**: if upstream renamed concepts (hooks → extensions, custom tools → tools, etc.),\n ensure every old entry point still wires through. One missed flag or export equals lost functionality.\n- **Verify exports**: check `package.json` `exports`, public types, and barrel files. Upstream ports often\n forget to re-export local additions.\n- **Cover non‑happy paths**: if you fixed error handling, timeouts, or fallback logic, add a test or at\n least a manual checklist that exercises those paths.\n- **Check defaults and config merge order**: improvements often live in defaults. Confirm new defaults\n didn’t revert (e.g., new config precedence, disabled features, tool lists).\n- **Audit env/shell behavior**: if you fixed execution or sandboxing, verify the new path still uses your\n sanitized env and does not reintroduce alias/function overrides.\n- **Re-run targeted samples**: keep a minimal set of \"known good\" examples and run them after the port\n (CLI flags, extension registration, tool execution).\n\n## 12) Detect and handle reworked code\n\nBefore porting a file, check if upstream significantly refactored it:\n\n```bash\n# Compare the file you're about to port against what you have locally\ngit diff HEAD upstream/main -- path/to/file.ts\n```\n\nIf the diff shows the file was **reworked** (not just patched):\n\n- New abstractions, renamed concepts, merged modules, changed data flow\n\nThen you must **read the new implementation thoroughly** before porting. Blind merging of reworked code loses functionality because:\n\nNote: interactive mode was recently split into controllers/utils/types. When backporting related changes, port updates into the individual files we created and ensure `interactive-mode.ts` wiring stays in sync.\n\n1. **Defaults change silently** - A new variable `defaultFoo = [a, b]` may replace an old `getAllFoo()` that returned `[a, b, c, d, e]`.\n\n2. **API options get dropped** - When systems merge (e.g., `hooks` + `customTools` → `extensions`), old options may not wire through to the new implementation.\n\n3. **Code paths go stale** - A renamed concept (e.g., `hookMessage` → `custom`) needs updates in every switch statement, type guard, and handler—not just the definition.\n\n4. **Context/capabilities shrink** - Old APIs may have exposed `{ logger, typebox, pi }` that new APIs forgot to include.\n\n### Semantic porting process\n\nWhen upstream reworked a module:\n\n1. **Read the old implementation** - Understand what it did, what options it accepted, what it exposed.\n\n2. **Read the new implementation** - Understand the new abstractions and how they map to old behavior.\n\n3. **Verify feature parity** - For each capability in the old code, confirm the new code preserves it or explicitly removes it.\n\n4. **Grep for stragglers** - Search for old names/concepts that may have been missed in switch statements, handlers, UI components.\n\n5. **Test the boundaries** - CLI flags, SDK options, event handlers, default values—these are where regressions hide.\n\n### Quick checks\n\n```bash\n# Find all uses of an old concept that may need updating\nrg \"oldConceptName\" --type ts\n\n# Compare default values between versions\ngit show upstream/main:path/to/file.ts | rg \"default|DEFAULT\"\n\n# Check if all enum/union values have handlers\nrg \"case \\\"\" path/to/file.ts\n```\n\n## 13) Quick audit checklist\n\nUse this as a final pass before you finish:\n\n- [ ] Import extensions follow the local package convention (no blanket `.js` stripping)\n- [ ] No newly introduced Node-only APIs unless they match an existing justified pattern\n- [ ] All package scopes updated\n- [ ] `package.json` scripts use Bun\n- [ ] Prompts are `.md` text imports (no inline prompt strings)\n- [ ] No internal/runtime `console.*` in coding-agent; CLI user-facing output is intentional\n- [ ] Assets load via Bun embed/import patterns, or through an existing intentional generation pipeline\n- [ ] Tests or checks run (or explicitly noted as blocked)\n- [ ] No functionality regressions (see sections 11-12)\n\n## 14) Commit message format\n\nWhen committing a backport, follow the repo format `<type>(scope): <past-tense description>` and keep the commit\nrange in the title.\n\n```\nfix(coding-agent): backported pi-mono changes (<from>..<to>)\n\npackages/<package>:\n- <type>: <description>\n- <type>: <description> (#<issue> by @<contributor>)\n\npackages/<other-package>:\n- <type>: <description>\n```\n\n**Example:**\n\n```\nfix(coding-agent): backported pi-mono changes (9f3eef65f..52532c7c0)\n\npackages/ai:\n- fix: handle \"sensitive\" stop reason from Anthropic API\n- fix: normalize tool call IDs with special characters for Responses API\n- fix: add overflow detection for Bedrock, MiniMax, Kimi providers\n- fix: 429 status is rate limiting, not context overflow\n\npackages/tui:\n- fix: refactored autocomplete state tracking\n- fix: file autocomplete should not trigger on empty text\n- fix: configurable autocomplete max visible items\n- fix: improved table column width calculation with word-aware wrapping\n\npackages/coding-agent:\n- fix: preserve external config.yml edits on save (#1046 by @nicobailonMD)\n- fix: resolve macOS NFD and curly quote variants in file paths\n```\n\n**Rules:**\n\n- Group changes by package\n- Use conventional commit types (`fix`, `feat`, `refactor`, `perf`, `docs`)\n- Include upstream issue/PR numbers and contributor attribution for external contributions\n- The commit range in the title helps track sync points\n\n## 15) Intentional Divergences\n\nOur fork has architectural decisions that differ from upstream. **Do not port these upstream patterns:**\n\n### UI Architecture\n\n| Upstream | Our Fork | Reason |\n| ------------------------------------------- | --------------------------------------------------------- | --------------------------------------------------------------------- |\n| `FooterDataProvider` class | `StatusLineComponent` | Simpler, integrated status line |\n| `ctx.ui.setHeader()` / `ctx.ui.setFooter()` | No-op stubs in current extension contexts | Not currently wired to replace the TUI status/header UI |\n| `ctx.ui.setEditorComponent()` | No-op stubs in current extension contexts | Custom editor replacement is not currently wired |\n| `InteractiveModeOptions` options object | Positional constructor args (options type still exported) | Keep constructor signature; update the type when upstream adds fields |\n\n### Component Naming\n\n| Upstream | Our Fork |\n| ---------------------------- | ----------------------- |\n| `extension-input.ts` | `hook-input.ts` |\n| `extension-selector.ts` | `hook-selector.ts` |\n| `ExtensionInputComponent` | `HookInputComponent` |\n| `ExtensionSelectorComponent` | `HookSelectorComponent` |\n\n### API Naming\n\n| Upstream | Our Fork | Notes |\n| ---------------------------------------- | ---------------------------------------- | ----------------------------------------- |\n| `sessionManager.appendSessionInfo(name)` | `sessionManager.setSessionName(name)` | We use `sessionName` throughout |\n| `sessionManager.getSessionName()` | `sessionManager.getSessionName()` | Same (we unified to match upstream's RPC) |\n| `agent.sessionName` / `setSessionName()` | `agent.sessionName` / `setSessionName()` | Same |\n\n### File Consolidation\n\n| Upstream | Our Fork | Reason |\n| -------------------------------------------------- | --------------------------------------------------------- | --------------------------------------------- |\n| `clipboard.ts` + `clipboard-image.ts` (tool files) | `src/utils/clipboard.ts` backed by `@oh-my-pi/pi-natives` | Native implementation with a small TS wrapper |\n\n### Test Framework\n\n| Upstream | Our Fork |\n| ------------------------- | ----------------------------- |\n| `vitest` with `vi.mock()` | `bun:test` with `vi` from bun |\n| `node:test` assertions | `expect()` matchers |\n\n### Tool Architecture\n\n| Upstream | Our Fork | Notes |\n| ----------------------------------- | ------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------- |\n| `createTool(cwd: string, options?)` | `createTools(session: ToolSession)` via `BUILTIN_TOOLS` registry | Tool factories accept `ToolSession` and can return `null` |\n| Per-tool `*Operations` interfaces | Only current per-tool override interfaces remain (for example `FindOperations`) | Used for SSH/remote overrides where present |\n| Node.js `fs/promises` everywhere | Bun file APIs for simple file writes/reads, `node:fs/promises` for dirs, selected sync `node:fs` where needed | Prefer Bun APIs when they simplify |\n\n### Auth Storage\n\n| Upstream | Our Fork | Notes |\n| ------------------------------- | ------------------------------------------- | -------------------------------------------- |\n| `proper-lockfile` + `auth.json` | `agent.db` (bun:sqlite) | Credentials stored exclusively in `agent.db` |\n| Single credential per provider | Multi-credential with round-robin selection | Session affinity and backoff logic preserved |\n\n### Extensions\n\n| Upstream | Our Fork |\n| ---------------------------------------------------------------- | ------------------------------------------------------------------------------------------------- |\n| `jiti` for TypeScript loading | Native Bun `import()` |\n| `pkg.pi` manifest field | `pkg.omp` preferred; fallback to `pkg.pi` remains |\n| `StringEnum` from `pi-ai` | `Type.Enum` from the `pi.typebox` shim (or author the schema with `pi.zod`); `pi-ai` no longer exports `StringEnum` |\n| `formatSize` from `pi-coding-agent` | `formatBytes` from `@oh-my-pi/pi-utils` |\n| `DefaultResourceLoader` / `DefaultPackageManager` / `SettingsManager` / `createEventBus` | Capability-based discovery (`loadCapability(...)`) plus the `Settings` singleton and `EventBus` |\n\n### Skip These Upstream Features\n\nWhen porting, **skip** these files/features entirely:\n\n- `footer-data-provider.ts` — we use StatusLineComponent\n- `clipboard-image.ts` — image clipboard support is exposed through `src/utils/clipboard.ts` backed by `@oh-my-pi/pi-natives`\n- GitHub workflow files — we have our own CI\n- `models.generated.ts` — auto-generated, regenerate locally (as models.json instead)\n\n### Features We Added (Preserve These)\n\nThese exist in our fork but not upstream. **Never overwrite:**\n\n- `StatusLineComponent` in interactive mode\n- Multi-credential auth with session affinity\n- Capability-based discovery system (`defineCapability`, `registerProvider`, `loadCapability`, `skillCapability`, etc.)\n- MCP/Exa/SSH integrations\n- LSP writethrough for format-on-save\n- Bash interception (`checkBashInterception`)\n- Fuzzy path suggestions in read tool\n",
|
|
51
51
|
"porting-to-natives.md": "# Porting to pi-natives (N-API) — Field Notes\n\nThis is a practical guide for moving hot paths into `crates/pi-natives` and wiring them through the generated native package entrypoint. It exists to avoid the same failures happening twice.\n\n## When to port\n\nPort when any of these are true:\n\n- The hot path runs in render loops, tight UI updates, or large batches.\n- JS allocations dominate (string churn, regex backtracking, large arrays).\n- You already have a JS baseline and can benchmark both versions side by side.\n- The work is CPU-bound or blocking I/O that can run on the libuv thread pool.\n- The work is async I/O that can run on Tokio's runtime (for example shell execution).\n\nAvoid ports that depend on JS-only state or dynamic imports. N-API exports should be data-in/data-out. Long-running work should go through `task::blocking` (CPU-bound/blocking I/O) or `task::future` (async I/O) with cancellation where the caller needs `timeoutMs` or `AbortSignal`.\n\n## Current package shape\n\n`@oh-my-pi/pi-natives` no longer has a `packages/natives/src/<module>` TypeScript wrapper layer. The package root points at generated native artifacts:\n\n- runtime entry/export wrapper: `packages/natives/native/index.js`\n- types entry: `packages/natives/native/index.d.ts`\n- loader helpers: `packages/natives/native/loader-state.js`\n- embedded manifest: `packages/natives/native/embedded-addon.js`\n\nConsumers import directly from `@oh-my-pi/pi-natives`. The generated declarations and explicit ESM exports are produced during `bun --cwd=packages/natives run build`.\n\n## Anatomy of a native export\n\n**Rust side:**\n\n- Implementation lives in `crates/pi-natives/src/<module>.rs`.\n- If you add a new module, register it in `crates/pi-natives/src/lib.rs`.\n- Export with `#[napi]`; snake_case exports are converted to camelCase automatically. Use explicit JS names only for true aliases/non-default names. Use `#[napi(object)]` for object-shaped structs.\n- For CPU-bound or blocking work, use `task::blocking(tag, cancel_token, work)`.\n- For async work that needs Tokio, use `task::future(env, tag, work)`.\n- Pass a `CancelToken` when the API exposes `timeoutMs` or `AbortSignal`, and call `heartbeat()` inside long loops.\n\n**Package/build side:**\n\n- `packages/natives/scripts/build-native.ts` runs napi-rs, installs the `.node` artifact, copies generated `index.d.ts`, and regenerates explicit ESM class/function exports plus enum runtime exports in the checked-in `native/index.js`.\n- `packages/natives/native/index.js` is the ESM entrypoint that calls the loader, exposes named exports, and rejects install/compiled `.node` files that do not expose the package-version sentinel.\n- `packages/natives/package.json` exposes only the package root (`@oh-my-pi/pi-natives`) as the import surface. At publish time the binaries are split out: the core ships the loader only (no `.node`), and each platform's `.node` is published as an optional-dependency leaf package `@oh-my-pi/pi-natives-<tag>` (`scripts/ci-release-publish.ts` + `packages/natives/scripts/gen-npm-packages.ts`). This is transparent to importers — you still `import` from `@oh-my-pi/pi-natives`.\n\n**Consumer side:**\n\n- Update direct imports/callsites in `packages/coding-agent` or `packages/tui` when the new export replaces a JS implementation.\n- Keep higher-level policy in consumers unless it belongs in the native primitive itself.\n\n## Porting checklist\n\n1. **Add the Rust implementation**\n\n- Put the core logic in a plain Rust function.\n- If it is a new module, add it to `crates/pi-natives/src/lib.rs`.\n- Expose it with `#[napi]` so the default snake_case -> camelCase mapping stays consistent.\n- Keep signatures owned and simple: `String`, `Vec<String>`, `Uint8Array`, `Either<JsString, Uint8Array>`, or `#[napi(object)]` structs.\n- For CPU-bound or blocking work, use `task::blocking`; for async work, use `task::future`.\n- If exposing cancellation, include `timeout_ms: Option<u32>` and `signal: Option<Unknown<'env>>` in options, create `CancelToken::new(...)`, and heartbeat in long loops.\n\n2. **Build generated bindings**\n\n- Run `bun --cwd=packages/natives run build`.\n- Confirm the generated `packages/natives/native/index.d.ts` includes the new export with the intended JS name/signature.\n- Confirm `packages/natives/native/index.js` has generated explicit ESM exports for the new class/function and enum objects when enum changes are involved.\n\n3. **Update consumers**\n\n- Import the new export directly from `@oh-my-pi/pi-natives`.\n- Replace only callsites where the native implementation is faster/equivalent and preserves behavior.\n- Remove obsolete JS implementation code in the same change when the native path becomes canonical.\n\n4. **Add benchmarks**\n\n- Put benchmarks next to the owning package (`packages/tui/bench`, `packages/natives/bench`, or `packages/coding-agent/bench`).\n- Include a JS baseline and native version in the same run.\n- Use `Bun.nanoseconds()` and a fixed iteration count.\n- Keep benchmark inputs realistic for the hot path.\n\n5. **Run focused verification**\n\n- Build the native package.\n- Run the benchmark.\n- Run the narrow tests or scenario covering the changed export/callsites.\n\n## Pain points and how to avoid them\n\n### 1) Stale platform/variant artifacts\n\nThe loader probes platform-tagged artifacts in deterministic order. For x64, selected variant candidates are tried before the unsuffixed default fallback:\n\n- `modern`: `pi_natives.<tag>-modern.node`, then `...-baseline.node`, then `pi_natives.<tag>.node`.\n- `baseline`: `pi_natives.<tag>-baseline.node`, then `pi_natives.<tag>.node`.\n\nNon-x64 uses `pi_natives.<tag>.node`.\n\nCompiled binaries also probe `<getNativesDir()>/<version>/...` and a legacy user-data directory before package/executable locations. Windows `node_modules` installs stage leaf/core addons into the same versioned directory before probing. If any earlier candidate is stale, a new export may appear missing unless the version sentinel rejects it first.\n\n**Fix:** remove stale candidate/cache files and rebuild.\n\n```bash\nrm packages/natives/native/pi_natives.<platform>-<arch>.node\nrm packages/natives/native/pi_natives.<platform>-<arch>-modern.node\nrm packages/natives/native/pi_natives.<platform>-<arch>-baseline.node\nbun --cwd=packages/natives run build\n```\n\nFor compiled binaries or Windows staging, delete the versioned addon cache shown in the loader error (normally under `~/.omp/natives/<version>` unless `$XDG_DATA_HOME/omp` is used).\n\n### 2) Generated types do not match loaded binary\n\nThis can happen when `native/index.d.ts` was regenerated but the `.node` file being loaded is stale, same-version incomplete, or from a different platform/variant. Different-version install/compiled binaries should be rejected by the version sentinel during loading.\n\nVerify the loaded export set from the actual candidate path reported by the loader:\n\n```bash\nbun -e 'import { createRequire } from \"node:module\"; const require = createRequire(import.meta.url); const mod = require(process.argv[2]); console.log(Object.keys(mod).sort())' -- /path/from/loader/error/pi_natives.<tag>[-variant].node\n```\n\nFix the build/candidate mismatch. Do not paper over it with optional consumer checks if the export is required.\n\n### 3) Rust signature mismatch\n\nKeep N-API signatures simple and owned. Avoid borrowed references like `&str` in public exports. If you need structured data, use `#[napi(object)]` structs. If you need callbacks, use napi-rs `ThreadsafeFunction` and keep callback error/value behavior explicit.\n\n### 4) Enum runtime exports and ESM named exports\n\nnapi-rs declarations alone are not enough for JS callers that import named symbols or use enum objects at runtime. `scripts/gen-enums.ts` reads `native/index.d.ts`, writes explicit `export const ... = nativeBindings...` entries for public classes/functions, and emits enum objects in `native/index.js`. If you add or change a native export, verify both `native/index.d.ts` and the generated export block in `native/index.js`.\n\n### 5) Benchmarking mistakes\n\n- Do not compare different inputs or allocations.\n- Keep JS and native using identical input arrays.\n- Run both in the same benchmark file to avoid skew.\n- Include enough iterations to smooth startup noise, but keep inputs realistic.\n\n## Benchmark template\n\n```ts\nconst ITERATIONS = 2000;\n\nfunction bench(name: string, fn: () => void): number {\n const start = Bun.nanoseconds();\n for (let i = 0; i < ITERATIONS; i++) fn();\n const elapsed = (Bun.nanoseconds() - start) / 1e6;\n console.log(\n `${name}: ${elapsed.toFixed(2)}ms total (${(elapsed / ITERATIONS).toFixed(6)}ms/op)`,\n );\n return elapsed;\n}\n\nbench(\"feature/js\", () => {\n jsImpl(sample);\n});\n\nbench(\"feature/native\", () => {\n nativeImpl(sample);\n});\n```\n\n## Verification checklist\n\n- Generated `native/index.d.ts` includes the new export and intended TS signature.\n- `native/index.js` includes the generated named export; enum objects are present when the change adds/changes enums.\n- The loaded `.node` file's `Object.keys(require(candidate))` includes the new export and the package-version sentinel.\n- Bench numbers are recorded in the PR/notes.\n- Call sites are updated only if native is faster/equal and behavior-compatible.\n- Obsolete JS code is removed when the native implementation becomes canonical.\n\n## Rule of thumb\n\n- If native is slower, do not switch callsites. Keep or remove the export based on whether it has a near-term owner.\n- If native is faster and behavior-compatible, switch callsites and keep a benchmark to catch regressions.\n",
|
|
52
52
|
"provider-streaming-internals.md": "# Provider streaming internals\n\nThis document explains how token/tool streaming is normalized in `@oh-my-pi/pi-ai`, then propagated through `@oh-my-pi/pi-agent-core` and `coding-agent` session events.\n\n## End-to-end flow\n\n1. `streamSimple()` (`packages/ai/src/stream.ts`) maps generic options and dispatches to a provider stream function.\n2. Provider stream functions translate provider-native stream events into the unified `AssistantMessageEvent` sequence. Current built-ins include Anthropic, OpenAI Responses/Completions/Codex/Azure Responses, Google Gemini/Gemini CLI/Vertex, Bedrock Converse, Ollama, Cursor, pi-native gateway transport, plus GitLab Duo/Kimi/Synthetic/xAI-Grok-Responses wrappers and extension-registered custom APIs.\n3. Each provider pushes events into `AssistantMessageEventStream` (`packages/ai/src/utils/event-stream.ts`), which exposes:\n - async iteration for incremental updates\n - `result()` for final `AssistantMessage`\n4. `agentLoop` (`packages/agent/src/agent-loop.ts`) consumes those events, mutates in-flight assistant state, and emits `message_update` events carrying the raw `assistantMessageEvent`.\n5. `AgentSession` (`packages/coding-agent/src/session/agent-session.ts`) subscribes to agent events, persists messages, drives extension hooks, and applies session behaviors (retry, compaction, TTSR, streaming-edit abort checks).\n\n## Unified stream contract in `@oh-my-pi/pi-ai`\n\nAll providers emit the same shape (`AssistantMessageEvent` in `packages/ai/src/types.ts`):\n\n- `start`\n- content block lifecycle triplets:\n - text: `text_start` → `text_delta`\\* → `text_end`\n - thinking: `thinking_start` → `thinking_delta`\\* → `thinking_end`\n - tool call: `toolcall_start` → `toolcall_delta`\\* → `toolcall_end`\n- terminal event:\n - `done` with `reason: \"stop\" | \"length\" | \"toolUse\"`\n - or `error` with `reason: \"aborted\" | \"error\"`\n\n`AssistantMessageEventStream` guarantees:\n\n- final result is resolved by terminal event (`done` or `error`)\n- events are delivered to consumers immediately, in push order (no batching or merging)\n\n## Delta throttling behavior\n\n`AssistantMessageEventStream` itself no longer throttles or merges delta events — every provider event is delivered as pushed. The per-delta cost control moved into tool-call argument parsing: providers accumulate partial JSON and re-parse it via `parseStreamingJsonThrottled()` (`packages/ai/src/utils/json-parse.ts`), which skips the re-parse until at least `STREAMING_JSON_PARSE_MIN_GROWTH` (256) new bytes have arrived, bounding mid-stream parse cost from quadratic to linear. The final `toolcall_end` parse is always unconditional and authoritative.\n\nThere is no provider backpressure: providers still produce at full speed, while the local stream queues.\n\n## Provider normalization details\n\n## Anthropic (`anthropic-messages`)\n\nSource: `packages/ai/src/providers/anthropic.ts`\n\nNormalization points:\n\n- `message_start` initializes usage (input/output/cache tokens)\n- `content_block_start` maps to text/thinking/toolcall starts\n- `content_block_delta` maps:\n - `text_delta` → `text_delta`\n - `thinking_delta` → `thinking_delta`\n - `input_json_delta` → `toolcall_delta`\n - `signature_delta` updates `thinkingSignature` only (no event)\n- `content_block_stop` emits corresponding `*_end`\n- `message_delta.stop_reason` maps via `mapStopReason()`\n\nTool-call argument streaming:\n\n- each tool block carries internal `partialJson`\n- every JSON delta appends to `partialJson`\n- `arguments` are reparsed on appended deltas via `parseStreamingJsonThrottled()` (re-parse only after ≥256 new bytes)\n- `toolcall_end` reparses once more, then strips `partialJson`\n\n## OpenAI Responses family (`openai-responses`, `openai-codex-responses`, `azure-openai-responses`)\n\nSources: `packages/ai/src/providers/openai-responses.ts`, `openai-codex-responses.ts`, and `azure-openai-responses.ts`\n\nNormalization points:\n\n- `response.output_item.added` starts reasoning/text/function-call/custom-tool blocks\n- reasoning summary events (`response.reasoning_summary_text.delta`) and raw reasoning events (`response.reasoning_text.delta`) become `thinking_delta`\n- output/refusal deltas become `text_delta`\n- `response.function_call_arguments.delta` and `response.custom_tool_call_input.delta` become `toolcall_delta`\n- `response.output_item.done` emits `thinking_end` / `text_end` / `toolcall_end`\n- `response.completed` maps status to stop reason and usage; `response.failed` / SDK `error` events throw into the wrapper's terminal `error` path\n\nTool-call argument streaming:\n\n- same `partialJson` accumulation pattern as Anthropic for function-call JSON arguments\n- custom tools stream raw string input and expose final arguments as `{ input: <raw> }`\n- providers that send only `response.function_call_arguments.done` still populate final args\n- tool call IDs are normalized as `\"<call_id>|<item_id>\"`\n\n## Google Generative AI (`google-generative-ai`)\n\nSource: `packages/ai/src/providers/google.ts` (thin request wrapper) and `google-shared.ts` (`streamGoogleGenAI`, shared chunk-to-block translation)\n\nNormalization points:\n\n- iterates `candidate.content.parts`\n- text parts are split into thinking vs text by `isThinkingPart(part)`\n- block transitions close previous block before starting a new one\n- `part.functionCall` is treated as a complete tool call (start/delta/end emitted immediately)\n- finish reason mapped by `mapStopReason()` from `google-shared.ts`\n\nTool-call argument streaming:\n\n- function call args arrive as structured object, not incremental JSON text\n- implementation emits one synthetic `toolcall_delta` containing `JSON.stringify(arguments)`\n- no partial JSON parser needed for Google in this path\n\n## Partial tool-call JSON accumulation and recovery\n\nShared behavior for Anthropic/OpenAI Responses uses `parseStreamingJson()` / `parseStreamingJsonThrottled()` (`packages/ai/src/utils/json-parse.ts`):\n\n1. try `JSON.parse`\n2. fallback to `repairJson()` + the `partial-json` parser for incomplete fragments\n3. if both fail, return `{}`\n\nImplications:\n\n- malformed or truncated argument deltas do not crash stream processing immediately\n- in-progress `arguments` may temporarily be `{}`\n- later valid deltas can recover structured arguments because parsing is retried as the buffer grows (throttled to ≥256-byte growth steps mid-stream)\n- final `toolcall_end` performs one more parse attempt before emission\n\n## Stop reasons vs transport/runtime errors\n\nProvider stop reasons are mapped to normalized `stopReason`:\n\n- Anthropic: `end_turn`→`stop`, `max_tokens`→`length`, `tool_use`→`toolUse`, safety/refusal cases→`error`\n- OpenAI Responses: `completed`→`stop`, `incomplete`→`length`, `failed/cancelled`→`error`\n- Google: `STOP`→`stop`, `MAX_TOKENS`→`length`, safety/prohibited/malformed-function-call classes→`error`\n\nError semantics are split in two stages:\n\n1. **Model completion semantics** (provider reported finish reason/status)\n2. **Transport/runtime failure** (network/client/parser/abort exceptions)\n\nIf provider stream throws or signals failure, each provider wrapper catches and emits terminal `error` event with:\n\n- `stopReason = \"aborted\"` when abort signal is set\n- otherwise `stopReason = \"error\"`\n- `errorMessage = formatErrorMessageWithRetryAfter(error)`\n\n## Malformed chunk / SSE parse failure behavior\n\nThe OpenAI Completions/Responses paths delegate chunk/SSE framing to the `openai` SDK stream. Anthropic uses the in-repo `AnthropicMessagesClient` (`packages/ai/src/providers/anthropic-client.ts`); the Google paths and the Codex SSE fallback read SSE via `readSseJson()` directly, and websocket Codex frames are normalized through the same event handler.\n\nObserved behavior in current implementation:\n\n- malformed SDK stream parsing surfaces as an exception or stream `error` event\n- malformed Codex SSE JSON/framing throws from the local SSE reader\n- provider wrapper converts failures into unified terminal `error` events\n- no provider-specific resume/retry inside the stream function itself, except Codex websocket-to-SSE transport fallback before replay-unsafe output is emitted\n- higher-level retries are handled in `AgentSession` auto-retry logic (message-level retry, not stream-chunk replay)\n\n## Cancellation boundaries\n\nCancellation is layered:\n\n- AI provider request: `options.signal` is passed into provider client stream call.\n- Provider wrapper: after stream loop, aborted signal forces error path (`\"Request was aborted\"`).\n- Agent loop: checks `signal.aborted` before handling each provider event and can synthesize an aborted assistant message from the latest partial.\n- Session/agent controls: `AgentSession.abort()` -> `agent.abort()` -> shared abort controller cancellation.\n\nTool execution cancellation is separate from model stream cancellation:\n\n- tool runners use `AbortSignal.any([agentSignal, steeringAbortSignal])`\n- steering interrupts can abort remaining tool execution while preserving already-produced tool results\n\n## Backpressure boundaries\n\nThere is no hard backpressure mechanism between provider SDK stream and downstream consumers:\n\n- `EventStream` uses in-memory queues with no max size\n- the throttled partial-JSON re-parse reduces per-delta CPU cost but does not slow provider intake\n- if consumers lag significantly, queued events can grow until completion\n\nCurrent design favors responsiveness and simple ordering over bounded-buffer flow control.\n\n## How stream events surface as agent/session events\n\n`agentLoop.streamAssistantResponse()` bridges `AssistantMessageEvent` to `AgentEvent`:\n\n- on `start`: pushes placeholder assistant message and emits `message_start`\n- on block events (`text_*`, `thinking_*`, `toolcall_*`): updates last assistant message, emits `message_update` with raw `assistantMessageEvent`\n- on terminal (`done`/`error`): resolves final message from `response.result()`, emits `message_end`\n\n`AgentSession` then consumes those events for session-level behaviors:\n\n- TTSR watches `message_update.assistantMessageEvent` for `text_delta`, `thinking_delta`, and `toolcall_delta`\n- streaming edit guard inspects `toolcall_delta`/`toolcall_end` on `edit` calls and can abort early\n- persistence writes finalized messages at `message_end`\n- auto-retry examines assistant `stopReason === \"error\"` plus `errorMessage` heuristics\n\n## Unified vs provider-specific responsibilities\n\nUnified (common contract):\n\n- event shape (`AssistantMessageEvent`)\n- final result extraction (`done`/`error`)\n- immediate in-order event delivery\n- agent/session event propagation model\n\nProvider-specific (not fully abstracted):\n\n- upstream event taxonomies and mapping logic\n- stop-reason translation tables\n- tool-call ID conventions\n- reasoning/thinking block semantics and signatures\n- usage token semantics and availability timing\n- message conversion constraints per API\n\n## Implementation files\n\n- [`../../ai/src/stream.ts`](../packages/ai/src/stream.ts) — provider dispatch, option mapping, API key/session plumbing, custom API dispatch, and provider-specific credential handling.\n- [`../../ai/src/utils/event-stream.ts`](../packages/ai/src/utils/event-stream.ts) — generic stream queue + final-result resolution.\n- [`../../ai/src/utils/json-parse.ts`](../packages/ai/src/utils/json-parse.ts) — partial JSON parsing for streamed tool arguments.\n- [`../../ai/src/providers/anthropic.ts`](../packages/ai/src/providers/anthropic.ts) — Anthropic event translation and tool JSON delta accumulation.\n- [`../../ai/src/providers/openai-responses.ts`](../packages/ai/src/providers/openai-responses.ts), [`openai-responses-shared.ts`](../packages/ai/src/providers/openai-responses-shared.ts), [`openai-codex-responses.ts`](../packages/ai/src/providers/openai-codex-responses.ts), [`azure-openai-responses.ts`](../packages/ai/src/providers/azure-openai-responses.ts) — Responses-family event translation and status mapping.\n- [`../../ai/src/providers/google.ts`](../packages/ai/src/providers/google.ts), [`google-gemini-cli.ts`](../packages/ai/src/providers/google-gemini-cli.ts), [`google-vertex.ts`](../packages/ai/src/providers/google-vertex.ts) — Gemini stream chunk-to-block translation variants.\n- [`../../ai/src/providers/google-shared.ts`](../packages/ai/src/providers/google-shared.ts) — Gemini finish-reason mapping and shared conversion rules.\n- [`../../ai/src/providers/amazon-bedrock.ts`](../packages/ai/src/providers/amazon-bedrock.ts), [`openai-completions.ts`](../packages/ai/src/providers/openai-completions.ts), [`ollama.ts`](../packages/ai/src/providers/ollama.ts), [`cursor.ts`](../packages/ai/src/providers/cursor.ts), [`pi-native-client.ts`](../packages/ai/src/providers/pi-native-client.ts) — additional built-in stream adapters using the same event contract.\n- [`../../agent/src/agent-loop.ts`](../packages/agent/src/agent-loop.ts) — provider stream consumption and `message_update` bridging.\n- [`../src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts) — session-level handling of streaming updates, abort, retry, and persistence.\n",
|
|
53
|
-
"providers.md": "# Providers\n\nProviders are the model backends `omp` can route requests to: Anthropic, OpenAI, Google Gemini, Groq, OpenRouter, Mistral, xAI, local engines like Ollama, hosted gateways, custom `models.yml` providers, and providers registered by extensions.\n\nA **provider** is the account or backend namespace, such as `anthropic`, `openai`, `google`, or `ollama`. A **model** is a concrete model under that provider, selected as `provider/model-id`, such as `anthropic/claude-opus-4-6`. Disabling a provider removes every model under it from selection; if you only want to narrow individual models, use model settings instead.\n\nThis page covers how providers become available, how credentials are resolved, the provider/environment-variable map, local engines, disabling providers, and custom providers. For model selection and the full `models.yml` schema, see [Model and Provider Configuration](./models.md). For config-file locations and merge precedence, see [Settings](./settings.md). For credential storage and login flows in depth, see [Secrets and credentials](./secrets.md). For the complete environment-variable reference, see [Environment variables](./environment-variables.md). For local engine setup, see [Local models](./local-models.md). For context-file discovery providers, see [Context files](./context-files.md).\n\n## How `omp` decides a provider is available\n\nAt startup the model registry assembles its catalog from four sources, in order:\n\n1. The bundled model catalog (every built-in provider and its known models).\n2. Custom provider and model entries from `~/.omp/agent/models.yml`.\n3. Runtime-discovered models for providers that support discovery (local engines and discovery-enabled gateways).\n4. Providers and models registered by extensions.\n\nThe registry can hold a model even when it is not currently selectable. A model becomes **available** only when both conditions hold:\n\n1. its provider ID is **not** in the effective `disabledProviders` list; **and**\n2. the provider is either **keyless** (an implicit local provider, or a custom provider with `auth: none`) **or** has resolvable credentials.\n\n`disabledProviders` is checked *before* credentials. If a provider ID is disabled, no stored key, OAuth session, environment variable, `.env` entry, or `models.yml` `apiKey` will make it selectable — the provider's models are dropped from availability regardless of credentials. Removing the ID from the effective list restores them.\n\nKeyless local engines are a special case: `ollama`, `llama.cpp`, and `lm-studio` are treated as keyless when no key is configured, so their discovered models are selectable as soon as the engine answers — no login required. See [Built-in local engines](#built-in-local-engines).\n\n## Credentials and precedence\n\nWhen a provider needs an API key, `omp` resolves it in this order (first match wins):\n\n1. **Runtime override** — a key supplied for the current process, e.g. CLI `--api-key`. Never persisted.\n2. **`models.yml` config key** — an `apiKey` pinned on a custom provider, registered as a config-sourced bearer. This deliberately beats stored OAuth, so a key supplied for a custom `baseUrl`/gateway is honored instead of forwarding an upstream OAuth token the proxy would reject.\n3. **Stored API key** — an API-key credential saved in the auth store.\n4. **Stored OAuth credential** — refreshed when needed; multiple accounts are ranked/rotated automatically.\n5. **Provider environment variable** — including values loaded from `.env` files (see [the env-var table](#environment-variables-and-env-files)).\n6. **`models.yml` fallback resolver** — keys for custom providers not otherwise registered.\n\nStored credentials live in the auth store at `~/.omp/agent/agent.db` for local auth, or in the configured auth-broker snapshot when running in broker mode. (`PI_CODING_AGENT_DIR` relocates the `~/.omp/agent` base, and the auth store moves with it.)\n\n### OAuth vs API key, and provider-scoped logins\n\nLogins are **provider-scoped**: authenticating `anthropic` does not authenticate `openai`, and each provider tracks its own credentials. A disabled provider stays disabled even with valid stored auth.\n\nUse the interactive slash commands inside a session:\n\n- `/login` — opens the OAuth/key selector. `/login <provider>` jumps straight to one provider (e.g. `/login anthropic`); for an OAuth flow that needs a pasted callback, run `/login <redirect-url>` to complete it.\n- `/logout` — opens the provider selector to remove stored credentials.\n\nFor headless or remote setups backed by a shared auth broker, the CLI exposes `omp auth-broker login <provider>` / `omp auth-broker logout` (and `status`, `list`, `import`, `migrate`). See [Secrets and credentials](./secrets.md) for the broker model.\n\nWhen a model has no credentials, `omp` tells you to run `/login` or set the provider's environment variable.\n\n### Pinning a key in `models.yml`\n\nA custom provider's `apiKey` is resolved as **environment-variable-name-or-literal**: if the value names an existing environment variable, that variable's value is used; otherwise the string itself is the key. Prefixing the value with `!` runs it as a shell command and uses the trimmed stdout (see [Model and Provider Configuration](./models.md) for the full value syntax).\n\n```yaml\n# ~/.omp/agent/models.yml\nproviders:\n my-gateway:\n baseUrl: https://gateway.example.com/v1\n api: openai-completions\n apiKey: MY_GATEWAY_API_KEY # reads this env var if set, else literal text\n models:\n - id: claude-sonnet\n name: Claude Sonnet via Gateway\n contextWindow: 200000\n maxTokens: 8192\n```\n\nIf `authHeader: true` is set on a custom provider, the resolved key is injected as an `Authorization: Bearer <key>` header on every request to that provider.\n\n## Environment variables and `.env` files\n\nEach provider has one or more environment variables that supply a key when no stored credential exists. The table below is the verified provider → variable map; the full catalog is large, so it is split into core and additional providers. OAuth-backed providers can also accept a token variable in addition to (or instead of) an API key.\n\n### Core providers\n\n| Provider ID | Environment variable(s) |\n|---|---|\n| `anthropic` | `ANTHROPIC_OAUTH_TOKEN`, then `ANTHROPIC_API_KEY` (Foundry mode prefers `ANTHROPIC_FOUNDRY_API_KEY` when `CLAUDE_CODE_USE_FOUNDRY=true`) |\n| `openai` | `OPENAI_API_KEY` |\n| `openai-codex` | `OPENAI_CODEX_OAUTH_TOKEN` |\n| `google` | `GEMINI_API_KEY` |\n| `google-vertex` | `GOOGLE_CLOUD_API_KEY`, or Application Default Credentials (`GOOGLE_APPLICATION_CREDENTIALS` + `GOOGLE_CLOUD_PROJECT` + `GOOGLE_CLOUD_LOCATION`) |\n| `groq` | `GROQ_API_KEY` |\n| `openrouter` | `OPENROUTER_API_KEY` |\n| `mistral` | `MISTRAL_API_KEY` |\n| `xai` | `XAI_API_KEY` |\n| `xai-oauth` | `XAI_OAUTH_TOKEN`, then `XAI_API_KEY` |\n| `github-copilot` | `COPILOT_GITHUB_TOKEN` |\n| `cursor` | `CURSOR_ACCESS_TOKEN` |\n| `azure-openai-responses` | `AZURE_OPENAI_API_KEY` |\n| `amazon-bedrock` | `AWS_PROFILE`, or `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY`, or an ECS/IRSA credential chain |\n\n### Additional hosted providers\n\n| Provider ID | Environment variable(s) |\n|---|---|\n| `cerebras` | `CEREBRAS_API_KEY` |\n| `deepseek` | `DEEPSEEK_API_KEY` |\n| `fireworks` | `FIREWORKS_API_KEY` |\n| `together` | `TOGETHER_API_KEY` |\n| `nvidia` | `NVIDIA_API_KEY` |\n| `huggingface` | `HUGGINGFACE_HUB_TOKEN`, then `HF_TOKEN` |\n| `moonshot` | `MOONSHOT_API_KEY` |\n| `kimi-code` | `KIMI_API_KEY` |\n| `nanogpt` | `NANO_GPT_API_KEY` |\n| `venice` | `VENICE_API_KEY` |\n| `vercel-ai-gateway` | `AI_GATEWAY_API_KEY` (also `VERCEL_AI_GATEWAY_API_KEY` for catalog discovery) |\n| `cloudflare-ai-gateway` | `CLOUDFLARE_AI_GATEWAY_API_KEY` |\n| `litellm` | `LITELLM_API_KEY` |\n| `kilo` | `KILO_API_KEY` |\n| `zai` | `ZAI_API_KEY` |\n| `zenmux` | `ZENMUX_API_KEY` |\n| `zhipu-coding-plan` | `ZHIPU_API_KEY` |\n| `qianfan` | `QIANFAN_API_KEY` |\n| `qwen-portal` | `QWEN_OAUTH_TOKEN`, then `QWEN_PORTAL_API_KEY` |\n| `synthetic` | `SYNTHETIC_API_KEY` |\n| `minimax` | `MINIMAX_API_KEY` |\n| `alibaba-coding-plan` | `ALIBABA_CODING_PLAN_API_KEY` |\n| `aimlapi` | `AIMLAPI_API_KEY` |\n| `gitlab-duo` | `GITLAB_TOKEN` |\n| `opencode-zen`, `opencode-go` | `OPENCODE_API_KEY` |\n| `firepass` | `FIREPASS_API_KEY` |\n| `wafer-pass` | `WAFER_PASS_API_KEY` |\n| `wafer-serverless` | `WAFER_SERVERLESS_API_KEY` |\n| `xiaomi` | `XIAOMI_API_KEY` |\n| `ollama-cloud` | `OLLAMA_CLOUD_API_KEY` |\n| `ollama` | `OLLAMA_API_KEY` (optional; local discovery is keyless by default) |\n| `lm-studio` | `LM_STUDIO_API_KEY` (optional; keyless by default) |\n| `llama.cpp` | `LLAMA_CPP_API_KEY` (only when the server requires auth) |\n\nOAuth-backed providers such as `anthropic`, `github-copilot`, `cursor`, `ollama-cloud`, `qwen-portal`, `xai-oauth`, `wafer-pass`, `wafer-serverless`, `google-gemini-cli`, and `google-antigravity` are normally reached through `/login` rather than an environment variable. See [Environment variables](./environment-variables.md) for search-tool and configuration variables not listed here.\n\n### `.env` discovery and precedence\n\n`omp` eagerly loads `.env` files into the process environment before any provider lookup. It reads four files and, for each variable, the **first** source that defines it wins. Effective precedence, high to low:\n\n1. The process environment inherited by `omp` (already-set variables always win).\n2. `<cwd>/.env`\n3. `~/.omp/agent/.env`\n4. `~/.omp/.env`\n5. `~/.env`\n\nA variable already present in the process environment is never overwritten by a `.env` file. Among the files, a value set in `<cwd>/.env` wins over `~/.omp/agent/.env`, which wins over `~/.omp/.env`, which wins over `~/.env`. So a shell-exported `OPENAI_API_KEY` beats every `.env` file, and a project's `<cwd>/.env` beats your home `~/.env`.\n\nProject-local `.env` is the simplest way to make one repository use a project-specific gateway, key, or local endpoint:\n\n```dotenv\n# <project>/.env\nOPENROUTER_API_KEY=sk-or-...\nOLLAMA_BASE_URL=http://127.0.0.1:11434\n```\n\n`.env` parsing is intentionally minimal:\n\n- blank lines and lines starting with `#` are ignored;\n- keys must match `[A-Za-z_][A-Za-z0-9_]*` (shell-identifier shape) — other names are dropped;\n- values may be wrapped in single or double quotes, which are stripped;\n- values containing a NUL byte are dropped;\n- an `OMP_`-prefixed key is also mirrored to the matching `PI_`-prefixed name.\n\n## Built-in local engines\n\nThree local engines are discovered automatically without needing a `models.yml` entry. Each uses a base URL that can be overridden by an environment variable:\n\n| Provider ID | Base URL (env override → default) | Notes |\n|---|---|---|\n| `ollama` | `OLLAMA_BASE_URL`, then `OLLAMA_HOST` (normalized), else `http://127.0.0.1:11434` | Keyless by default. |\n| `llama.cpp` | `LLAMA_CPP_BASE_URL`, else `http://127.0.0.1:8080` | Keyless unless a key is stored for `llama.cpp`. |\n| `lm-studio` | `LM_STUDIO_BASE_URL`, else `http://127.0.0.1:1234/v1` | Keyless by default. |\n\nThese implicit engines are **skipped** when:\n\n- a provider with the same ID is already configured in `models.yml` (your explicit config wins); or\n- the provider ID appears in the effective `disabledProviders` list.\n\nFor installing and running these engines, see [Local models](./local-models.md).\n\n## Disabling model providers\n\nUse the `disabledProviders` setting to remove a provider's models from selection:\n\n```yaml\n# ~/.omp/agent/config.yml or <project>/.omp/config.yml\ndisabledProviders:\n - anthropic\n - openai\n - google\n - groq\n```\n\nProvider IDs are matched exactly. Disable `google` to hide the Google Gemini API provider; the OAuth-backed Google providers `google-gemini-cli` and `google-antigravity` are separate IDs and must be disabled individually. Disable `ollama`, `llama.cpp`, or `lm-studio` to stop local discovery for that engine.\n\n`disabledProviders` applies uniformly to:\n\n- bundled catalog providers;\n- custom `models.yml` providers;\n- runtime-discovered provider models;\n- extension-registered providers;\n- implicit local engines.\n\nDisabling a provider does not delete its stored credentials — re-enable it by removing its ID from the effective list.\n\n## Project-specific provider control\n\nProject settings live in `<project>/.omp/config.yml`. Use them when one repository must allow or hide a different provider set than your global default:\n\n```yaml\n# <project>/.omp/config.yml\ndisabledProviders:\n - openai\n - openrouter\n```\n\nSettings arrays are **replaced** wholesale by the higher-precedence layer, not merged or appended. If the global file disables three providers and the project file disables one, the project sees only the project list:\n\n```yaml\n# ~/.omp/agent/config.yml\ndisabledProviders:\n - anthropic\n - openai\n - google\n\n# <project>/.omp/config.yml\ndisabledProviders:\n - groq\n```\n\nEffective result inside the project:\n\n```json\n[\"groq\"]\n```\n\nThe project array re-enables `anthropic`, `openai`, and `google` for sessions launched from that project. If you want a project to *add* to the global set, repeat the global IDs in the project file. See [Settings](./settings.md) for the full precedence chain, including `--config` overlays and runtime overrides.\n\n## Path-scoped `disabledProviders`\n\n`disabledProviders` can mix plain string entries (apply everywhere) with path-scoped entries (apply only when the current working directory matches a configured path):\n\n```yaml\ndisabledProviders:\n - ollama\n - path: ~/projects/sensitive\n providers:\n - anthropic\n - openai\n - paths:\n - ~/work/client-a\n - ~/work/client-b\n values:\n - openrouter\n```\n\n- Bare string entries always apply.\n- A scoped entry applies when the current working directory **is** the configured path or sits **under** it. `~` expands to the home directory.\n- Accepted path keys: `path`, `paths`, `pathPrefix`, `pathPrefixes`.\n- Accepted value keys: `providers`, `values`, `items`.\n\nFor the example above:\n\n- `ollama` is disabled everywhere.\n- `anthropic` and `openai` are additionally disabled under `~/projects/sensitive`.\n- `openrouter` is additionally disabled under `~/work/client-a` and `~/work/client-b`.\n\nPath scopes are resolved **after** the settings merge. Because a higher-precedence layer replaces the whole array, a project-level `disabledProviders` array drops any scoped entries that only existed in the global array. `enabledModels` is the only other setting that supports the same path-scoped form. See [Settings](./settings.md) for details.\n\n## Provider IDs vs discovery provider IDs\n\n`disabledProviders` uses a **single shared ID namespace** that gates two different subsystems:\n\n- **Model providers** — the backends on this page (`anthropic`, `openai`, `ollama`, a custom `models.yml` ID, …). Disabling one removes its models from selection.\n- **Discovery providers** — sources of context files, MCP servers, commands, skills, hooks, tools, prompts, and settings. Disabling one stops that source from contributing capability items.\n\n| Entry type | Examples | Effect |\n|---|---|---|\n| Model provider ID | `anthropic`, `openai`, `google`, `groq`, `openrouter`, `ollama`, `my-gateway` | Removes that provider's models from availability. |\n| Discovery provider ID | `native`, `claude`, `codex`, `gemini`, `agents`, `github` | Stops that discovery source from contributing capability items. |\n\nWatch the related names. The Google Gemini **API** models use the model provider ID `google`; `gemini` is a **discovery** provider ID (the source that reads `GEMINI.md`), not the Google model provider. Use discovery IDs only when you intend to disable an entire config source. See [Context files](./context-files.md) for the discovery-provider side.\n\n## Custom providers in `models.yml`\n\nCustom providers live in `~/.omp/agent/models.yml` under `providers:`. A provider ID defined there participates in the same selection, credential resolution, and `disabledProviders` rules as built-in providers.\n\nMinimal OpenAI-compatible provider:\n\n```yaml\nproviders:\n my-openai-compatible:\n baseUrl: https://api.example.com/v1\n api: openai-completions\n apiKey: MY_OPENAI_COMPATIBLE_KEY # env-var-name or literal\n models:\n - id: fast-chat\n name: Fast Chat\n contextWindow: 128000\n maxTokens: 8192\n```\n\nKeyless local provider (no credentials required):\n\n```yaml\nproviders:\n local-proxy:\n baseUrl: http://127.0.0.1:4000/v1\n api: openai-completions\n auth: none\n models:\n - id: local-model\n name: Local Model\n contextWindow: 32768\n maxTokens: 4096\n```\n\nDiscovery-enabled provider (models fetched from the endpoint at runtime):\n\n```yaml\nproviders:\n team-proxy:\n baseUrl: https://models.example.com/v1\n apiKey: TEAM_PROXY_API_KEY\n authHeader: true # send Authorization: Bearer <resolved key>\n disableStrictTools: true\n discovery:\n type: proxy\n```\n\nFor the full schema, all allowed `api` values, discovery `type`s, model overrides, and equivalence settings, see [Model and Provider Configuration](./models.md).\n\nTo disable a custom provider, list its ID exactly:\n\n```yaml\ndisabledProviders:\n - my-openai-compatible\n - team-proxy\n```\n\n## Troubleshooting\n\n**A provider's models are not selectable.** Confirm the provider has credentials (`/login <provider>`, an exported environment variable, or a `models.yml` `apiKey`) and that its ID is not in the effective `disabledProviders` list. Remember the rule: not disabled **and** (keyless **or** has credentials). Keyless local engines only appear once the engine is actually running and responding.\n\n**The wrong key is being used (a stale key from `.env`).** Resolution favors runtime `--api-key`, then a `models.yml` config key, then stored credentials, then environment/`.env`. An already-set process environment variable also beats every `.env` file, and `<cwd>/.env` beats `~/.env`. If an unexpected key wins, check for an exported shell variable and the four `.env` files in precedence order, and clear the one that should not apply.\n\n**A provider still appears even though I disabled it.** `disabledProviders` arrays are replaced, not merged: a project `<project>/.omp/config.yml` array fully overrides the global one. Verify the *effective* list for the directory you are in (path-scoped entries only apply at or under their configured path), and confirm the ID is spelled exactly. Use `omp config get disabledProviders` to inspect the merged value (see [Settings](./settings.md)).\n\n**A discovery provider name had no effect on models (or vice-versa).** The ID namespace is shared. `gemini`, `codex`, `claude`, `native`, and `agents` are discovery-source IDs; the Google model backend is `google`. Make sure you are disabling the right kind of provider.\n\n**A custom `models.yml` provider does not load.** A YAML or schema error makes the registry skip the custom file. Validate the file with `omp models` (use `omp models find <substr>` to scope it to one provider), confirm each provider has a `baseUrl`, a valid `api`, and at least one model entry, and that an implicit local engine is not silently shadowing it (an explicit `ollama`/`lm-studio`/`llama.cpp` entry replaces the built-in discovery for that ID). See [Model and Provider Configuration](./models.md).\n",
|
|
53
|
+
"providers.md": "# Providers\n\nProviders are the model backends `omp` can route requests to: Anthropic, OpenAI, Google Gemini, Groq, OpenRouter, Mistral, xAI, local engines like Ollama, hosted gateways, custom `models.yml` providers, and providers registered by extensions.\n\nA **provider** is the account or backend namespace, such as `anthropic`, `openai`, `google`, or `ollama`. A **model** is a concrete model under that provider, selected as `provider/model-id`, such as `anthropic/claude-opus-4-6`. Disabling a provider removes every model under it from selection; if you only want to narrow individual models, use model settings instead.\n\nThis page covers how providers become available, how credentials are resolved, the provider/environment-variable map, local engines, disabling providers, and custom providers. For model selection and the full `models.yml` schema, see [Model and Provider Configuration](./models.md). For config-file locations and merge precedence, see [Settings](./settings.md). For credential storage and login flows in depth, see [Secrets and credentials](./secrets.md). For the complete environment-variable reference, see [Environment variables](./environment-variables.md). For local engine setup, see [Local models](./local-models.md). For context-file discovery providers, see [Context files](./context-files.md).\n\n## How `omp` decides a provider is available\n\nAt startup the model registry assembles its catalog from four sources, in order:\n\n1. The bundled model catalog (every built-in provider and its known models).\n2. Custom provider and model entries from `~/.omp/agent/models.yml`.\n3. Runtime-discovered models for providers that support discovery (local engines and discovery-enabled gateways).\n4. Providers and models registered by extensions.\n\nThe registry can hold a model even when it is not currently selectable. A model becomes **available** only when both conditions hold:\n\n1. its provider ID is **not** in the effective `disabledProviders` list; **and**\n2. the provider is either **keyless** (an implicit local provider, or a custom provider with `auth: none`) **or** has resolvable credentials.\n\n`disabledProviders` is checked *before* credentials. If a provider ID is disabled, no stored key, OAuth session, environment variable, `.env` entry, or `models.yml` `apiKey` will make it selectable — the provider's models are dropped from availability regardless of credentials. Removing the ID from the effective list restores them.\n\nKeyless local engines are a special case: `ollama`, `llama.cpp`, and `lm-studio` are treated as keyless when no key is configured, so their discovered models are selectable as soon as the engine answers — no login required. See [Built-in local engines](#built-in-local-engines).\n\n## Credentials and precedence\n\nWhen a provider needs an API key, `omp` resolves it in this order (first match wins):\n\n1. **Runtime override** — a key supplied for the current process, e.g. CLI `--api-key`. Never persisted.\n2. **`models.yml` config key** — an `apiKey` pinned on a custom provider, registered as a config-sourced bearer. This deliberately beats stored OAuth, so a key supplied for a custom `baseUrl`/gateway is honored instead of forwarding an upstream OAuth token the proxy would reject.\n3. **Stored API key** — an API-key credential saved in the auth store.\n4. **Stored OAuth credential** — refreshed when needed; multiple accounts are ranked/rotated automatically.\n5. **Provider environment variable** — including values loaded from `.env` files (see [the env-var table](#environment-variables-and-env-files)).\n6. **`models.yml` fallback resolver** — keys for custom providers not otherwise registered.\n\nStored credentials live in the auth store at `~/.omp/agent/agent.db` for local auth, or in the configured auth-broker snapshot when running in broker mode. (`PI_CODING_AGENT_DIR` relocates the `~/.omp/agent` base, and the auth store moves with it.)\n\n### OAuth vs API key, and provider-scoped logins\n\nLogins are **provider-scoped**: authenticating `anthropic` does not authenticate `openai`, and each provider tracks its own credentials. A disabled provider stays disabled even with valid stored auth.\n\nUse the interactive slash commands inside a session:\n\n- `/login` — opens the OAuth/key selector. `/login <provider>` jumps straight to one provider (e.g. `/login anthropic`); for an OAuth flow that needs a pasted callback, run `/login <redirect-url>` to complete it.\n- `/logout` — opens the provider selector to remove stored credentials.\n\nFor headless or remote setups backed by a shared auth broker, the CLI exposes `omp auth-broker login <provider>` / `omp auth-broker logout` (and `status`, `list`, `import`, `migrate`). See [Secrets and credentials](./secrets.md) for the broker model.\n\nWhen a model has no credentials, `omp` tells you to run `/login` or set the provider's environment variable.\n\n### Pinning a key in `models.yml`\n\nA custom provider's `apiKey` is resolved as **environment-variable-name-or-literal**: if the value names an existing environment variable, that variable's value is used; otherwise the string itself is the key. Prefixing the value with `!` runs it as a shell command and uses the trimmed stdout (see [Model and Provider Configuration](./models.md) for the full value syntax).\n\n```yaml\n# ~/.omp/agent/models.yml\nproviders:\n my-gateway:\n baseUrl: https://gateway.example.com/v1\n api: openai-completions\n apiKey: MY_GATEWAY_API_KEY # reads this env var if set, else literal text\n models:\n - id: claude-sonnet\n name: Claude Sonnet via Gateway\n contextWindow: 200000\n maxTokens: 8192\n```\n\nIf `authHeader: true` is set on a custom provider, the resolved key is injected as an `Authorization: Bearer <key>` header on every request to that provider.\n\n## Environment variables and `.env` files\n\nEach provider has one or more environment variables that supply a key when no stored credential exists. The table below is the verified provider → variable map; the full catalog is large, so it is split into core and additional providers. OAuth-backed providers can also accept a token variable in addition to (or instead of) an API key.\n\n### Core providers\n\n| Provider ID | Environment variable(s) |\n|---|---|\n| `anthropic` | `ANTHROPIC_OAUTH_TOKEN`, then `ANTHROPIC_API_KEY` (Foundry mode prefers `ANTHROPIC_FOUNDRY_API_KEY` when `CLAUDE_CODE_USE_FOUNDRY=true`) |\n| `openai` | `OPENAI_API_KEY` |\n| `openai-codex` | `OPENAI_CODEX_OAUTH_TOKEN` |\n| `google` | `GEMINI_API_KEY` |\n| `google-vertex` | `GOOGLE_CLOUD_API_KEY`, or Application Default Credentials (`GOOGLE_APPLICATION_CREDENTIALS` + `GOOGLE_CLOUD_PROJECT` + `GOOGLE_CLOUD_LOCATION`) |\n| `groq` | `GROQ_API_KEY` |\n| `openrouter` | `OPENROUTER_API_KEY` |\n| `mistral` | `MISTRAL_API_KEY` |\n| `xai` | `XAI_API_KEY` |\n| `xai-oauth` | `XAI_OAUTH_TOKEN`, then `XAI_API_KEY` |\n| `github-copilot` | `COPILOT_GITHUB_TOKEN` |\n| `cursor` | `CURSOR_ACCESS_TOKEN` |\n| `azure-openai-responses` | `AZURE_OPENAI_API_KEY` |\n| `amazon-bedrock` | `AWS_PROFILE`, or `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY`, or an ECS/IRSA credential chain |\n\n### Additional hosted providers\n\n| Provider ID | Environment variable(s) |\n|---|---|\n| `cerebras` | `CEREBRAS_API_KEY` |\n| `deepseek` | `DEEPSEEK_API_KEY` |\n| `fireworks` | `FIREWORKS_API_KEY` |\n| `together` | `TOGETHER_API_KEY` |\n| `nvidia` | `NVIDIA_API_KEY` |\n| `huggingface` | `HUGGINGFACE_HUB_TOKEN`, then `HF_TOKEN` |\n| `moonshot` | `MOONSHOT_API_KEY` |\n| `kimi-code` | `KIMI_API_KEY` |\n| `nanogpt` | `NANO_GPT_API_KEY` |\n| `venice` | `VENICE_API_KEY` |\n| `vercel-ai-gateway` | `AI_GATEWAY_API_KEY` (also `VERCEL_AI_GATEWAY_API_KEY` for catalog discovery) |\n| `cloudflare-ai-gateway` | `CLOUDFLARE_AI_GATEWAY_API_KEY` |\n| `litellm` | `LITELLM_API_KEY` |\n| `kilo` | `KILO_API_KEY` |\n| `zai` | `ZAI_API_KEY` |\n| `zenmux` | `ZENMUX_API_KEY` |\n| `zhipu-coding-plan` | `ZHIPU_API_KEY` |\n| `umans` | `UMANS_AI_CODING_PLAN_API_KEY` |\n| `qianfan` | `QIANFAN_API_KEY` |\n| `qwen-portal` | `QWEN_OAUTH_TOKEN`, then `QWEN_PORTAL_API_KEY` |\n| `synthetic` | `SYNTHETIC_API_KEY` |\n| `minimax` | `MINIMAX_API_KEY` |\n| `alibaba-coding-plan` | `ALIBABA_CODING_PLAN_API_KEY` |\n| `aimlapi` | `AIMLAPI_API_KEY` |\n| `gitlab-duo` | `GITLAB_TOKEN` |\n| `opencode-zen`, `opencode-go` | `OPENCODE_API_KEY` |\n| `firepass` | `FIREPASS_API_KEY` |\n| `wafer-pass` | `WAFER_PASS_API_KEY` |\n| `wafer-serverless` | `WAFER_SERVERLESS_API_KEY` |\n| `xiaomi` | `XIAOMI_API_KEY` |\n| `ollama-cloud` | `OLLAMA_CLOUD_API_KEY` |\n| `ollama` | `OLLAMA_API_KEY` (optional; local discovery is keyless by default) |\n| `lm-studio` | `LM_STUDIO_API_KEY` (optional; keyless by default) |\n| `llama.cpp` | `LLAMA_CPP_API_KEY` (only when the server requires auth) |\n\nOAuth-backed providers such as `anthropic`, `github-copilot`, `cursor`, `ollama-cloud`, `qwen-portal`, `xai-oauth`, `wafer-pass`, `wafer-serverless`, `google-gemini-cli`, and `google-antigravity` are normally reached through `/login` rather than an environment variable. See [Environment variables](./environment-variables.md) for search-tool and configuration variables not listed here.\n\n### `.env` discovery and precedence\n\n`omp` eagerly loads `.env` files into the process environment before any provider lookup. It reads four files and, for each variable, the **first** source that defines it wins. Effective precedence, high to low:\n\n1. The process environment inherited by `omp` (already-set variables always win).\n2. `<cwd>/.env`\n3. `~/.omp/agent/.env`\n4. `~/.omp/.env`\n5. `~/.env`\n\nA variable already present in the process environment is never overwritten by a `.env` file. Among the files, a value set in `<cwd>/.env` wins over `~/.omp/agent/.env`, which wins over `~/.omp/.env`, which wins over `~/.env`. So a shell-exported `OPENAI_API_KEY` beats every `.env` file, and a project's `<cwd>/.env` beats your home `~/.env`.\n\nProject-local `.env` is the simplest way to make one repository use a project-specific gateway, key, or local endpoint:\n\n```dotenv\n# <project>/.env\nOPENROUTER_API_KEY=sk-or-...\nOLLAMA_BASE_URL=http://127.0.0.1:11434\n```\n\n`.env` parsing is intentionally minimal:\n\n- blank lines and lines starting with `#` are ignored;\n- keys must match `[A-Za-z_][A-Za-z0-9_]*` (shell-identifier shape) — other names are dropped;\n- values may be wrapped in single or double quotes, which are stripped;\n- values containing a NUL byte are dropped;\n- an `OMP_`-prefixed key is also mirrored to the matching `PI_`-prefixed name.\n\n## Built-in local engines\n\nThree local engines are discovered automatically without needing a `models.yml` entry. Each uses a base URL that can be overridden by an environment variable:\n\n| Provider ID | Base URL (env override → default) | Notes |\n|---|---|---|\n| `ollama` | `OLLAMA_BASE_URL`, then `OLLAMA_HOST` (normalized), else `http://127.0.0.1:11434` | Keyless by default. |\n| `llama.cpp` | `LLAMA_CPP_BASE_URL`, else `http://127.0.0.1:8080` | Keyless unless a key is stored for `llama.cpp`. |\n| `lm-studio` | `LM_STUDIO_BASE_URL`, else `http://127.0.0.1:1234/v1` | Keyless by default. |\n\nThese implicit engines are **skipped** when:\n\n- a provider with the same ID is already configured in `models.yml` (your explicit config wins); or\n- the provider ID appears in the effective `disabledProviders` list.\n\nFor installing and running these engines, see [Local models](./local-models.md).\n\n## Disabling model providers\n\nUse the `disabledProviders` setting to remove a provider's models from selection:\n\n```yaml\n# ~/.omp/agent/config.yml or <project>/.omp/config.yml\ndisabledProviders:\n - anthropic\n - openai\n - google\n - groq\n```\n\nProvider IDs are matched exactly. Disable `google` to hide the Google Gemini API provider; the OAuth-backed Google providers `google-gemini-cli` and `google-antigravity` are separate IDs and must be disabled individually. Disable `ollama`, `llama.cpp`, or `lm-studio` to stop local discovery for that engine.\n\n`disabledProviders` applies uniformly to:\n\n- bundled catalog providers;\n- custom `models.yml` providers;\n- runtime-discovered provider models;\n- extension-registered providers;\n- implicit local engines.\n\nDisabling a provider does not delete its stored credentials — re-enable it by removing its ID from the effective list.\n\n## Project-specific provider control\n\nProject settings live in `<project>/.omp/config.yml`. Use them when one repository must allow or hide a different provider set than your global default:\n\n```yaml\n# <project>/.omp/config.yml\ndisabledProviders:\n - openai\n - openrouter\n```\n\nSettings arrays are **replaced** wholesale by the higher-precedence layer, not merged or appended. If the global file disables three providers and the project file disables one, the project sees only the project list:\n\n```yaml\n# ~/.omp/agent/config.yml\ndisabledProviders:\n - anthropic\n - openai\n - google\n\n# <project>/.omp/config.yml\ndisabledProviders:\n - groq\n```\n\nEffective result inside the project:\n\n```json\n[\"groq\"]\n```\n\nThe project array re-enables `anthropic`, `openai`, and `google` for sessions launched from that project. If you want a project to *add* to the global set, repeat the global IDs in the project file. See [Settings](./settings.md) for the full precedence chain, including `--config` overlays and runtime overrides.\n\n## Path-scoped `disabledProviders`\n\n`disabledProviders` can mix plain string entries (apply everywhere) with path-scoped entries (apply only when the current working directory matches a configured path):\n\n```yaml\ndisabledProviders:\n - ollama\n - path: ~/projects/sensitive\n providers:\n - anthropic\n - openai\n - paths:\n - ~/work/client-a\n - ~/work/client-b\n values:\n - openrouter\n```\n\n- Bare string entries always apply.\n- A scoped entry applies when the current working directory **is** the configured path or sits **under** it. `~` expands to the home directory.\n- Accepted path keys: `path`, `paths`, `pathPrefix`, `pathPrefixes`.\n- Accepted value keys: `providers`, `values`, `items`.\n\nFor the example above:\n\n- `ollama` is disabled everywhere.\n- `anthropic` and `openai` are additionally disabled under `~/projects/sensitive`.\n- `openrouter` is additionally disabled under `~/work/client-a` and `~/work/client-b`.\n\nPath scopes are resolved **after** the settings merge. Because a higher-precedence layer replaces the whole array, a project-level `disabledProviders` array drops any scoped entries that only existed in the global array. `enabledModels` is the only other setting that supports the same path-scoped form. See [Settings](./settings.md) for details.\n\n## Provider IDs vs discovery provider IDs\n\n`disabledProviders` uses a **single shared ID namespace** that gates two different subsystems:\n\n- **Model providers** — the backends on this page (`anthropic`, `openai`, `ollama`, a custom `models.yml` ID, …). Disabling one removes its models from selection.\n- **Discovery providers** — sources of context files, MCP servers, commands, skills, hooks, tools, prompts, and settings. Disabling one stops that source from contributing capability items.\n\n| Entry type | Examples | Effect |\n|---|---|---|\n| Model provider ID | `anthropic`, `openai`, `google`, `groq`, `openrouter`, `ollama`, `my-gateway` | Removes that provider's models from availability. |\n| Discovery provider ID | `native`, `claude`, `codex`, `gemini`, `agents`, `github` | Stops that discovery source from contributing capability items. |\n\nWatch the related names. The Google Gemini **API** models use the model provider ID `google`; `gemini` is a **discovery** provider ID (the source that reads `GEMINI.md`), not the Google model provider. Use discovery IDs only when you intend to disable an entire config source. See [Context files](./context-files.md) for the discovery-provider side.\n\n## Custom providers in `models.yml`\n\nCustom providers live in `~/.omp/agent/models.yml` under `providers:`. A provider ID defined there participates in the same selection, credential resolution, and `disabledProviders` rules as built-in providers.\n\nMinimal OpenAI-compatible provider:\n\n```yaml\nproviders:\n my-openai-compatible:\n baseUrl: https://api.example.com/v1\n api: openai-completions\n apiKey: MY_OPENAI_COMPATIBLE_KEY # env-var-name or literal\n models:\n - id: fast-chat\n name: Fast Chat\n contextWindow: 128000\n maxTokens: 8192\n```\n\nKeyless local provider (no credentials required):\n\n```yaml\nproviders:\n local-proxy:\n baseUrl: http://127.0.0.1:4000/v1\n api: openai-completions\n auth: none\n models:\n - id: local-model\n name: Local Model\n contextWindow: 32768\n maxTokens: 4096\n```\n\nDiscovery-enabled provider (models fetched from the endpoint at runtime):\n\n```yaml\nproviders:\n team-proxy:\n baseUrl: https://models.example.com/v1\n apiKey: TEAM_PROXY_API_KEY\n authHeader: true # send Authorization: Bearer <resolved key>\n disableStrictTools: true\n discovery:\n type: proxy\n```\n\nFor the full schema, all allowed `api` values, discovery `type`s, model overrides, and equivalence settings, see [Model and Provider Configuration](./models.md).\n\nTo disable a custom provider, list its ID exactly:\n\n```yaml\ndisabledProviders:\n - my-openai-compatible\n - team-proxy\n```\n\n## Troubleshooting\n\n**A provider's models are not selectable.** Confirm the provider has credentials (`/login <provider>`, an exported environment variable, or a `models.yml` `apiKey`) and that its ID is not in the effective `disabledProviders` list. Remember the rule: not disabled **and** (keyless **or** has credentials). Keyless local engines only appear once the engine is actually running and responding.\n\n**The wrong key is being used (a stale key from `.env`).** Resolution favors runtime `--api-key`, then a `models.yml` config key, then stored credentials, then environment/`.env`. An already-set process environment variable also beats every `.env` file, and `<cwd>/.env` beats `~/.env`. If an unexpected key wins, check for an exported shell variable and the four `.env` files in precedence order, and clear the one that should not apply.\n\n**A provider still appears even though I disabled it.** `disabledProviders` arrays are replaced, not merged: a project `<project>/.omp/config.yml` array fully overrides the global one. Verify the *effective* list for the directory you are in (path-scoped entries only apply at or under their configured path), and confirm the ID is spelled exactly. Use `omp config get disabledProviders` to inspect the merged value (see [Settings](./settings.md)).\n\n**A discovery provider name had no effect on models (or vice-versa).** The ID namespace is shared. `gemini`, `codex`, `claude`, `native`, and `agents` are discovery-source IDs; the Google model backend is `google`. Make sure you are disabling the right kind of provider.\n\n**A custom `models.yml` provider does not load.** A YAML or schema error makes the registry skip the custom file. Validate the file with `omp models` (use `omp models find <substr>` to scope it to one provider), confirm each provider has a `baseUrl`, a valid `api`, and at least one model entry, and that an implicit local engine is not silently shadowing it (an explicit `ollama`/`lm-studio`/`llama.cpp` entry replaces the built-in discovery for that ID). See [Model and Provider Configuration](./models.md).\n",
|
|
54
54
|
"python-repl.md": "# Eval Tool Python Backend\n\nThis document describes the Python execution stack in `packages/coding-agent`.\nIt covers tool behavior, runner lifecycle, environment handling, execution semantics, output rendering, supported magics, and operational failure modes.\n\n## Scope and Key Files\n\n- Tool surface: `src/tools/eval.ts`\n- Session/per-call kernel orchestration: `src/eval/py/executor.ts`\n- Subprocess kernel client: `src/eval/py/kernel.ts`\n- Python wrapper / NDJSON server: `src/eval/py/runner.py`\n- Prelude helpers loaded into every kernel: `src/eval/py/prelude.py`\n- Host-side subagent helper bridge: `src/eval/agent-bridge.ts`\n- MIME bundle renderer (text + structured outputs): `src/eval/py/display.ts`\n- Interactive-mode renderer for user-triggered Python runs: `src/modes/components/eval-execution.ts`\n- Runtime/env filtering and Python resolution: `src/eval/py/runtime.ts`\n\n## What eval's Python backend is\n\nThe `eval` tool executes one or more Python cells inside a retained `python` subprocess that speaks NDJSON over stdin/stdout. No Jupyter gateway and no extra pip dependencies are required — a vanilla Python 3.8+ interpreter is enough. Rich `display()` output (PIL, pandas, plotly, matplotlib figures) keeps working because the wrapper implements MIME-bundle dispatch.\n\nTool params:\n\n```ts\n{\n cells: Array<{\n language: \"py\" | \"js\";\n code: string;\n title?: string;\n timeout?: number; // seconds, clamped to 1..3600, default 30. Inactivity budget — see \"Cell timeout\".\n reset?: boolean; // reset this cell's selected runtime before execution\n }>;\n}\n```\n\nThe tool is `concurrency = \"exclusive\"` for a session, so calls do not overlap.\n\n## Kernel lifecycle\n\nEach Python kernel is a single subprocess: `<resolved-python> -u <runner.py>`. The runner is bundled with the host binary (Bun text import), written to an `omp-python-runner` cache under the OS temp directory once per script hash, and reused by subsequent spawns.\n\nKernel startup sequence:\n\n1. Availability check (`checkPythonKernelAvailability`) — verifies that a Python interpreter resolves and runs.\n2. Spawn `python -u runner.py` with filtered env and `cwd`.\n3. Send an init request that runs `os.chdir(cwd)`, injects env entries, and adds `cwd` to `sys.path`.\n4. Execute `PYTHON_PRELUDE` (idempotent — only initializes once per process).\n\nKernel shutdown:\n\n- Send `{\"type\": \"exit\"}` over stdin.\n- Wait for process exit with `SHUTDOWN_GRACE_MS` budget.\n- Escalate to `SIGTERM` and finally `SIGKILL` if the process does not exit in time.\n\n## Wire protocol (NDJSON, host ↔ runner)\n\nOne JSON object per line, UTF-8, `\\n` terminated.\n\nHost → runner:\n\n```jsonc\n{\"id\": \"<reqId>\", \"code\": \"<source>\", \"silent\": false, \"storeHistory\": true, \"cwd\": \"<optional>\", \"env\": {\"KEY\": \"VAL\"}}\n{\"type\": \"exit\"}\n```\n\nRunner → host:\n\n```jsonc\n{\"type\": \"started\", \"id\": \"<reqId>\"}\n{\"type\": \"stdout\", \"id\": \"<reqId>\", \"data\": \"...\"}\n{\"type\": \"stderr\", \"id\": \"<reqId>\", \"data\": \"...\"}\n{\"type\": \"display\", \"id\": \"<reqId>\", \"bundle\": {<mime>: <value>}}\n{\"type\": \"result\", \"id\": \"<reqId>\", \"bundle\": {<mime>: <value>}}\n{\"type\": \"error\", \"id\": \"<reqId>\", \"ename\": \"...\", \"evalue\": \"...\", \"traceback\": [\"...\"]}\n{\"type\": \"done\", \"id\": \"<reqId>\", \"status\": \"ok\"|\"error\", \"executionCount\": N, \"cancelled\": false}\n```\n\nStatus events the prelude emits (e.g. `_emit_status(\"find\", count=…)`) ship inside display bundles under `application/x-omp-status` so the existing TUI status renderer keeps working.\n\n## Magics\n\nThe runner's source transformer rewrites IPython-style magics to plain Python calls before parsing. Supported set:\n\n| Magic | Effect |\n| --------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| `%pip <args>` | `python -m pip <args>` with live streaming output. Newly installed packages are evicted from `sys.modules` so the next `import` picks up the fresh install. |\n| `%cd <path>` | `os.chdir(path)` (with `~` expansion); emits status event. |\n| `%pwd` | Returns `os.getcwd()`. |\n| `%ls [path]` | Returns `sorted(os.listdir(path))`. |\n| `%env [KEY[=VAL]]` | List, read, or set env vars (matches prelude `env()` semantics). |\n| `%set_env KEY VALUE` | Set `os.environ[KEY]`. |\n| `%time <expr>` / `%timeit <expr>` | Time the expression; emits status event with elapsed ms. |\n| `%who` / `%whos` | List user-namespace names. |\n| `%reset` | Clear user globals and re-inject prelude. |\n| `%load <path>` | Read a file into a fresh cell and execute. |\n| `%run <path>` | `runpy.run_path` and merge globals back. |\n| `%%bash` / `%%sh` | Run the cell body via `bash`/`sh`. |\n| `%%capture [name]` | Run body with stdout/stderr captured into `name`. |\n| `%%timeit` | Time the cell body. |\n| `%%writefile <path>` | Write body to file. |\n| `!cmd` / `var = !cmd` | Run command via subprocess shell; returns an SList-style result with `.n` / `.s` helpers. |\n| `var = %name args` | Assignment forms work for line magics and `!cmd`. |\n\nUnknown magic names raise `NameError: UsageError: ...` inside the cell.\n\n## Session persistence semantics\n\n`python.kernelMode` controls retained kernel reuse:\n\n- `session` (default)\n - Reuses kernel sessions keyed by namespaced eval session id plus normalized cwd and interpreter.\n - Multiple owners can share the same retained kernel for that key.\n - Calls through the tool are exclusive, so tool invocations do not overlap.\n - A dead retained subprocess is replaced before execution.\n - If the subprocess dies during execution, it is replaced and the cell is retried once.\n- `per-call`\n - Spawns a fresh subprocess for each request.\n - Shuts the subprocess down after the request.\n - No cross-call state persistence.\n\n### Multi-cell behavior in a single tool call\n\nPython cells run sequentially in the same selected Python kernel instance for that tool call.\n\nIf an intermediate cell fails:\n\n- Earlier cell state remains in memory.\n- Tool returns a targeted error indicating which cell failed.\n- Later cells are not executed.\n\n`reset=true` is per cell and resets that language runtime before the cell executes.\n\n## Environment filtering and runtime resolution\n\nEnvironment is filtered before launching the runner:\n\n- Allowlist includes core vars like `PATH`, `HOME`, locale vars, `VIRTUAL_ENV`, `PYTHONPATH`, etc.\n- Allow-prefixes: `LC_`, `XDG_`, `PI_`\n- Denylist strips common API keys (OpenAI/Anthropic/Gemini/etc.)\n\nRuntime selection order (skipped entirely when the `python.interpreter` setting names an explicit executable):\n\n1. Active/located venv (`VIRTUAL_ENV`, then `CONDA_PREFIX`, then `<cwd>/.venv`, `<cwd>/venv`)\n2. Managed venv at `~/.omp/python-env`\n3. `python` or `python3` on PATH\n\nWhen a venv is selected, its bin/Scripts path is prepended to `PATH`.\n\nThe runner additionally receives `PYTHONUNBUFFERED=1` and `PYTHONIOENCODING=utf-8` so streamed output reaches the host promptly.\n\n## Tool availability and mode selection\n\n`eval.py` / `eval.js` (both default `true`) plus optional boolean env flags `PI_PY` / `PI_JS` control eval backend exposure:\n\n- Python backend only (`eval.py=true`, `eval.js=false`, or `PI_PY=1 PI_JS=0`)\n- JavaScript backend only (`eval.py=false`, `eval.js=true`, or `PI_PY=0 PI_JS=1`)\n- both backends (`eval.py=true`, `eval.js=true`, or `PI_PY=1 PI_JS=1`)\n\n`PI_PY` and `PI_JS` use normal boolean flag parsing. Each flag, when set, overrides only its own setting; an unset flag falls back to its setting (`eval.py` / `eval.js`, both default `true`).\n\nIf Python preflight fails and `eval.js` is enabled, `eval` remains available for `js` cells; `py` cells fail with a Python-backend availability error.\n\nPython prelude helpers include `agent(prompt, *, agent_type=\"task\", model=None, label=None, schema=None)`. It synchronously calls the host bridge, runs one subagent through the task executor, and returns the final text. When `schema` is supplied, the helper parses the subagent's JSON output and returns the object.\n\n## Execution flow and cancellation/timeout\n\n### Cell timeout\n\nEach eval cell `timeout` is in seconds, defaults to 30, and is clamped to `1..3600`. It is a **wall-clock budget on the cell's own work** that the watchdog (`IdleTimeout`, `src/eval/idle-timeout.ts`) enforces, **but it is suspended while a host-side `agent()`/`parallel()`/`completion()` bridge call is in flight**: those calls emit synthetic pause/resume timeout-control status events (`withBridgeTimeoutPause`, `src/eval/bridge-timeout.ts`) that pause the watchdog entirely and start a fresh timeout window when control returns to the runtime, so a long fanout or a slow completion runs to completion instead of being killed mid-stream. Pause is reference-counted because `parallel()` can have multiple bridge calls in flight at once.\n\nThe pause/resume events are the **sole** mechanism that suspends the budget. Everything else the cell does — compute, `stdout`/`stderr`, `log()`/`phase()`, and ordinary (non-agent) tool calls — counts against `timeout`, so a cell that is not delegating to an agent/completion is bounded by a plain wall-clock timeout. The tool combines the caller abort signal, the session abort signal, and the watchdog's signal with `AbortSignal.any(...)`; no wall-clock deadline is passed to the backend, so neither runtime arms a competing fixed timer.\n\n### Kernel execution cancellation\n\nOn abort/timeout:\n\n- The host sends `kill(\"SIGINT\")` to the runner subprocess.\n- The runner's exec-time signal handler raises `KeyboardInterrupt` inside the user code.\n- Result includes `cancelled=true`; a kernel timeout is annotated as `eval cell timed out after <n>s; kernel interrupted but remains running. Reset the kernel via { reset: true } if state appears corrupted.`\n- Between requests the runner installs `SIG_IGN` for SIGINT so a stray cancel does not tear down the kernel.\n\nIf the runner does not emit `done` within 5s of the interrupt (`INTERRUPT_ESCALATION_MS` — e.g. stuck in C code holding the GIL), the host shuts the subprocess down (escalating `exit` → `SIGTERM` → `SIGKILL`), the cell is annotated as kernel-killed, and the kernel is recreated on the next call.\n\n### stdin behavior\n\nInteractive stdin is not supported. The runner does not forward `input()` prompts; user code that calls `input()` blocks until cancellation.\n\n## Output capture and rendering\n\n### Captured output classes\n\nFrom runner frames:\n\n- `stdout` / `stderr` → plain text chunks\n- `display` / `result` → rich display handling (MIME bundle)\n- `error` → traceback text\n- `application/x-omp-status` MIME inside `display` → structured status events\n\nDisplay MIME precedence:\n\n1. `text/markdown`\n2. `text/plain`\n3. `text/html` (converted to basic markdown)\n\nAdditionally captured as structured outputs:\n\n- `application/json` → JSON tree data\n- `image/png` / `image/jpeg` → image payloads\n- `application/x-omp-status` → status events\n\n### Matplotlib\n\nThe runner sets `MPLBACKEND=Agg` as an environ default so figures render off-screen. After every cell, `pyplot.get_fignums()` is iterated; each figure is saved to PNG, emitted as an `image/png` display, and closed.\n\n### Storage and truncation\n\nOutput is streamed through `OutputSink` and may be persisted to artifact storage. Tool results can include truncation metadata and `artifact://<id>` for full output recovery.\n\n### Renderer behavior\n\n- Tool renderer (`eval.ts`):\n - shows code-cell blocks with per-cell status\n - collapsed preview defaults to 10 lines\n - supports expanded mode for all output retained in the tool result\n- Interactive renderer (`eval-execution.ts`):\n - used for user-triggered Python execution in TUI\n - collapsed preview defaults to 20 lines\n - clamps very long individual lines to 4000 chars for display safety\n - shows cancellation/error/truncation notices\n\n## Operational troubleshooting\n\n- **Python backend not available** — Check `eval.py`, `PI_PY`, and that `python`/`python3` is on PATH. If preflight fails and `eval.js` is enabled, use a `js` cell.\n- **No Python on PATH** — Install a system Python 3.8+ or place a venv at `~/.omp/python-env`. `omp setup python --check` reports the resolved interpreter.\n- **Execution hangs then times out** — Increase tool `timeout` (max 3600s) if workload is legitimate. For stuck native code, cancellation triggers `SIGINT` first then escalates; the session restarts on the next request.\n- **stdin/input prompts in Python code** — `input()` is not supported; pass data programmatically.\n- **Working directory errors** — Tool validates `cwd` exists and is a directory before execution.\n\n## Relevant environment variables\n\n- `PI_PY` / `PI_JS` — eval backend exposure overrides\n- `PI_PYTHON_SKIP_CHECK=1` — bypass Python preflight/warm checks\n- `PI_PYTHON_INTEGRATION=1` — enable gated integration tests that spawn a real Python\n- `PI_PYTHON_IPC_TRACE=1` — log NDJSON frames exchanged with the runner subprocess\n",
|
|
55
55
|
"render-mermaid.md": "# RenderMermaid\n\n`RenderMermaid` is an optional built-in tool that renders Mermaid source to terminal-friendly text.\n\n## Enable it\n\nDisabled by default. Turn it on in `/settings` under **Tools → Render Mermaid**, or in `~/.omp/agent/config.yml`:\n\n```yaml\nrenderMermaid:\n enabled: true\n```\n\n## What it does\n\n- Tool name: `render_mermaid`\n- Input: Mermaid source in the required `mermaid` field\n- Output: rendered ASCII/Unicode text, not SVG or PNG\n- Storage: when artifact storage is available, the full render is also saved as an `artifact://...`\n\nThere are no model-specific or environment-variable prerequisites. Once enabled, any model that can call built-in tools can use it.\n\n## Parameters\n\n```json\n{\n \"mermaid\": \"graph TD\\n A[Start] --> B[Stop]\",\n \"config\": {\n \"useAscii\": false,\n \"paddingX\": 2,\n \"paddingY\": 2,\n \"boxBorderPadding\": 0\n }\n}\n```\n\nAvailable `config` fields:\n\n- `useAscii` — `true` for plain ASCII, `false` for Unicode box-drawing characters (default and usually more readable)\n- `paddingX` — horizontal spacing between nodes\n- `paddingY` — vertical spacing between nodes\n- `boxBorderPadding` — inner padding inside node boxes\n\n## Current limitations\n\n`RenderMermaid` uses the `beautiful-mermaid` ASCII renderer. It works best for flowcharts and small diagrams.\n\nComplex sequence diagrams, especially with `alt` / `else` blocks, can become very wide in a terminal. That is current renderer behavior, not a provider or model configuration problem.\n\nIf a sequence diagram is hard to read:\n\n1. Keep Unicode output (`useAscii: false`)\n2. Reduce spacing with a tighter config such as `paddingX: 2`, `paddingY: 2`, `boxBorderPadding: 0`\n3. Prefer smaller sub-diagrams over one large sequence diagram\n4. Open the saved artifact if the inline preview is truncated in the TUI\n\n## Example\n\nInput:\n\n```mermaid\ngraph TD\n A[Start] --> B{Decision}\n B -->|Yes| C[Action]\n B -->|No| D[End]\n```\n\nTypical result:\n\n```text\n┌─────┐\n│Start│\n└─────┘\n │\n ▼\n┌────────┐\n│Decision│\n└────────┘\n```\n",
|
|
56
56
|
"resolve-tool-runtime.md": "# Resolve tool runtime internals\n\nThis document explains how preview/apply workflows are modeled in coding-agent and how built-in or custom tools can participate via the tool-choice queue and `pushPendingAction`.\n\n## Scope and key files\n\n- [`src/tools/resolve.ts`](../packages/coding-agent/src/tools/resolve.ts)\n- [`src/tools/ast-edit.ts`](../packages/coding-agent/src/tools/ast-edit.ts)\n- [`src/extensibility/custom-tools/types.ts`](../packages/coding-agent/src/extensibility/custom-tools/types.ts)\n- [`src/extensibility/custom-tools/loader.ts`](../packages/coding-agent/src/extensibility/custom-tools/loader.ts)\n- [`src/sdk.ts`](../packages/coding-agent/src/sdk.ts)\n\n## What `resolve` does\n\n`resolve` is a hidden tool that finalizes a pending preview action.\n\n- `action: \"apply\"` executes the queued action's `apply(reason, extra)` callback and returns that result with resolve metadata.\n- `action: \"discard\"` invokes `reject(reason, extra)` if provided; otherwise returns `Discarded: <label>. Reason: <reason>`.\n- `extra` is optional free-form metadata. Queue handlers receive it; producers decide whether it has meaning.\n\nIf no pending action exists, `resolve(action=\"apply\")` fails with:\n\n- `No pending action to resolve. Nothing to apply or discard.`\n\n`resolve(action=\"discard\")` with no pending action succeeds instead, returning `Nothing to discard; no pending action remains.` — the desired end-state (no staged change) already holds.\n\n## Pending actions use the tool-choice queue\n\nPreview producers call `queueResolveHandler(...)`, which pushes a one-shot forced `resolve` directive onto the session tool-choice queue and adds a `resolve-reminder` steering message.\n\nRuntime behavior:\n\n- the queued handler owns the pending `apply`/`reject` callbacks,\n- `resolve` looks up the current queue invoker with `session.peekQueueInvoker()`,\n- if the model rejects the forced tool choice, the queue directive is requeued,\n- `resolve` does not maintain a separate pending-action stack.\n\n`resolve` also checks a standing resolve handler after the queue invoker; this is used by long-lived approval flows that are not ordinary preview tool calls.\n\nMultiple pending previews therefore follow the active tool-choice queue ordering, not an independent pending-action store. If an apply callback throws, the queued helper re-pushes the same resolve directive and reminder so the preview can still be discarded or retried.\n\n## Built-in producer example (`ast_edit`)\n\n`ast_edit` previews structural replacements first. When the preview has replacements and is not applied yet, it queues a resolve handler that contains:\n\n- label (human-readable summary)\n- `sourceToolName` (`ast_edit`)\n- `apply(reason: string, extra?: Record<string, unknown>)` callback that reruns AST edit with `dryRun: false`\n\n`resolve(action=\"apply\", reason=\"...\")` passes `reason` into this callback. `ast_edit` currently ignores `extra`.\n\n## Custom tools: `pushPendingAction`\n\nCustom tools can register resolve-compatible pending actions through `CustomToolAPI.pushPendingAction(...)`. The custom tool loader forwards these actions to `queueResolveHandler(...)` when that hook is available.\n\n`CustomToolPendingAction`:\n\n- `label: string` (required)\n- `apply(reason: string): Promise<AgentToolResult<unknown>>` (required) — invoked on apply; `reason` is the string passed to `resolve`\n- `reject?(reason: string): Promise<AgentToolResult<unknown> | undefined>` (optional) — invoked on discard; return value replaces the default \"Discarded\" message if provided\n- `details?: unknown` exists on the public custom-tool type but is not currently forwarded by the loader into resolve metadata\n- `sourceToolName?: string` (optional, defaults to `\"custom_tool\"`)\n\n### Minimal usage example\n\n```ts\nimport type { CustomToolFactory } from \"@oh-my-pi/pi-coding-agent\";\n\nconst factory: CustomToolFactory = (pi) => ({\n name: \"batch_rename_preview\",\n label: \"Batch Rename Preview\",\n description: \"Previews renames and defers commit to resolve\",\n parameters: pi.zod.object({\n files: pi.zod.array(pi.zod.string()),\n }),\n\n async execute(_toolCallId, params) {\n const previewSummary = `Prepared rename plan for ${params.files.length} files`;\n\n pi.pushPendingAction({\n label: `Batch rename: ${params.files.length} files`,\n sourceToolName: \"batch_rename_preview\",\n apply: async (reason) => {\n // apply writes here\n return {\n content: [\n { type: \"text\", text: `Applied batch rename. Reason: ${reason}` },\n ],\n };\n },\n reject: async (reason) => {\n // optional: cleanup or notify on discard\n return {\n content: [\n { type: \"text\", text: `Discarded batch rename. Reason: ${reason}` },\n ],\n };\n },\n });\n\n return {\n content: [\n {\n type: \"text\",\n text: `${previewSummary}. Call resolve to apply or discard.`,\n },\n ],\n };\n },\n});\n\nexport default factory;\n```\n\n## Runtime availability and failures\n\n`pushPendingAction` is wired by the custom tool loader through the active session's resolve queue hook.\n\nIf the runtime did not provide the resolve queue hook, `pushPendingAction` throws:\n\n- `Pending action store unavailable for custom tools in this runtime.`\n\n## Tool-choice behavior\n\nWhen `queueResolveHandler(...)` registers a preview, the agent runtime forces a one-shot `resolve` tool choice so pending previews are explicitly finalized before normal tool flow continues.\n\n## Developer guidance\n\n- Use pending actions only for destructive or high-impact operations that should support explicit apply/discard.\n- Keep `label` concise and specific; it is shown in resolve renderer output.\n- Ensure `apply(reason)` is deterministic and idempotent enough for one-shot execution; `reason` is informational and should not change behavior.\n- Implement `reject(reason)` when the discard needs cleanup (temp state, locks, notifications); omit it for stateless previews where the default message suffices.\n- If your tool can stage multiple previews, remember they are mediated by the tool-choice queue rather than a separate pending-action stack.\n",
|
|
@@ -66,7 +66,7 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
|
|
|
66
66
|
"skills.md": "# Skills\n\nSkills are file-backed capability packs discovered at startup and exposed to the model as:\n\n- lightweight metadata in the system prompt (name + description)\n- on-demand content via the `read` tool against `skill://...`\n- optional interactive `/skill:<name>` commands\n\nThis document covers current runtime behavior in `src/extensibility/skills.ts`, `src/discovery/builtin.ts`, `src/internal-urls/skill-protocol.ts`, and `src/discovery/agents-md.ts`.\n\n## What a skill is in this codebase\n\nA discovered skill is represented as:\n\n- `name`\n- `description`\n- `filePath` (the `SKILL.md` path)\n- `baseDir` (skill directory)\n- source metadata (`provider`, `level`, path)\n\nThe runtime only requires `name` and `path` for validity. In practice, matching quality depends on `description` being meaningful.\n\n## Required layout and SKILL.md expectations\n\n### Directory layout\n\nFor provider-based discovery (native/Claude/Codex/Agents/plugin providers), skills are discovered as **one level under `skills/`**:\n\n- `<skills-root>/<skill-name>/SKILL.md`\n\nNested patterns like `<skills-root>/group/<skill>/SKILL.md` are not discovered by provider loaders.\n\nFor `skills.customDirectories`, scanning uses the same non-recursive layout (`*/SKILL.md`).\n\n```text\nProvider-discovered layout (non-recursive under skills/):\n\n<root>/skills/\n ├─ postgres/\n │ └─ SKILL.md ✅ discovered\n ├─ pdf/\n │ └─ SKILL.md ✅ discovered\n └─ team/\n └─ internal/\n └─ SKILL.md ❌ not discovered by provider loaders\n\nCustom-directory scanning is also non-recursive, so nested paths are ignored unless you point `customDirectories` at that nested parent.\n```\n\n### `SKILL.md` frontmatter\n\nSupported frontmatter fields on the skill type:\n\n- `name?: string`\n- `description?: string`\n- `globs?: string[]`\n- `alwaysApply?: boolean`\n- `hide?: boolean`\n- `disableModelInvocation?: boolean` (Agent Skills equivalent of `hide`; normalized from kebab-case `disable-model-invocation`)\n- additional keys are preserved as unknown metadata\n\nCurrent runtime behavior:\n\n- `name` defaults to the skill directory name\n- `description` is required for:\n - native `.omp` provider skill discovery (`requireDescription: true`)\n - `omp-plugins` extension-package skills and the `github` provider (`.github/skills/`), which also pass `requireDescription: true`\n - `skills.customDirectories` scans via `scanSkillsFromDir` in `src/discovery/helpers.ts` (non-recursive)\n- the claude/codex/agents/opencode/claude-plugins providers can load skills without description\n\n## Discovery pipeline\n\n`loadSkills()` in `src/extensibility/skills.ts` does two passes:\n\n1. **Capability providers** via `loadCapability(\"skills\")`\n2. **Custom directories** via `scanSkillsFromDir(..., { requireDescription: true })` (one-level directory enumeration)\n\nIf `skills.enabled` is `false`, discovery returns no skills.\n\n### Built-in skill providers and precedence\n\nProvider ordering is priority-first (higher wins), then registration order for ties.\n\nCurrent registered skill providers:\n\n1. `native` (priority 100) — `.omp` user/project skills via `src/discovery/builtin.ts`\n2. `omp-plugins` (priority 90) — `skills/` bundled next to extension packages loaded through `extensions:`, `--extension`/`-e`, or installed plugins under `~/.omp/plugins/node_modules`\n3. `claude` (priority 80)\n4. priority 70 group (in registration order):\n - `claude-plugins`\n - `agents`\n - `codex`\n5. `opencode` (priority 55)\n6. `github` (priority 30) — `.github/skills/<name>/SKILL.md` (GitHub Agent Skills layout, project-only)\n\nDedup key is skill name. First item with a given name wins.\n\n### Source toggles and filtering\n\n`loadSkills()` applies these controls:\n\n- source toggles: `enableCodexUser`, `enableClaudeUser`, `enableClaudeProject`, `enablePiUser`, `enablePiProject`, `enableAgentsUser`, `enableAgentsProject`\n- `disabledExtensions` entries with `skill:<name>`\n- `ignoredSkills` (exclude; glob patterns)\n- `includeSkills` (include allowlist; glob patterns; empty means include all)\n\nFilter order is:\n\n1. not disabled by `disabledExtensions`\n2. source enabled\n3. not ignored\n4. included (if include list present)\n\nThe `agents` provider (`.agent[s]/skills`) is the canonical OMP-native location and has its own `enableAgentsUser`/`enableAgentsProject` toggles — disabling Claude/Codex/Pi does **not** turn it off. For providers without a dedicated toggle (`claude-plugins`, `opencode`, `gemini`, `github`, …), enablement falls back to: enabled if **any** named source toggle is enabled.\n\n### Collision and duplicate handling\n\n- Capability dedup already keeps first skill per name (highest-precedence provider)\n- `extensibility/skills.ts` additionally:\n - de-duplicates identical files by `realpath` (symlink-safe)\n - emits collision warnings when a later skill name conflicts\n - keeps the convenience `loadSkillsFromDir({ dir, source })` API as a thin adapter over `scanSkillsFromDir`\n- Custom-directory skills are merged after provider skills and follow the same collision behavior\n\n## Runtime usage behavior\n\n### System prompt exposure\n\nSystem prompt construction (`src/system-prompt.ts`) uses discovered skills as follows:\n\n- if `read` tool is available:\n - include discovered skills list in prompt, excluding skills with `hide: true`\n- otherwise:\n - omit discovered list\n\n`hide: true` does not disable the skill. Hidden skills are still loaded and remain reachable through `skill://<name>` and `/skill:<name>` when skill commands are enabled.\n\nTask tool subagents receive the session's discovered/provided skills list via normal session creation; there is no per-task skill pinning override.\n\n### Interactive `/skill:<name>` commands\n\nIf `skills.enableSkillCommands` is true, interactive mode registers one slash command per discovered skill.\n\n`/skill:<name> [args]` behavior:\n\n- reads the skill file directly from `filePath`\n- strips frontmatter\n- injects skill body as a custom message\n- delivery mode follows the **submission keybinding**:\n - **Enter** → invokes the skill on the `steer` queue while streaming (matches free-text Enter, which also steers), or as a normal idle prompt when the agent is not streaming\n - **Ctrl+Enter** (`app.message.followUp`) → invokes the skill on the `followUp` queue while streaming, or as a normal idle prompt when the agent is not streaming\n- appends metadata (`Skill: <path>`, optional `User: <args>`)\n\nThere is no flag, mode-selector, or frontmatter knob to override this — the keybinding _is_ the choice, identical to how free text is routed during streaming (`input-controller.ts:436-442` for Enter, `input-controller.ts:770-775` for Ctrl+Enter; both dispatch through `#invokeSkillCommand`).\n\n## `skill://` URL behavior\n\n`src/internal-urls/skill-protocol.ts` supports:\n\n- `skill://<name>` → resolves to that skill's `SKILL.md`\n- `skill://<name>/<relative-path>` → resolves inside that skill directory\n\n```text\nskill:// URL resolution\n\nskill://pdf\n -> <pdf-base>/SKILL.md\n\nskill://pdf/references/tables.md\n -> <pdf-base>/references/tables.md\n\nGuards:\n- reject absolute paths\n- reject `..` traversal\n- reject any resolved path escaping <pdf-base>\n```\n\nResolution details:\n\n- skill name must match exactly\n- relative paths are URL-decoded\n- absolute paths are rejected\n- path traversal (`..`) is rejected\n- resolved path must remain within `baseDir`\n- missing files return an explicit `File not found` error\n\nContent type:\n\n- `.md` => `text/markdown`\n- everything else => `text/plain`\n\nNo fallback search is performed for missing assets.\n\n## Skills vs AGENTS.md, commands, tools, hooks\n\n### Skills vs AGENTS.md\n\n- **Skills**: named, optional capability packs selected by task context or explicitly requested\n- **AGENTS.md/context files**: persistent instruction files loaded as context-file capability and merged by level/depth rules\n\n`src/discovery/agents-md.ts` specifically walks ancestor directories from `cwd` to discover standalone `AGENTS.md` files (stopping at the repo root, or home when no repo root is known), skipping files whose containing directory name starts with a dot.\n\n### Skills vs slash commands\n\n- **Skills**: model-readable knowledge/workflow content\n- **Slash commands**: user-invoked command entry points\n- `/skill:<name>` is a convenience wrapper that injects skill text; it does not change skill discovery semantics\n\n### Skills vs custom tools\n\n- **Skills**: documentation/workflow content loaded through prompt context and `read`\n- **Custom tools**: executable tool APIs callable by the model with schemas and runtime side effects\n\n### Skills vs hooks\n\n- **Skills**: passive content\n- **Hooks**: event-driven runtime interceptors that can block/modify behavior during execution\n\n## Practical authoring guidance tied to discovery logic\n\n- Put each skill in its own directory: `<skills-root>/<skill-name>/SKILL.md`\n- Always include explicit `name` and `description` frontmatter\n- Keep referenced assets under the same skill directory and access with `skill://<name>/...`\n- For nested taxonomy (`team/domain/skill`), point `skills.customDirectories` to the nested parent directory; scanning itself remains non-recursive\n- Avoid duplicate skill names across sources; first match wins by provider precedence\n",
|
|
67
67
|
"skills/authoring-extensions.md": "---\nname: authoring-extensions\ndescription: Use when creating a new omp extension. Covers ExtensionAPI, factory signature, tool/command/event registration, and local-dev testing.\n---\n\n# Authoring Extensions\n\nExtensions are the primary way to add capabilities to `oh-my-pi`. A single extension module can register tools the LLM can call, slash commands users can invoke, and event handlers that run throughout the session lifecycle — all from one TypeScript file.\n\n## Minimum viable extension\n\n```ts\nimport type { ExtensionAPI } from \"@oh-my-pi/pi-coding-agent\";\n\nexport default function (pi: ExtensionAPI) {\n pi.on(\"session_start\", async (_event, ctx) => {\n ctx.ui.notify(\"My extension loaded!\", \"info\");\n });\n}\n```\n\nThat is a working extension. Drop it into `~/.omp/agent/extensions/hello.ts` and restart omp to see the notification.\n\n## Full example\n\nThe following extension registers a slash command, a tool, and a session-start hook:\n\n```ts\nimport type { ExtensionAPI } from \"@oh-my-pi/pi-coding-agent\";\n\nexport default function myExtension(pi: ExtensionAPI) {\n const z = pi.zod;\n\n // Runs once when the session loads\n pi.on(\"session_start\", async (_event, ctx) => {\n ctx.ui.notify(`Session ready in ${ctx.cwd}`, \"info\");\n });\n\n // Slash command: /greet\n pi.registerCommand(\"greet\", {\n description: \"Send a greeting into the conversation\",\n handler: async (args, ctx) => {\n const name = args.trim() || \"world\";\n pi.sendMessage(\n {\n customType: \"greeting\",\n content: `Hello, ${name}!`,\n display: true,\n attribution: \"user\",\n },\n { triggerTurn: false }\n );\n ctx.ui.notify(`Greeted ${name}`, \"info\");\n },\n });\n\n // LLM-callable tool\n pi.registerTool({\n name: \"word_count\",\n label: \"Word Count\",\n description: \"Count the words in a string\",\n parameters: z.object({\n text: z.string().describe(\"Text to count\"),\n }),\n async execute(_id, params, _signal, _onUpdate, _ctx) {\n const count = params.text.split(/\\s+/).filter(Boolean).length;\n return {\n content: [{ type: \"text\", text: String(count) }],\n details: { count },\n };\n },\n });\n}\n```\n\n## Discovery paths\n\nomp loads extension modules from these sources:\n\n1. Native `.omp` locations discovered through the capability system:\n - `<cwd>/.omp/extensions/`\n - `~/.omp/agent/extensions/`\n - legacy extension paths listed in `.omp/settings.json#extensions` or `~/.omp/agent/settings.json#extensions`\n2. Installed plugins under `~/.omp/plugins/node_modules` (`omp plugin install` npm/git specs, or `omp plugin link`) via their `omp.extensions`/`pi.extensions` manifests. Marketplace cache installs do not feed extension modules — they surface skills/commands/hooks/tools/MCP only.\n3. Explicit configured paths passed by the CLI (`omp --extension ./my-ext.ts`, also `-e`; `--hook` is treated as an alias) and by the `extensions:` setting in config.\n\nThe runtime de-duplicates by resolved absolute path — first seen wins.\n\nWhen a path points to a directory, omp resolves the entry point in this order:\n\n1. `package.json` with `omp.extensions` (or legacy `pi.extensions`) field\n2. `index.ts`\n3. `index.js`\n\nWhen scanning an `extensions/` directory, omp also loads direct `*.ts`/`*.js` files and one-level subdirectories that have `index.ts`, `index.js`, or a manifest.\n\nExtension packages can also bundle sibling capability directories. When a package is loaded through `extensions:` or `--extension`/`-e`, the `omp-plugins` provider discovers its `skills/`, `hooks/pre|post/`, `tools/`, `commands/`, `rules/`, `prompts/`, and `.mcp.json`.\n\n## package.json manifest\n\nTo package an extension as an installable plugin, add an `omp` field to `package.json`:\n\n```json\n{\n \"name\": \"my-omp-extension\",\n \"omp\": {\n \"extensions\": [\"./src/main.ts\"]\n }\n}\n```\n\nThe legacy `pi` key is also accepted for backwards compatibility:\n\n```json\n{\n \"pi\": {\n \"extensions\": [\"./index.ts\"]\n }\n}\n```\n\nMultiple entry points are supported:\n\n```json\n{\n \"omp\": {\n \"extensions\": [\"./src/safety.ts\", \"./src/tools.ts\"]\n }\n}\n```\n\n## Registering commands\n\n```ts\npi.registerCommand(\"my-cmd\", {\n description: \"What the command does\",\n handler: async (args, ctx) => {\n // args: everything the user typed after /my-cmd\n // ctx: ExtensionCommandContext — includes ctx.ui, ctx.cwd, session controls\n ctx.ui.notify(\"Running!\", \"info\");\n await ctx.waitForIdle();\n await ctx.newSession();\n },\n});\n```\n\n`ExtensionCommandContext` session-control methods (safe to call from commands only):\n\n| Method | Effect |\n|---|---|\n| `waitForIdle()` | Wait for the agent to finish streaming |\n| `newSession(opts?)` | Open a fresh session |\n| `switchSession(path)` | Switch to an existing session file |\n| `branch(entryId)` | Fork from a specific history entry |\n| `navigateTree(id, opts?)` | Jump to a different point in the session tree |\n| `reload()` | Reload the session runtime |\n| `compact(opts?)` | Compact the current context |\n\n## Registering tools\n\nTools are called by the LLM. Parameters use [Zod](https://zod.dev) schemas, available at `pi.zod`:\n\n```ts\nconst z = pi.zod;\n\npi.registerTool({\n name: \"search_notes\", // snake_case, unique\n label: \"Search Notes\", // human-readable label for TUI\n description: \"Full-text search through project notes\",\n parameters: z.object({\n query: z.string().describe(\"Search query\"),\n limit: z.number().default(10).describe(\"Max results\").optional(),\n }),\n async execute(toolCallId, params, signal, onUpdate, ctx) {\n if (signal?.aborted) {\n return { content: [{ type: \"text\", text: \"Cancelled\" }] };\n }\n onUpdate?.({ content: [{ type: \"text\", text: \"Searching...\" }] });\n // ... do work ...\n return {\n content: [{ type: \"text\", text: `Found N results for \"${params.query}\"` }],\n details: { query: params.query, count: 0 },\n };\n },\n});\n```\n\n## Subscribing to events\n\n```ts\npi.on(\"tool_call\", async (event, ctx) => {\n // event.toolName, event.input, event.toolCallId\n if (event.toolName !== \"bash\") return;\n\n const command = String((event.input as { command?: unknown }).command ?? \"\");\n if (command.includes(\"rm -rf /\")) {\n return { block: true, reason: \"Blocked by safety policy\" };\n }\n});\n\npi.on(\"turn_end\", async (_event, ctx) => {\n ctx.ui.setStatus(\"tokens\", `~${ctx.getContextUsage()?.tokens ?? \"?\"} tokens`);\n});\n```\n\nFull event catalog: see [hooks authoring guide](./authoring-hooks.md).\n\n## Extension vs hook — when to use which\n\n| Need | Use |\n|---|---|\n| Tools + commands + events in one module | **Extension** (`ExtensionAPI`) |\n| Pure event interception (policy, redaction) | **Extension** or **Hook** (both work; extension is preferred) |\n| Legacy hook module already exists | **Hook** (`HookAPI` from `@oh-my-pi/pi-coding-agent/extensibility/hooks`) |\n| Registering provider / custom message renderer | **Extension only** |\n| Shipping as a marketplace plugin | **Extension** (use `package.json` manifest) |\n\nExtensions are a strict superset of hooks. New authoring should use `ExtensionAPI`.\n\n## Debugging\n\nStart omp with `--log-level debug` to see extension load diagnostics:\n\n```\nomp --log-level debug\n```\n\nFailed extension loads are logged with their path and error. Loaded extensions may also emit their own debug logs via `pi.logger`.\n\nTo temporarily disable a specific extension module by name without removing the file:\n\n```yaml\n# ~/.omp/agent/config.yml\ndisabledExtensions:\n - extension-module:my-ext\n```\n\nThe derived name is the filename stem (or directory name for `index.ts`-style entries): `/path/to/my-ext.ts` → `my-ext`.\n\n## Important constraints\n\n- **Do not call runtime actions during load.** Methods like `pi.sendMessage()` throw `ExtensionRuntimeNotInitializedError` if called synchronously during module evaluation (before a session is active). Register handlers/tools/commands during load; perform runtime actions only from event handlers, tools, or commands.\n- **`tool_call` errors are fail-closed.** If a `tool_call` handler throws, the tool is blocked.\n- **Command names must not clash with built-ins.** Conflicts are skipped with a diagnostic log.\n- **Reserved shortcuts are ignored** (`ctrl+c`, `ctrl+d`, `ctrl+z`, `ctrl+k`, `ctrl+p`, `ctrl+l`, `ctrl+o`, `ctrl+t`, `ctrl+g`, `ctrl+q`, `alt+m`, `shift+tab`, `shift+ctrl+p`, `alt+enter`, `escape`, `enter`).\n\n## Further reading\n\n- `docs/extensions.md` — runtime internals and full API surface reference\n- `docs/extension-loading.md` — detailed path resolution rules\n- `docs/hooks.md` — hook subsystem internals\n- `docs/skills/examples/hello-extension/` — complete working example\n",
|
|
68
68
|
"skills/authoring-hooks.md": "---\nname: authoring-hooks\ndescription: Use when creating a new omp hook. Covers HookAPI, event catalog, blocking/overriding tool calls, and context modification.\n---\n\n# Authoring Hooks\n\nHooks are event-driven interceptors that run alongside the agent loop. They are best used for cross-cutting concerns: safety policy, secret redaction, context pruning, audit logging. A hook module registers handlers via `pi.on(event, handler)` and can block tool execution, override tool output, or rewrite the message context before each LLM call.\n\n> **Relationship to extensions:** The hook subsystem (`HookAPI`) is the legacy API. The extension runner now handles everything hooks can do plus more. `ExtensionAPI` supports the hook event model plus extension-only events. Use `ExtensionAPI` for new work; use `HookAPI` only if you are maintaining an existing hook module.\n\n## Factory signature\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\nexport default function myHook(omp: HookAPI): void {\n omp.on(\"tool_call\", async (event, ctx) => {\n // intercept every tool call\n });\n}\n```\n\nThe default export must be a plain function (not async, not a class). It receives a `HookAPI` instance and must register all handlers synchronously during execution.\n\nAlternatively, using `ExtensionAPI` (preferred):\n\n```ts\nimport type { ExtensionAPI } from \"@oh-my-pi/pi-coding-agent\";\n\nexport default function myExtension(pi: ExtensionAPI): void {\n pi.on(\"tool_call\", async (event, ctx) => { /* ... */ });\n}\n```\n\n## Event catalog\n\n### Tool lifecycle\n\n| Event | Fires | Can return |\n|---|---|---|\n| `tool_call` | Before every tool execution | `{ block?: boolean; reason?: string }` |\n| `tool_result` | After every tool execution | `{ content?; details?; isError?: boolean }` |\n\n### Session lifecycle\n\n| Event | Fires | Can return |\n|---|---|---|\n| `session_start` | On initial session load | — |\n| `session_before_switch` | Before session switch | `{ cancel?: boolean }` |\n| `session_switch` | After session switch | — |\n| `session_before_branch` | Before session branch | `{ cancel?: boolean; skipConversationRestore?: boolean }` |\n| `session_branch` | After session branch | — |\n| `session_before_compact` | Before compaction | `{ cancel?: boolean; compaction?: CompactionResult }` |\n| `session.compacting` | During compaction (inject context) | `{ context?: string[]; prompt?: string; preserveData?: Record<string, unknown> }` |\n| `session_compact` | After compaction | — |\n| `session_before_tree` | Before tree navigation | `{ cancel?: boolean; summary?: { summary: string; details?: unknown } }` |\n| `session_tree` | After tree navigation | — |\n| `session_shutdown` | On session shutdown | — |\n\n### Agent/turn lifecycle\n\n| Event | Fires | Can return |\n|---|---|---|\n| `before_agent_start` | Before agent starts a turn | `{ message?: { customType; content; display; details; attribution? } }` |\n| `agent_start` | Agent streaming starts | — |\n| `agent_end` | Agent streaming ends | — |\n| `turn_start` | Start of a user→agent turn | — |\n| `turn_end` | End of a user→agent turn | — |\n| `context` | Before each LLM API call | `{ messages?: Message[] }` |\n| `auto_compaction_start` | Auto-compaction begins | — |\n| `auto_compaction_end` | Auto-compaction ends | — |\n| `auto_retry_start` | Auto-retry begins | — |\n| `auto_retry_end` | Auto-retry ends | — |\n| `ttsr_triggered` | TTSR (too-short response) triggered | — |\n| `todo_reminder` | Todo reminder fires | — |\n\nExtension-only events such as `tool_execution_start`, `tool_execution_update`, `tool_execution_end`, `input`, `user_bash`, and `user_python` require `ExtensionAPI`.\n\n## Pre-tool blocking contract\n\nReturn `{ block: true, reason: \"...\" }` from a `tool_call` handler to prevent execution:\n\n```ts\nomp.on(\"tool_call\", async (event, ctx) => {\n if (event.toolName === \"bash\") {\n const cmd = String(event.input.command ?? \"\");\n if (/\\brm\\s+-rf\\s+\\//.test(cmd)) {\n return { block: true, reason: \"Refusing to delete root filesystem\" };\n }\n }\n});\n```\n\nContract:\n\n- If **any** handler returns `{ block: true }`, execution stops immediately.\n- `reason` is returned to the LLM as the tool error text.\n- If a handler **throws**, the tool is also blocked (fail-closed).\n- Last non-blocking return wins for non-blocking results; first `block: true` short-circuits.\n\n## Post-tool override contract\n\nReturn `{ content, details, isError }` from a `tool_result` handler to patch what the LLM sees:\n\n```ts\nomp.on(\"tool_result\", async (event, ctx) => {\n if (event.toolName === \"read\" && !event.isError) {\n const redacted = event.content.map(chunk => {\n if (chunk.type !== \"text\") return chunk;\n return {\n ...chunk,\n text: chunk.text.replace(/(?:sk|pk)-[a-zA-Z0-9]{20,}/g, \"[REDACTED_API_KEY]\"),\n };\n });\n return { content: redacted };\n }\n});\n```\n\nContract:\n\n- Handlers run in registration order. For `HookAPI`, each handler receives the original tool result event, and the last returned override wins.\n- `content` replaces the full content array for the LLM.\n- `details` replaces the structured details object.\n- `isError` exists on the shared result type, but `HookToolWrapper` does not propagate it into a successful tool result; on a tool failure, the original error is rethrown after handlers complete.\n- On a tool failure, `tool_result` is still emitted with `isError: true`.\n\n## Context modification contract\n\nReturn `{ messages: [...] }` from a `context` handler to rewrite the message list before each LLM API call:\n\n```ts\nomp.on(\"context\", async (event, ctx) => {\n // Remove debug-only custom messages from LLM context\n const filtered = event.messages.filter(\n msg => !(msg.role === \"custom\" && msg.customType === \"debug-only\")\n );\n return { messages: filtered };\n});\n```\n\nContract:\n\n- `event.messages` is the current accumulated list.\n- Handlers run in order; each receives the output of the previous handler.\n- Return `undefined` (or nothing) to pass messages through unmodified.\n\n## Three complete examples\n\n### 1. rm-rf blocker\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\nexport default function rmRfBlocker(omp: HookAPI): void {\n omp.on(\"tool_call\", async (event, ctx) => {\n if (event.toolName !== \"bash\") return;\n\n const cmd = String(event.input.command ?? \"\");\n if (!/\\brm\\s+-rf\\s+\\//.test(cmd)) return;\n\n // Allow if user explicitly confirms (interactive mode only)\n if (ctx.hasUI) {\n const allow = await ctx.ui.confirm(\n \"Dangerous command\",\n `This command deletes from root:\\n${cmd}\\n\\nProceed?`\n );\n if (allow) return;\n }\n\n return { block: true, reason: \"rm -rf / blocked by safety policy\" };\n });\n}\n```\n\n### 2. API-key redactor\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\n// Common API-key shapes. Not exhaustive — providers using bespoke formats\n// (Anthropic `sk-ant-…`, JWT-style bearers, gateway-specific prefixes, etc.)\n// need their own entries.\nconst SECRET_PATTERNS = [\n /\\b(sk|pk)-[a-zA-Z0-9]{20,}\\b/g,\n /\\bAKIA[A-Z0-9]{16}\\b/g,\n /\\bghp_[a-zA-Z0-9]{36}\\b/g,\n // Zhipu / GLM Coding Plan: `<id>.<secret>` (no `sk-` prefix).\n /\\b[a-zA-Z0-9]{16,}\\.[a-zA-Z0-9]{16,}\\b/g,\n /\\b[a-zA-Z0-9_-]{20,}\\s*=\\s*[\"']?[a-zA-Z0-9._/+=-]{20,}[\"']?/g,\n];\n\nexport default function apiKeyRedactor(omp: HookAPI): void {\n omp.on(\"tool_result\", async (event) => {\n if (event.isError) return;\n\n let changed = false;\n const redacted = event.content.map(chunk => {\n if (chunk.type !== \"text\") return chunk;\n let text = chunk.text;\n for (const pattern of SECRET_PATTERNS) {\n const next = text.replace(pattern, \"[REDACTED]\");\n if (next !== text) { changed = true; text = next; }\n }\n return { ...chunk, text };\n });\n\n if (changed) return { content: redacted };\n });\n}\n```\n\n### 3. Context filter\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\nexport default function contextFilter(omp: HookAPI): void {\n omp.on(\"context\", async (event) => {\n const MAX_TOOL_OUTPUT_CHARS = 8_000;\n\n const trimmed = event.messages.map(msg => {\n // Truncate very large tool results to keep context manageable\n if (msg.role !== \"tool\") return msg;\n const content = msg.content.map(chunk => {\n if (chunk.type !== \"text\" || chunk.text.length <= MAX_TOOL_OUTPUT_CHARS) return chunk;\n return {\n ...chunk,\n text: chunk.text.slice(0, MAX_TOOL_OUTPUT_CHARS) + \"\\n[... truncated by context-filter hook]\",\n };\n });\n return { ...msg, content };\n });\n\n return { messages: trimmed };\n });\n}\n```\n\n## UI methods in hook context\n\n`ctx.ui` is a `HookUIContext`. Available methods:\n\n| Method | Description |\n|---|---|\n| `notify(message, type?)` | Show an in-app notification |\n| `setStatus(key, text)` | Set footer status text (keyed, sorted by key) |\n| `select(title, options)` | Show a selection dialog |\n| `confirm(title, message)` | Show a yes/no dialog |\n| `input(title, placeholder?)` | Show a text input dialog |\n| `editor(title, prefill?, { signal }?, { promptStyle }?)` | Show a multi-line editor |\n| `setEditorText(text)` | Set the input editor content |\n| `getEditorText()` | Get current input editor content |\n| `custom(factory)` | Render a custom TUI component |\n| `theme` | Current theme object |\n\nPass `{ promptStyle: true }` as the fourth argument when Enter should submit and Shift+Enter should insert a newline. The default hook editor behavior keeps Enter as newline and Ctrl+Enter as submit.\n\n`ctx.hasUI` is `false` in headless/print/subagent mode — always guard interactive calls.\n\n## Further reading\n\n- `docs/hooks.md` — hook subsystem internals, ordering rules, error propagation\n- `docs/extensions.md` — `ExtensionAPI` (superset of `HookAPI`)\n- `docs/skills/examples/safety-hook/` — complete working example\n",
|
|
69
|
-
"skills/authoring-marketplaces.md": "---\nname: authoring-marketplaces\ndescription: Use when creating a new omp marketplace. Covers marketplace.json schema, source types, install commands, and publishing.\n---\n\n# Authoring Marketplaces\n\nA marketplace is a Git repository (or local directory) that contains a catalog file at `.claude-plugin/marketplace.json
|
|
69
|
+
"skills/authoring-marketplaces.md": "---\nname: authoring-marketplaces\ndescription: Use when creating a new omp marketplace. Covers marketplace.json schema, source types, install commands, and publishing.\n---\n\n# Authoring Marketplaces\n\nA marketplace is a Git repository (or local directory) that contains a catalog file at either `.omp-plugin/marketplace.json` (preferred for omp-specific catalogs) or `.claude-plugin/marketplace.json` (Claude Code-compatible; used as the fallback). Anyone can author one. Users add it with `/marketplace add owner/repo` and then install individual plugins from it.\n\n## Minimum viable marketplace\n\n```\nmy-marketplace/\n .claude-plugin/\n marketplace.json\n plugins/\n my-plugin/\n skills/\n my-skill/\n SKILL.md\n```\n\n```json\n{\n \"name\": \"my-marketplace\",\n \"owner\": { \"name\": \"Your Name\" },\n \"plugins\": [\n {\n \"name\": \"my-plugin\",\n \"description\": \"What it does\",\n \"source\": \"./plugins/my-plugin\"\n }\n ]\n}\n```\n\nPush to GitHub. Users install with:\n\n```\n/marketplace add your-github-username/my-marketplace\n/marketplace install my-plugin@my-marketplace\n```\n\n## marketplace.json schema\n\nThe catalog file lives at either `.omp-plugin/marketplace.json` or `.claude-plugin/marketplace.json` in the repository root. omp prefers the `.omp-plugin/` path and falls back to the Claude path; a repository may publish both to expose tool-specific catalogs from a single source tree.\n\n### Top-level fields\n\n| Field | Required | Description |\n|---|---|---|\n| `name` | yes | Marketplace name. Lowercase alphanumeric, hyphens, dots. Must start and end with alphanumeric. Max 64 chars. |\n| `owner` | yes | Object with at minimum `owner.name` (string) |\n| `owner.name` | yes | Marketplace owner name |\n| `owner.email` | no | Owner contact email |\n| `plugins` | yes | Array of plugin entries (see below) |\n| `metadata.description` | no | Short description of the marketplace |\n| `metadata.version` | no | Catalog metadata version string |\n| `metadata.pluginRoot` | no | String prepended to all relative plugin source paths |\n| extra top-level fields | no | Preserved by the parser but not used by marketplace install/runtime logic |\n\n### Plugin entry fields\n\n| Field | Required | Description |\n|---|---|---|\n| `name` | yes | Plugin name (same naming rules as marketplace name) |\n| `source` | yes | Where to find the plugin — string or object (see source types below) |\n| `description` | no | Short plugin description |\n| `version` | no | Version string |\n| `author` | no | `{ name, email? }` |\n| `homepage` | no | URL |\n| `category` | no | e.g. `development`, `productivity`, `security` |\n| `tags` / `keywords` | no | Arrays of string tags/keywords |\n| `repository` | no | Repository URL |\n| `license` | no | License string |\n| `strict` | no | Boolean plugin metadata flag |\n| `commands`, `agents`, `hooks`, `mcpServers`, `lspServers` | no | Capability metadata used by plugin tooling and selectors |\n\n### Full catalog example\n\n```json\n{\n \"$schema\": \"https://anthropic.com/claude-code/marketplace.schema.json\",\n \"name\": \"acme-plugins\",\n \"owner\": {\n \"name\": \"Acme Corp\",\n \"email\": \"plugins@acme.example\"\n },\n \"metadata\": {\n \"description\": \"Official Acme plugins for oh-my-pi\"\n },\n \"plugins\": [\n {\n \"name\": \"acme-linter\",\n \"description\": \"Enforce Acme coding standards\",\n \"category\": \"development\",\n \"source\": \"./plugins/linter\"\n },\n {\n \"name\": \"acme-deploy\",\n \"description\": \"One-command deploy to Acme cloud\",\n \"category\": \"devops\",\n \"source\": {\n \"source\": \"github\",\n \"repo\": \"acme-corp/omp-deploy-plugin\",\n \"ref\": \"main\"\n }\n }\n ]\n}\n```\n\n## Plugin source types\n\n### 1. Relative path string\n\nPoints to a subdirectory inside the marketplace repository itself. Must start with `./`.\n\n```json\n\"source\": \"./plugins/my-plugin\"\n```\n\nThe path is resolved relative to the marketplace repository root. Path traversal outside the repo root is rejected.\n\nUse `metadata.pluginRoot` to avoid repeating a common prefix:\n\n```json\n{\n \"metadata\": { \"pluginRoot\": \"./plugins\" },\n \"plugins\": [\n { \"name\": \"plugin-a\", \"source\": \"./plugin-a\" },\n { \"name\": \"plugin-b\", \"source\": \"./plugin-b\" }\n ]\n}\n```\n\n### 2. Git URL\n\nA full Git repository URL. Optionally pin to a branch/tag (`ref`) or exact commit (`sha`):\n\n```json\n\"source\": {\n \"source\": \"url\",\n \"url\": \"https://github.com/org/my-plugin.git\",\n \"ref\": \"main\",\n \"sha\": \"a1b2c3d4...\"\n}\n```\n\n### 3. GitHub shorthand\n\nShorthand for GitHub repositories. Functionally equivalent to a Git URL but more concise:\n\n```json\n\"source\": {\n \"source\": \"github\",\n \"repo\": \"org/my-plugin\",\n \"ref\": \"v2.1.0\",\n \"sha\": \"a1b2c3d4...\"\n}\n```\n\n### 4. Git subdirectory (monorepo)\n\nFor plugins living inside a subdirectory of a larger repository. `url` accepts a full HTTPS URL or a GitHub `owner/repo` shorthand:\n\n```json\n\"source\": {\n \"source\": \"git-subdir\",\n \"url\": \"https://github.com/org/monorepo.git\",\n \"path\": \"packages/my-plugin\",\n \"ref\": \"main\",\n \"sha\": \"a1b2c3d4...\"\n}\n```\n\nThe `path` must resolve inside the cloned repository — directory escape is rejected.\n\n### 5. NPM package\n\nDeclares the plugin as an npm package. `version` is optional:\n\n```json\n\"source\": {\n \"source\": \"npm\",\n \"package\": \"@acme/omp-plugin\",\n \"version\": \"1.2.0\"\n}\n```\n\n> Note: npm plugin sources are declared in the schema but installation support is not yet fully implemented. Use Git-based sources for plugins that need to work today.\n\n## Plugin structure\n\nA plugin directory (regardless of source type) ships its content in conventional locations, all optional:\n\n```\nmy-plugin/\n skills/<name>/SKILL.md ← skills\n commands/*.md ← slash commands\n agents/*.md ← subagent definitions\n hooks/pre/, hooks/post/ ← hooks\n tools/ ← custom tools\n .mcp.json ← MCP server definitions\n package.json ← optional; its version is a fallback when the catalog entry has no version\n README.md ← recommended: description + usage\n```\n\n> Note: extension modules declared via `package.json` `omp.extensions` are **not** loaded from marketplace installs — that mechanism only applies to npm-installed or `omp plugin link`ed plugins. Ship marketplace plugin behavior through the conventional directories above.\n\n## Install command\n\n```\n/marketplace install name@marketplace-name\n/marketplace install --force name@marketplace-name # reinstall\n/marketplace install --scope project name@marketplace # project-scoped\n```\n\nCLI equivalent:\n\n```\nomp plugin marketplace add owner/repo\nomp plugin install name@marketplace-name\n```\n\nScope behavior:\n\n- **user** (default) — installed in `~/.omp/plugins/installed_plugins.json`, available in all projects\n- **project** — installed in `<project>/.omp/plugins/installed_plugins.json`, available only in that project\n\nProject-scoped installs shadow user-scoped installs of the same plugin name.\n\n## Naming rules\n\nMarketplace names and plugin names must:\n\n- Contain only lowercase letters, digits, hyphens (`-`), and dots (`.`)\n- Start and end with a lowercase letter or digit\n- Be at most 64 characters\n\nPlugin IDs (`name@marketplace`) must be at most 128 characters total.\n\nValid: `my-plugin`, `code-review`, `acme.tools`, `ai-v2`\nInvalid: `-bad-start`, `bad-end-`, `.dot-start`, `Under_score`, `HAS_CAPS`\n\n## Publishing workflow\n\n1. Create `marketplace.json` at `.omp-plugin/marketplace.json` (omp-only) or `.claude-plugin/marketplace.json` (shared with Claude Code) in a new Git repo.\n2. Add plugin entries pointing to subdirectories (or external sources).\n3. Push to GitHub.\n4. Share the `owner/repo` string. Users add it with `/marketplace add owner/repo`.\n5. When you update the catalog, users run `/marketplace update your-marketplace-name` to pull the latest.\n\nTo test locally before publishing:\n\n```\n/marketplace add ./path/to/my-marketplace\n```\n\nLocal path sources also accept `~/` and absolute paths.\n\n## Further reading\n\n- `docs/marketplace.md` — marketplace system internals, on-disk layout, command reference\n- `docs/skills/authoring-extensions.md` — how to author the extension modules inside plugins\n- `docs/skills/examples/mini-marketplace/` — minimal working marketplace example\n",
|
|
70
70
|
"skills/examples/hello-extension/README.md": "# hello-extension\n\nA minimal `oh-my-pi` extension that demonstrates the two most common authoring patterns: subscribing to `session_start` to notify on load, and registering a `/hello` slash command that sends a greeting into the conversation. It is intentionally small — use it as a copy-paste starting point for your own extension.\n\n## Install\n\n**Option A — drop into user extensions directory:**\n\n```\ncp -r . ~/.omp/agent/extensions/hello-extension\n```\n\nRestart `omp`. You will see the startup notification immediately.\n\n**Option B — point the settings `extensions` array at it:**\n\n```yaml\n# ~/.omp/agent/config.yml\nextensions:\n - /path/to/hello-extension\n```\n\n**Option C — load once via CLI flag:**\n\n```\nomp --extension ./hello-extension\n```\n\n## Usage\n\nAfter loading, type `/hello` or `/hello Ada` in the omp prompt. The command sends a visible greeting custom message into the conversation and shows a \"Message sent!\" notification.\n\n## What it demonstrates\n\n- Default export factory receiving `ExtensionAPI`\n- `pi.on(\"session_start\", ...)` — session lifecycle hook\n- `pi.registerCommand(...)` — slash command registration\n- `ctx.ui.notify(...)` — user-facing notification\n- `package.json` with `omp.extensions` manifest field\n",
|
|
71
71
|
"skills/examples/mini-marketplace/README.md": "# mini-marketplace\n\nA minimal `oh-my-pi` marketplace catalog that demonstrates the `marketplace.json` format. It lists one plugin (`my-plugin`) using a relative path source.\n\n## Install command\n\n```\n/marketplace add ./docs/skills/examples/mini-marketplace\n/marketplace install my-plugin@example-marketplace\n```\n\nOr from the CLI:\n\n```\nomp plugin marketplace add ./docs/skills/examples/mini-marketplace\nomp plugin install my-plugin@example-marketplace\n```\n\n## What it demonstrates\n\n- Minimum required `marketplace.json` fields: `name`, `owner.name`, `plugins`\n- Relative path plugin source using `./` prefix (`\"source\": \"./my-plugin\"`)\n- Plugin bundled inside the same directory tree as the marketplace catalog\n- Extra catalog metadata: the example includes a top-level `description`; current marketplace parsing preserves extra top-level fields, while runtime behavior uses required fields and plugin entries.\n\n## Structure\n\n```\nmini-marketplace/\n .claude-plugin/\n marketplace.json ← catalog\n README.md\n my-plugin/\n package.json ← omp.extensions manifest\n index.ts ← extension entry point\n```\n\nPublished and local marketplaces use the same catalog location: `.claude-plugin/marketplace.json` inside the marketplace root. Point `/marketplace add` at this folder to load the example.\n",
|
|
72
72
|
"skills/examples/safety-hook/README.md": "# safety-hook\n\nAn `oh-my-pi` extension that demonstrates `tool_call` blocking. It intercepts `bash` tool calls and returns `{ block: true, reason: \"...\" }` when the command contains `rm -rf /` with normal whitespace, preventing the tool from executing.\n\n## What it demonstrates\n\n- `pi.on(\"tool_call\", ...)` — pre-execution interception\n- `return { block: true, reason: \"...\" }` — blocking contract\n- Regex guard on bash input (`/\\brm\\s+-rf\\s+\\//`)\n\n## Install\n\n```\ncp -r . ~/.omp/agent/extensions/safety-hook\n```\n\nRestart `omp`. The hook is active for all sessions.\n\nOr load once:\n\n```\nomp --extension ./safety-hook\n```\n\n## How it works\n\n```\nLLM calls bash tool\n │\n ▼\ntool_call handlers run\n │\n ├─ command matches /\\brm\\s+-rf\\s+\\// ?\n │ yes → { block: true, reason: \"...\" } ← execution stops, reason sent to LLM\n │ no → undefined ← execution continues normally\n ▼\ntool executes (if not blocked)\n```\n\nThe `reason` text is what the LLM receives as the tool error, so it can understand why the call was rejected and try a different approach.\n",
|
|
@@ -85,7 +85,7 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
|
|
|
85
85
|
"toolconv/qwen3.md": "# Qwen3 tool-calling format (Hermes convention)\n\nTool-calling convention of Alibaba's **Qwen3** family (`Qwen/Qwen3-*`: dense `0.6B–32B` and MoE `30B-A3B`/`235B-A22B`; same template line as `Qwen2.5-*` and `QwQ-32B`). It is the **Hermes** convention — the XML+JSON format originated by NousResearch's Hermes 2 Pro and adopted verbatim by Qwen, plus a long tail of community fine-tunes. The envelope is **ChatML**: every turn is `<|im_start|>{role}\\n{body}<|im_end|>\\n`. Available tools are advertised in the system turn inside a `<tools>…</tools>` block (one JSON spec per line); the model emits each call as a `<tool_call>\\n{json}\\n</tool_call>` block whose `arguments` is a **nested JSON object** (not a stringified JSON); tool results are fed back inside `<tool_response>…</tool_response>`. Hybrid reasoning is carried in `<think>…</think>`. The format ships in the model's own `chat_template`, so an inference server enables it with no extra template: vLLM uses `--enable-auto-tool-choice --tool-call-parser hermes` (pair with `--reasoning-parser deepseek_r1` for the thinking split); SGLang exposes the matching parsers (e.g. `--reasoning-parser qwen3`).\n\nVerified against: Qwen's canonical function-calling guide (`qwen.readthedocs.io/en/latest/framework/function_call.html`, read in full incl. the Qwen-Agent + vLLM sections), the byte-exact `chat_template` field of `Qwen/Qwen3-8B`'s `tokenizer_config.json` (HF resolve-cache commit `b968826d9c46dd6066d109eabc6255188de91218`, rendered locally with Jinja2 for the raw streams below) and its `added_tokens_decoder` for token IDs, the NousResearch `Hermes-Function-Calling` README, and the vLLM tool-calling docs (`hermes` parser + Qwen models section).\n\n## Special tokens\n\nOnly the three ChatML markers are \"special\" control tokens (`special=true`, skipped by `skip_special_tokens`). The reasoning and tool markers are also single vocabulary tokens (one ID each) but are registered with `special=false`, i.e. they render as ordinary text and are **not** stripped by `skip_special_tokens`. The `<tools>`/`</tools>` wrapper has **no** dedicated token at all — it is plain text that BPE-splits into several tokens. IDs are from `Qwen/Qwen3-8B` `added_tokens_decoder`.\n\n| Token (verbatim) | ID | `special` | Purpose |\n|---|---|---|---|\n| `<\\|im_start\\|>` | 151644 | true | Start of a turn; followed immediately by the role name + `\\n` |\n| `<\\|im_end\\|>` | 151645 | true | End of a turn; the chat stop token |\n| `<\\|endoftext\\|>` | 151643 | true | Base EOS / pad token |\n| `<think>` | 151667 | false | Opens the reasoning block |\n| `</think>` | 151668 | false | Closes the reasoning block |\n| `<tool_call>` | 151657 | false | Opens one tool call |\n| `</tool_call>` | 151658 | false | Closes one tool call |\n| `<tool_response>` | 151665 | false | Opens one tool result |\n| `</tool_response>` | 151666 | false | Closes one tool result |\n| `<tools>` … `</tools>` | — | — | Plain text wrapper around the tool list in the system turn (not a single token) |\n\nNotes on exactness:\n- All markers use the ASCII pipe `|` (U+007C) and ASCII angle brackets. Qwen3 has **no** fullwidth (`|` U+FF5C) or `▁` (U+2581) variants — that is DeepSeek/SentencePiece territory, not Qwen.\n- `<|im_start|>` and `<|im_end|>` are the only tokens that matter for splitting turns. Because `<tool_call>`, `</tool_call>`, `<tool_response>`, `<think>`, `</think>` are `special=false`, they survive a `skip_special_tokens=True` decode, which is exactly why the regex-based `hermes` parser can recover them from decoded text.\n- The model card confirms `</think>` = token `151668` (used by the reference parsing snippet `output_ids[::-1].index(151668)`).\n\n## Roles / channels / turn structure\n\nChatML. Each message renders as:\n\n```text\n<|im_start|>{role}\n{body}<|im_end|>\n```\n\n- Roles: `system`, `user`, `assistant`, `tool`. There is no separate \"channel\" concept; the only sub-stream is the `<think>` reasoning block inside an assistant turn.\n- `<|im_end|>\\n` terminates every turn. With `add_generation_prompt=True` the prompt ends with `<|im_start|>assistant\\n` and the model continues from there.\n- **System turn:** if the caller supplies a `system` message it becomes the first turn. When `tools` are present, the tool advertisement is merged **into** that same system turn (the user's system text first, then `\\n\\n`, then the `# Tools` block — see below). Qwen3 injects no default system prompt when none is given.\n- **Tool-result turns use the `user` envelope.** Qwen3's template maps every `role: \"tool\"` message into a `<|im_start|>user` turn carrying `<tool_response>` blocks (consecutive tool messages are coalesced into one user turn). This differs from classic Hermes 2 Pro, which used a dedicated `<|im_start|>tool` turn for results — Qwen folds them into `user`.\n- **Thinking/reasoning:** carried in `<think>…</think>` at the start of an assistant turn (see the Parsing notes for the toggle and the history-rerender rule).\n\n## Tool definitions\n\nTools are advertised inside the system turn. The template emits a fixed preamble, then each tool object serialized with `tool | tojson` (`json.dumps(..., ensure_ascii=False)`) on **its own line**, then a fixed trailer. Each list element is the full OpenAI tool object `{\"type\": \"function\", \"function\": {...}}` (with a JSON-Schema `parameters` object). The exact, verbatim wrapper Qwen3 produces:\n\n```text\n<|im_start|>system\n{optional original system content}\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\n{\"type\": \"function\", \"function\": {\"name\": \"get_current_temperature\", \"description\": \"Get current temperature at a location.\", \"parameters\": {\"type\": \"object\", \"properties\": {\"location\": {\"type\": \"string\", \"description\": \"The location to get the temperature for, in the format \\\"City, State, Country\\\".\"}, \"unit\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"], \"description\": \"The unit to return the temperature in. Defaults to \\\"celsius\\\".\"}}, \"required\": [\"location\"]}}}\n{\"type\": \"function\", \"function\": {\"name\": \"get_temperature_date\", \"description\": \"Get temperature at a location and date.\", \"parameters\": {\"type\": \"object\", \"properties\": {\"location\": {\"type\": \"string\", \"description\": \"The location to get the temperature for, in the format \\\"City, State, Country\\\".\"}, \"date\": {\"type\": \"string\", \"description\": \"The date to get the temperature for, in the format \\\"Year-Month-Day\\\".\"}, \"unit\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"], \"description\": \"The unit to return the temperature in. Defaults to \\\"celsius\\\".\"}}, \"required\": [\"location\", \"date\"]}}}\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n```\n\n- If the first message is a `system` message, its content is placed before `# Tools` (separated by a blank line); otherwise the turn opens straight into `# Tools`.\n- The trailing instruction is a literal part of the prompt, including the placeholder line `{\"name\": <function-name>, \"arguments\": <args-json-object>}` (those angle-bracket tokens are instructions, not emitted output).\n- Version note: the original Hermes 2 Pro system prompt additionally embedded a `FunctionCall` pydantic schema line (`{\"title\": \"FunctionCall\", \"type\": \"object\", \"properties\": {\"name\": …, \"arguments\": …}}`). Qwen3 dropped that line; the wrapper above is exactly what Qwen3 emits.\n\n## Tool-call format\n\nThe model emits each call as a `<tool_call>` line, a single-line JSON object, then `</tool_call>`. Minimal single call:\n\n```text\n<tool_call>\n{\"name\": \"get_current_temperature\", \"arguments\": {\"location\": \"San Francisco, CA, USA\", \"unit\": \"celsius\"}}\n</tool_call>\n```\n\n- `arguments` is a **nested JSON object**, not a JSON-encoded string. On the wire it is `\"arguments\": {\"location\": \"...\"}` — never `\"arguments\": \"{\\\"location\\\": ...}\"`. (The template renders a dict argument via `tojson`; only if a caller stored `arguments` as a pre-serialized string does it pass through verbatim.)\n- The call object has exactly two keys, `name` (string) and `arguments` (object). There is no per-call ID on the wire — the OpenAI-style `tool_call_id` is minted by the server, not the model (see API mapping).\n- A tool-calling assistant turn may also contain natural-language `content` before the first `<tool_call>`; the template inserts a `\\n` between that content and the first call.\n\n## Multiple / parallel tool calls\n\nParallel calls are emitted as consecutive `<tool_call>…</tool_call>` blocks within a single assistant turn, each separated by a newline:\n\n```text\n<|im_start|>assistant\n<tool_call>\n{\"name\": \"get_current_temperature\", \"arguments\": {\"location\": \"San Francisco, CA, USA\"}}\n</tool_call>\n<tool_call>\n{\"name\": \"get_temperature_date\", \"arguments\": {\"location\": \"San Francisco, CA, USA\", \"date\": \"2024-10-01\"}}\n</tool_call><|im_end|>\n```\n\nThe parser returns these as `tool_calls[0]`, `tool_calls[1]`, … in emission order. The application must execute them and return one `<tool_response>` per call, in the same order.\n\n## Tool-result format\n\nEach executed result is wrapped in `<tool_response>…</tool_response>`. Qwen3 places them inside a **`user`** turn, and **coalesces** consecutive tool results into one turn (one `<tool_response>` block per result, newline-separated, a single closing `<|im_end|>`):\n\n```text\n<|im_start|>user\n<tool_response>\n{\"temperature\": 26.1, \"location\": \"San Francisco, CA, USA\", \"unit\": \"celsius\"}\n</tool_response>\n<tool_response>\n{\"temperature\": 25.9, \"location\": \"San Francisco, CA, USA\", \"date\": \"2024-10-01\", \"unit\": \"celsius\"}\n</tool_response><|im_end|>\n```\n\n- The body between the tags is the tool's return value (typically a JSON string, but any text is allowed). The function name is **not** repeated inside Qwen3's `<tool_response>` — ordering ties results to calls. (Classic Hermes 2 Pro instead nested `{\"name\": ..., \"content\": ...}` inside `<tool_response>` under a `tool` turn; Qwen3's template emits the bare content under a `user` turn.)\n- At the OpenAI API layer a result message is `{\"role\": \"tool\", \"content\": \"...\", \"tool_call_id\": \"...\"}`; the template renders only its `content` into a `<tool_response>` block.\n\n## End-to-end example\n\nComplete multi-turn weather exchange in **non-thinking mode** (`enable_thinking=False`), exactly as `apply_chat_template` renders it for the live flow. With thinking disabled, each generation step injects an empty `<think>\\n\\n</think>\\n\\n` after `<|im_start|>assistant\\n`; the model then emits its tool call / final answer. Copy-pasteable, byte-exact:\n\n```text\n<|im_start|>system\nYou are a helpful assistant. Current Date: 2024-09-30.\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\n{\"type\": \"function\", \"function\": {\"name\": \"get_current_temperature\", \"description\": \"Get current temperature at a location.\", \"parameters\": {\"type\": \"object\", \"properties\": {\"location\": {\"type\": \"string\", \"description\": \"The location to get the temperature for, in the format \\\"City, State, Country\\\".\"}, \"unit\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"], \"description\": \"The unit to return the temperature in. Defaults to \\\"celsius\\\".\"}}, \"required\": [\"location\"]}}}\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n<|im_start|>user\nWhat's the temperature in San Francisco now?<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n<tool_call>\n{\"name\": \"get_current_temperature\", \"arguments\": {\"location\": \"San Francisco, CA, USA\", \"unit\": \"celsius\"}}\n</tool_call><|im_end|>\n<|im_start|>user\n<tool_response>\n{\"temperature\": 26.1, \"location\": \"San Francisco, CA, USA\", \"unit\": \"celsius\"}\n</tool_response><|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\nThe current temperature in San Francisco is 26.1°C.<|im_end|>\n```\n\nIn **thinking mode** (`enable_thinking=True`, the default) the generation prompt instead ends with a bare `<|im_start|>assistant\\n` and the model itself produces the `<think>…real reasoning…</think>` block before the `<tool_call>`. (When re-rendering stored history, the template keeps the `<think>` block only for the last assistant message or messages that carry `reasoning_content`, and strips reasoning from earlier turns — see Parsing notes.)\n\n## OpenAI-compatible API mapping\n\nWith `--enable-auto-tool-choice --tool-call-parser hermes`, vLLM converts the raw stream into a standard Chat Completions response:\n\n- `finish_reason`: `\"tool_calls\"` when the turn ended on tool calls (otherwise `\"stop\"`).\n- `message.role`: `\"assistant\"`; `message.content`: `null` for a pure tool-call turn (any pre-call prose becomes `content`).\n- `message.tool_calls[]`: one entry per `<tool_call>` block, each:\n - `id`: server-generated, e.g. `\"chatcmpl-tool-924d705adb044ff88e0ef3afdd155f15\"` (the model emits no ID).\n - `type`: `\"function\"`.\n - `function.name`: the call's `name`.\n - `function.arguments`: a **JSON string** at the API boundary, e.g. `'{\"location\": \"San Francisco, CA, USA\"}'`. The wire format is a nested object, but the server re-serializes it to a string here (`json.loads(...)` it before use), matching OpenAI and Qwen-Agent.\n- With thinking + `--reasoning-parser deepseek_r1`, the `<think>…</think>` content is split out into `message.reasoning_content` and removed from `content`.\n- Feeding results back: append `{\"role\": \"tool\", \"content\": <result>, \"tool_call_id\": <id-from-the-call>}` for each result. `tool_call_id` links a result to its call (Qwen3's template ignores the id when rendering — ordering is what reaches the model — but the API still requires it).\n\nExample assistant message returned for the two-call query:\n\n```text\nfinish_reason='tool_calls'\nmessage.content = None\nmessage.tool_calls = [\n {id:'chatcmpl-tool-924d…', type:'function', function:{name:'get_current_temperature', arguments:'{\"location\": \"San Francisco, CA, USA\"}'}},\n {id:'chatcmpl-tool-7e30…', type:'function', function:{name:'get_temperature_date', arguments:'{\"location\": \"San Francisco, CA, USA\", \"date\": \"2024-10-01\"}'}},\n]\n```\n\n## Parsing notes & gotchas\n\n- **Arguments object vs string:** on the wire `arguments` is a nested JSON object; the OpenAI layer hands it back as a JSON string. Code that reads the raw stream must parse an object; code that reads the API must `json.loads` the string. Do not double-encode.\n- **`<tools>` is not a token.** Only count on `<|im_start|>`/`<|im_end|>` (and the `*tool_call*`/`*tool_response*`/`*think*` single tokens) being atomic. `<tools>`/`</tools>` are plain text.\n- **Regex/streaming parse:** the vLLM `hermes` parser (`vllm/tool_parsers/hermes_tool_parser.py`, `Hermes2ProToolParser`) keys on the literal `<tool_call>` / `</tool_call>` substrings and JSON-decodes the body, supporting multiple blocks per turn. In streaming it buffers from `<tool_call>` until it can incrementally parse `name` then `arguments`; partial argument JSON is emitted as argument deltas. Text before the first `<tool_call>` is streamed as ordinary content.\n- **Thinking toggle:** `enable_thinking=False` (passed via `chat_template_kwargs={\"enable_thinking\": False}` over the OpenAI API, or `tokenizer.apply_chat_template(..., enable_thinking=False)`) injects an empty `<think>\\n\\n</think>\\n\\n` into the generation prompt, hard-suppressing reasoning. Soft switches `/think` and `/no_think` in a user/system message flip it per-turn when thinking is enabled. Greedy decoding is discouraged for Qwen3 (repetition risk).\n- **History rerender asymmetry:** when `apply_chat_template` re-renders a stored conversation, it emits the `<think>` block only for the final assistant message or messages carrying `reasoning_content`; reasoning from earlier turns is dropped. So a stored intermediate tool-call assistant turn shows no `<think>` block, while the live generation step that produced it was prefixed with one (in non-thinking mode). Reasoning is preserved only within the current multi-step tool sequence (after the last real user query).\n- **Reasoning models + stopword templates:** Qwen warns against ReAct-style stopword tool templates for Qwen3, since reasoning text may contain the stopwords and corrupt parsing — use this native Hermes template instead.\n- **Robustness:** the format is prompt/template-driven, so malformed output is possible (truncated JSON, missing `</tool_call>`, prose mixed into a call, an array serialized as a string). Production parsers should tolerate and, on failure, fall back to treating the text as content. Named / `required` tool_choice routes through vLLM's structured-outputs backend for guaranteed-parseable arguments.\n- **Version/scope:** this `hermes` template covers `Qwen3-*`, `Qwen2.5-*`, and `QwQ-32B`. It does **not** cover `Qwen3-Coder`, which uses a different XML scheme parsed by vLLM's `qwen3_xml` parser — a separate convention.\n\n## Sources\n\n- Qwen function-calling guide: https://qwen.readthedocs.io/en/latest/framework/function_call.html\n- Qwen3-8B chat template + token IDs (`tokenizer_config.json`, `chat_template` + `added_tokens_decoder`): https://huggingface.co/Qwen/Qwen3-8B/resolve/main/tokenizer_config.json (verified via HF resolve-cache commit `b968826d9c46dd6066d109eabc6255188de91218`)\n- Qwen3-8B model card (thinking modes, `enable_thinking`, `</think>`=151668): https://huggingface.co/Qwen/Qwen3-8B\n- NousResearch Hermes-Function-Calling (origin of the convention): https://github.com/NousResearch/Hermes-Function-Calling\n- vLLM tool-calling docs (`hermes` parser, Qwen models, auto tool choice): https://docs.vllm.ai/en/latest/features/tool_calling/\n",
|
|
86
86
|
"tools/ask.md": "# ask\n\n> Prompts the interactive user for one or more option-picker or free-form answers.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ask.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ask.md`\n- Key collaborators:\n - `packages/coding-agent/src/config/settings-schema.ts` — `ask.timeout` / `ask.notify` defaults\n - `packages/coding-agent/src/modes/theme/theme.ts` — checkbox and tree glyphs for TUI rendering\n - `packages/coding-agent/src/tui.ts` — status-line rendering\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `questions` | `Question[]` | Yes | One or more questions. Empty arrays are rejected by schema and also guarded at runtime. |\n\n### `Question`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `id` | `string` | Yes | Stable identifier used in multi-question results. |\n| `question` | `string` | Yes | Prompt text shown to the user. |\n| `options` | `{ label: string; description?: string }[]` | Yes | Option labels for the picker, each with optional explanatory `description` text shown below the label. The schema does not require a minimum length; the UI always appends `Other (type your own)`, and callers must not include it. |\n| `multi` | `boolean` | No | Enables multi-select mode. Default: `false`. |\n| `recommended` | `number` | No | Zero-based recommended option index. In single-select mode the label gets ` (Recommended)` appended in the UI. |\n\n## Outputs\n- Single-shot result.\n- `content[0].text` is plain text:\n - single question: `User selected: ...` and/or `User provided custom input: ...`\n - multiple questions: `User answers:` followed by one line per `id`\n- `details`:\n - single question: `{ question, options, multi, selectedOptions, customInput?, timedOut? }`\n - multiple questions: `{ results: QuestionResult[] }`, where each item includes `id`, `question`, `options`, `multi`, `selectedOptions`, and optional `customInput` and `timedOut`\n- Cancellation and headless cases throw instead of returning a structured success result.\n\n## Flow\n1. `AskTool.createIf()` only registers the tool when `session.hasUI` is true; headless sessions never get it.\n2. `execute()` requires `context.ui`; if missing it aborts the context and throws `ToolAbortError(\"Ask tool requires interactive mode\")`.\n3. It reads `ask.timeout` from settings, converts seconds to milliseconds (`0` disables timeout), and disables timeout entirely while plan mode is enabled (`packages/coding-agent/src/tools/ask.ts`).\n4. If `ask.notify` is not `off`, it sends a terminal notification: `Waiting for input`.\n5. For each question, `askSingleQuestion()` drives either:\n - single-select list + optional editor for `Other`\n - multi-select checkbox loop + `Done selecting` sentinel + optional editor for `Other`\n6. In multi-question mode, left/right arrow handlers enable back/forward navigation between questions and preserve prior selections.\n7. If a timeout fires before any selection/custom input, the tool auto-selects the recommended option, or the first option when no valid `recommended` index exists; the result text gets an ` (auto-selected after timeout)` suffix and `details.timedOut` is set.\n8. If the user cancels without timeout, `execute()` aborts the tool context and throws `ToolAbortError(\"Ask tool was cancelled by the user\")`.\n9. On success it formats human-readable text plus structured `details`; the TUI renderer uses `details` for rich display.\n\n## Modes / Variants\n- Single question: returns flattened `details` fields for one question.\n- Multiple questions: returns `details.results[]` and allows back/forward navigation across questions.\n- Single-select: one option or custom input.\n- Multi-select: toggled checkbox list, `Done selecting` sentinel only when forward navigation is not active.\n\n## Side Effects\n- User-visible prompts / interactive UI\n - Opens a selection dialog via `context.ui.select(...)`.\n - Opens a text editor dialog via `context.ui.editor(...)` for `Other`.\n - Sends a terminal notification unless `ask.notify=off`.\n- Session state\n - Reads plan-mode state to disable timeouts.\n - Calls `context.abort()` on headless use or user cancellation.\n- Background work / cancellation\n - Wraps UI waits in `untilAborted(...)` so abort signals interrupt pending dialogs.\n\n## Limits & Caps\n- `questions` must contain at least 1 item (`askSchema` in `packages/coding-agent/src/tools/ask.ts`).\n- `ask.timeout` default is `0` seconds, which disables timeout (`packages/coding-agent/src/config/settings-schema.ts`). Configured non-zero values are seconds.\n- Prompt guidance says provide 2-5 options, but code only requires the `options` array field and does not enforce a minimum or maximum length (`packages/coding-agent/src/prompts/tools/ask.md`).\n- Timeout only applies to the option picker; once the user chooses `Other`, the editor has no timeout (`promptForCustomInput()` in `packages/coding-agent/src/tools/ask.ts`).\n- `AskTool.concurrency = \"exclusive\"`: the tool runs alone in its tool batch because the selector/editor UI surface is shared and concurrent `ask` calls would clobber each other.\n\n## Errors\n- Missing interactive UI: throws `ToolAbortError(\"Ask tool requires interactive mode\")`.\n- User cancels picker/editor without timeout: throws `ToolAbortError(\"Ask tool was cancelled by the user\")`.\n- Abort signal during input: converted to `ToolAbortError(\"Ask input was cancelled\")`.\n- Empty `questions` at runtime returns a text error payload instead of throwing: `Error: questions must not be empty`.\n\n## Notes\n- `recommended` is only a UI hint; invalid indexes are ignored.\n- In single-select mode the returned `selectedOptions` value strips the appended ` (Recommended)` suffix.\n- Multi-select results preserve selection order by `Set` insertion order, not original option order after arbitrary toggles.\n- Option labels and prompt text are returned verbatim in `details`; the tool does not interpret them beyond UI affordances like `Other` and ` (Recommended)`.\n",
|
|
87
87
|
"tools/ast-edit.md": "# ast_edit\n\n> Preview and apply structural rewrites over source files via native ast-grep.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ast-edit.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ast-edit.md`\n- Key collaborators:\n - `crates/pi-natives/src/ast.rs` — native rewrite planning and file mutation\n - `crates/pi-ast/src/language/mod.rs` — language aliases and extension inference used by the native wrapper.\n - `packages/coding-agent/src/tools/path-utils.ts` — path/glob parsing and multi-path resolution\n - `packages/coding-agent/src/tools/resolve.ts` — preview/apply queueing\n - `packages/coding-agent/src/tools/render-utils.ts` — parse-error dedupe and display caps\n - `packages/coding-agent/src/utils/file-display-mode.ts` — hashline vs line-number diff references\n - `packages/hashline/src/format.ts` — stable hashline header formatting for preview anchors\n - `packages/natives/native/index.d.ts` — JS-visible native binding contract\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `ops` | `{ pat: string; out: string }[]` | Yes | One or more rewrite rules. `pat` must be non-empty. Duplicate `pat` values fail before native execution. Empty `out` deletes the matched node. |\n| `paths` | `string[]` | Yes | One or more files, directories, globs, or internal URLs with backing files. Empty entries are rejected. Globs are forbidden for internal URLs. |\n\nShared AST pattern grammar and language catalog: see [`ast_grep`](./ast-grep.md#inputs).\n\n- `ast_edit` uses the same `$NAME`, `$_`, `$$$NAME`, and `$$$` metavariable semantics.\n- The tool prompt adds rewrite-specific constraints:\n - metavariable names must be uppercase and must stand for whole AST nodes,\n - captures from `pat` are substituted into `out`,\n - each rewrite is a 1:1 structural substitution; one capture cannot expand into multiple sibling nodes unless the grammar itself permits that expansion at that position.\n\n## Outputs\n- Single-shot preview result from `ast_edit` itself.\n- Model-facing `content` is one text block showing proposed edits, grouped by file for directory/multi-file runs.\n - Each change renders as two lines. Hashline mode uses `-LINE:before` / `+LINE:after` under a `[PATH#TAG]` header; plain mode uses `-LINE:COLUMN before` / `+LINE:COLUMN after`.\n - Only the first line of each `before`/`after` snippet is shown, truncated to 120 characters in the wrapper.\n - `Limit reached; narrow paths.` and formatted parse issues are appended when applicable.\n- If no rewrites match, text is `No replacements made` plus formatted parse issues when present.\n- `details` includes aggregate preview metadata:\n - `totalReplacements`, `filesTouched`, `filesSearched`, `applied`, `limitReached`\n - optional `parseErrors`, `scopePath`, `files`, `fileReplacements`, `displayContent`, `meta`\n- The tool always previews first (`applied: false` in the direct result). Actual file writes happen only later through `resolve(action: \"apply\", ...)`.\n- When preview produced replacements, `ast_edit` also queues a pending `resolve` action. Successful apply returns a separate `resolve` result, not another `ast_edit` result.\n\n## Flow\n1. `AstEditTool.execute()` validates each op in `packages/coding-agent/src/tools/ast-edit.ts`:\n - empty `pat` fails,\n - at least one op is required,\n - duplicate `pat` values fail,\n - ops are converted to a `Record<pattern, replacement>`.\n2. The wrapper reads `PI_MAX_AST_FILES` via `$envpos(..., 1000)` and uses that as the native `maxFiles` cap for both preview and apply.\n3. Path normalization, internal URL handling, missing-path partitioning, and multi-path resolution follow the same `path-utils.ts` flow as `ast_grep`.\n4. The wrapper stats the resolved base path to decide whether to render grouped directory output.\n5. `runAstEditOnce(...)` always runs native `astEdit(...)` with `dryRun: true` and `failOnParseError: false` on the first pass.\n6. Native `ast_edit` in `crates/pi-natives/src/ast.rs`:\n - normalizes the rewrite map and sorts rules by pattern string,\n - resolves strictness (`smart` by default),\n - collects candidate files from a file or gitignore-aware directory scan,\n - infers a single language for the whole call unless `lang` was supplied,\n - compiles every rewrite pattern for that language,\n - parses each file, skips files with syntax-error trees, collects `replace_by(...)` edits for every match, enforces replacement and file caps, and returns textual before/after slices plus source ranges.\n7. The TS wrapper deduplicates parse errors, groups changes by file, and renders preview diff lines.\n8. If preview found replacements and `applied` is false, `queueResolveHandler(...)` registers a forced `resolve` action and injects a `resolve-reminder` steering message.\n9. On `resolve(action: \"apply\")`, the queued callback reruns the same rewrite set with `dryRun: false`, recomputes counts, and returns an error result if the live result no longer matches the preview (`stalePreview`). The current implementation compares replacement totals and per-file counts after the rerun; if the new run has already written different counts, the result is marked error.\n10. On a non-stale apply, the callback returns `Applied N replacements in M files.` (in hashline mode followed by fresh `[path#tag]` snapshot headers re-recorded from the post-apply content); on discard, `resolve` returns a discard message without mutating files.\n\n## Modes / Variants\n- Single file: preview or apply against one file.\n- Directory + optional glob: native scan walks the directory, then filters by compiled glob.\n- Multiple explicit paths/globs: wrapper unions them into one synthetic scope or runs per-target native calls when paths only meet at root.\n- Internal URL inputs: only supported when the router resolves them to a backing file path.\n- Preview mode: always the direct `ast_edit` tool result.\n- Apply mode: only reachable through the queued `resolve` callback after a preview.\n- Hashline output mode vs plain line/column mode: controlled by `resolveFileDisplayMode()`.\n\n## Side Effects\n- Filesystem\n - Preview reads files and scans directories.\n - Apply rewrites files in place with `std::fs::write(...)`, but only when the computed output differs from the original source.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Queues a one-shot forced `resolve` tool choice through `queueResolveHandler(...)`.\n - Adds a `resolve-reminder` steering message.\n- User-visible prompts / interactive UI\n - Direct `ast_edit` results are previews.\n - Follow-up apply/discard is exposed through the hidden `resolve` tool.\n- Background work / cancellation\n - Native preview/apply work runs on a blocking worker via `task::blocking(...)`.\n - Cancellation and optional native timeout are cooperative through `CancelToken::heartbeat()`.\n\n## Limits & Caps\n- File cap exposed by the wrapper: `PI_MAX_AST_FILES`, default `1000`, in `packages/coding-agent/src/tools/ast-edit.ts`.\n- Native `maxFiles` and `maxReplacements` are both clamped to at least `1` when provided in `crates/pi-natives/src/ast.rs`.\n- The wrapper never sets `maxReplacements`; native behavior therefore defaults to effectively unbounded replacements for a run.\n- Parse issues are rendered with at most `PARSE_ERRORS_LIMIT = 20` lines in `packages/coding-agent/src/tools/render-utils.ts`; `details.parseErrors` is deduplicated but not capped.\n- Directory scans use `include_hidden: true`, `use_gitignore: true`, and skip `node_modules` unless the glob text explicitly mentions `node_modules` in `crates/pi-natives/src/ast.rs`.\n- No separate glob-expansion count cap exists. Candidate count is whatever the resolved path/glob expands to after gitignore filtering, then native `maxFiles` stops mutations after the configured number of touched files.\n- Preview text truncates each rendered `before` and `after` first line to 120 characters in `packages/coding-agent/src/tools/ast-edit.ts`.\n\n## Errors\n- TS wrapper throws `ToolError` for empty patterns, duplicate rewrite patterns, empty path entries, unsupported internal-URL globs, internal URLs without `sourcePath`, and missing paths.\n- Native code returns hard errors for:\n - inability to infer one language across all candidates when `lang` is absent,\n - unsupported explicit `lang`,\n - bad glob compilation or unreadable search roots,\n - overlapping computed edits (`Overlapping replacements detected; refine pattern to avoid ambiguous edits`),\n - out-of-bounds edit ranges or non-UTF-8 replacement text,\n - write failures during apply,\n - cancellation or timeout.\n- With `failOnParseError: false` (the wrapper always uses this), pattern compile failures and file parse failures become `parseErrors` instead of aborting the whole run.\n- If every rewrite pattern fails to compile, native `ast_edit` returns a successful zero-replacement result with `parseErrors` populated.\n- Files containing tree-sitter error nodes are skipped for rewriting; they do not get partial edits.\n- Apply can fail after a successful preview if the preview becomes stale. The resolve callback compares replacement totals and per-file counts and returns an error result rather than silently reporting success for a mismatched preview.\n\n## Notes\n- `ast_edit` does not expose the native `lang`, `strictness`, `selector`, `maxReplacements`, `failOnParseError`, or `timeoutMs` fields to the model. The runtime fixes the call shape to a preview-first, smart-strictness, best-effort parse mode.\n- Because the wrapper does not expose `lang`, mixed-language rewrites only succeed when every candidate infers to the same canonical language. This is stricter than `ast_grep`.\n- Idempotency is not enforced syntactically. A rewrite like `foo($A) -> foo($A)` previews zero changes because output equals input; a rewrite that keeps matching its own output may still produce replacements on repeated calls.\n- Rewrites are accumulated per file, then applied from the end of the file backward after an overlap check. Independent matches can coexist; overlapping matches abort the run.\n- Native rewrite rule order is by pattern-string sort, not by the original `ops` array order, because `normalize_rewrite_map(...)` sorts the `(pattern, rewrite)` pairs.\n- Preview/apply parity is validated by totals and per-file counts after the apply rerun, not by a byte-for-byte diff of every replacement payload.",
|
|
88
|
-
"tools/ast-grep.md": "# ast_grep\n\n> Structural code search over supported source files via native ast-grep.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ast-grep.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ast-grep.md`\n- Key collaborators:\n - `crates/pi-natives/src/ast.rs` — native scan, parse, match engine\n - `crates/pi-ast/src/language/mod.rs` — language aliases and extension inference used by the native wrapper.\n - `packages/coding-agent/src/tools/path-utils.ts` — path/glob parsing and multi-path resolution\n - `packages/coding-agent/src/tools/render-utils.ts` — parse-error dedupe and display caps\n - `packages/coding-agent/src/tools/match-line-format.ts` — hashline match rendering\n - `packages/coding-agent/src/utils/file-display-mode.ts` — hashline vs line-number output mode\n - `packages/natives/native/index.d.ts` — JS-visible native binding contract\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `pat` | `string` | Yes | Single AST pattern. The wrapper trims it and rejects empty strings. |\n| `paths` | `string[]` | Yes | One or more files, directories, globs, or internal URLs with backing files. Empty entries are rejected. Globs are forbidden for internal URLs. |\n| `skip` | `number` | No | Match offset. Defaults to `0`, then `Math.floor(...)`; negatives and non-finite values fail. |\n\nPattern grammar and language support exposed to the model:\n- `$NAME` — capture one AST node.\n- `$_` — match one AST node without binding.\n- `$$$NAME` — capture zero or more AST nodes; ast-grep stops lazily at the next satisfiable node.\n- `$$$` — match zero or more AST nodes without binding.\n- Metavariable names must be uppercase and must stand for whole AST nodes, not partial tokens or string fragments.\n- Reusing the same metavariable requires identical code at each occurrence.\n- Patterns must parse as one valid AST node for the inferred target language.\n- Supported canonical languages come from `SupportLang::all_langs()` in `crates/pi-ast/src/language/mod.rs`: `astro`, `bash`, `c`, `cmake`, `cpp`, `csharp`, `dart`, `clojure`, `css`, `diff`, `dockerfile`, `elixir`, `erlang`, `go`, `graphql`, `haskell`, `hcl`, `html`, `ini`, `java`, `javascript`, `json`, `just`, `julia`, `kotlin`, `lua`, `make`, `markdown`, `nix`, `objc`, `ocaml`, `odin`, `perl`, `php`, `powershell`, `protobuf`, `python`, `r`, `regex`, `ruby`, `rust`, `scala`, `solidity`, `sql`, `starlark`, `svelte`, `swift`, `toml`, `tlaplus`, `tsx`, `typescript`, `verilog`, `vue`, `xml`, `yaml`, `zig`.\n\n## Outputs\n- Single-shot tool result.\n- Model-facing `content` is one text block:\n - grouped by file for directory/multi-file searches,\n - match lines rendered under `[PATH#HASH]` as `*LINE:text` in hashline mode or `*LINE|text` otherwise,\n - continuation lines for multi-line matches rendered with a leading space,\n - optional `meta: NAME=value` lines when ast-grep captured metavariables.\n- If no matches are found, text is `No matches found` or `No matches found. Parse issues mean the query may be mis-scoped; narrow paths before concluding absence.` plus formatted parse issues.\n- If the wrapper truncates visible results, the text ends with `Result limit reached; narrow paths or increase limit.`\n- `details` includes counts and metadata, not full match payloads:\n - `matchCount`, `fileCount`, `filesSearched`, `limitReached`\n - optional `parseErrors`, `scopePath`, `files`, `fileMatches`, `displayContent`, `meta`\n- Native ranges (`byteStart`, `byteEnd`, `startLine`, `startColumn`, `endLine`, `endColumn`) exist only inside the native result; the wrapper does not emit them directly to the model.\n\n## Flow\n1. `AstGrepTool.execute()` validates `pat`, normalizes `skip`, and normalizes each `paths` entry in `packages/coding-agent/src/tools/ast-grep.ts`.\n2. Internal URLs are resolved through `session.internalRouter`; entries without `sourcePath` fail, and internal-URL globs fail early.\n3. For multiple path inputs, `partitionExistingPaths()` drops missing bases only when at least one surviving base remains; if all bases are missing the call fails.\n4. `parseSearchPath()` splits a single path into `basePath` plus optional `glob`. `resolveExplicitSearchPaths()` collapses multiple inputs into a common base plus a brace-union glob, or separate `targets` when the only common base is a filesystem root.\n5. The wrapper stats the resolved base path to decide whether output should be grouped as a directory result.\n6. Execution dispatches to either:\n - one native `astGrep(...)` call for a single resolved base, or\n - `runMultiTargetAstGrep(...)`, which calls the native binding once per target, rebases paths back to the common root, sorts globally, then applies `skip` and the wrapper limit.\n7. Native `ast_grep` in `crates/pi-natives/src/ast.rs`:\n - normalizes and deduplicates patterns,\n - resolves a `MatchStrictness` (`smart` by default),\n - collects candidate files from a file or gitignore-aware directory scan,\n - infers language per candidate from extension unless `lang` was provided,\n - compiles the pattern separately for each language present,\n - reads each file, reports syntax-error trees as parse issues, runs `find_all`, and optionally captures metavariable bindings.\n8. Native results are sorted by path and source position, then paged by `offset`/`limit`.\n9. The TS wrapper normalizes parse-error strings, deduplicates them, groups matches by formatted path, renders anchor lines, appends limit/parse notices, and returns `toolResult(...).text(...).done()`.\n\n## Modes / Variants\n- Single file: native path is the file; output is a flat list of rendered match lines.\n- Directory + optional glob: native scan walks the directory, then filters by compiled glob.\n- Multiple explicit paths/globs: wrapper unions them into one synthetic scope or runs per-target native calls when paths only meet at root.\n- Internal URL inputs: only supported when the router can resolve them to a backing file path.\n- Hashline output mode vs plain line-number mode: controlled by `resolveFileDisplayMode()`; hashline mode requires the edit tool and hashline edit mode, and per-file anchors additionally require a successful whole-file snapshot (`recordFileSnapshot()`) — over-cap or unreadable files fall back to plain output.\n\n## Side Effects\n- Filesystem\n - Stats input paths in the TS wrapper.\n - Native code reads matched files and scans directories through `fs_cache`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - None beyond normal tool transcript/result metadata.\n- Background work / cancellation\n - Native work runs on a blocking worker via `task::blocking(...)`.\n - Cancellation and optional native timeout are cooperative through `CancelToken::heartbeat()`.\n\n## Limits & Caps\n- Wrapper-visible result cap: `DEFAULT_AST_LIMIT = 50` in `packages/coding-agent/src/tools/ast-grep.ts`.\n - Single-target calls rely on the native default limit of 50 in `crates/pi-natives/src/ast.rs`.\n - Multi-target calls fetch `skip + 50 + 1` matches per target, then re-page after global sort.\n- Native `limit` is clamped to at least `1`; omitted `offset` defaults to `0` in `crates/pi-natives/src/ast.rs`.\n- Parse issues are rendered with at most `PARSE_ERRORS_LIMIT = 20` lines in `packages/coding-agent/src/tools/render-utils.ts`; `details.parseErrors` itself is only deduplicated, not capped.\n- Directory scans use `include_hidden: true`, `use_gitignore: true`, and skip `node_modules` unless the glob text explicitly mentions `node_modules` in `crates/pi-natives/src/ast.rs`.\n- No hard file-count cap is applied by the wrapper or native `ast_grep`; candidate count is whatever the resolved path/glob expands to after gitignore filtering.\n- Multi-path union deduplicates identical path inputs before resolution in `resolveExplicitSearchPaths()`.\n\n## Errors\n- TS wrapper throws `ToolError` for empty patterns, invalid `skip`, empty path entries, unsupported internal-URL globs, internal URLs without `sourcePath`, and missing paths.\n- Native code returns hard errors for:\n - unsupported explicit `lang`,\n - inability to infer language for a candidate when `lang` is not supplied,\n - invalid AST pattern compilation for every relevant language,\n - unreadable search roots or bad glob compilation,\n - cancellation (`Aborted: Signal`) or timeout (`Aborted: Timeout`).\n- File-level parse failures and many per-language pattern compile failures are non-fatal: they are accumulated in `parseErrors` and surfaced alongside successful matches.\n- `no matches` is not an error, even when parse issues were recorded.\n\n## Notes\n- `pat` is always wrapped into a one-element `patterns` array by the TS tool; the model cannot send multiple patterns through `ast_grep` even though the native binding supports it.\n- `ast_grep` can search mixed-language trees because native compilation happens per discovered language, but the prompt still tells the model to keep calls single-language when possible to reduce parse noise.\n- Pattern compilation is per language present in the candidate set. One pattern can succeed for some languages and generate per-file parse errors for others in the same run.\n- A file with tree-sitter error nodes still gets searched; the syntax warning is additive, not a skip condition.\n- For glob semantics, `*.ts` matches only direct children while `**/*.ts` recurses; this is covered by native tests in `crates/pi-natives/src/ast.rs`.\n- Output anchors are intended for follow-up tools, but the exact anchor format depends on session edit mode (`hashline` vs line-number mode).",
|
|
88
|
+
"tools/ast-grep.md": "# ast_grep\n\n> Structural code search over supported source files via native ast-grep.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ast-grep.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ast-grep.md`\n- Key collaborators:\n - `crates/pi-natives/src/ast.rs` — native scan, parse, match engine\n - `crates/pi-ast/src/language/mod.rs` — language aliases and extension inference used by the native wrapper.\n - `packages/coding-agent/src/tools/path-utils.ts` — path/glob parsing and multi-path resolution\n - `packages/coding-agent/src/tools/render-utils.ts` — parse-error dedupe and display caps\n - `packages/coding-agent/src/tools/match-line-format.ts` — hashline match rendering\n - `packages/coding-agent/src/utils/file-display-mode.ts` — hashline vs line-number output mode\n - `packages/natives/native/index.d.ts` — JS-visible native binding contract\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `pat` | `string` | Yes | Single AST pattern. The wrapper trims it and rejects empty strings. |\n| `paths` | `string[]` | Yes | One or more files, directories, globs, or internal URLs with backing files. Empty entries are rejected. Globs are forbidden for internal URLs. |\n| `skip` | `number` | No | Match offset. Defaults to `0`, then `Math.floor(...)`; negatives and non-finite values fail. |\n\nPattern grammar and language support exposed to the model:\n- `$NAME` — capture one AST node.\n- `$_` — match one AST node without binding.\n- `$$$NAME` — capture zero or more AST nodes; ast-grep stops lazily at the next satisfiable node.\n- `$$$` — match zero or more AST nodes without binding.\n- Metavariable names must be uppercase and must stand for whole AST nodes, not partial tokens or string fragments.\n- Reusing the same metavariable requires identical code at each occurrence.\n- Patterns must parse as one valid AST node for the inferred target language.\n- Supported canonical languages come from `SupportLang::all_langs()` in `crates/pi-ast/src/language/mod.rs`: `astro`, `bash`, `c`, `cmake`, `cpp`, `csharp`, `dart`, `clojure`, `css`, `diff`, `dockerfile`, `emacs-lisp`, `elixir`, `erlang`, `go`, `graphql`, `haskell`, `hcl`, `html`, `ini`, `java`, `javascript`, `json`, `just`, `julia`, `kotlin`, `lua`, `make`, `markdown`, `nix`, `objc`, `ocaml`, `odin`, `perl`, `php`, `powershell`, `protobuf`, `python`, `r`, `regex`, `ruby`, `rust`, `scala`, `solidity`, `sql`, `starlark`, `svelte`, `swift`, `toml`, `tlaplus`, `tsx`, `typescript`, `verilog`, `vue`, `xml`, `yaml`, `zig`.\n\n## Outputs\n- Single-shot tool result.\n- Model-facing `content` is one text block:\n - grouped by file for directory/multi-file searches,\n - match lines rendered under `[PATH#HASH]` as `*LINE:text` in hashline mode or `*LINE|text` otherwise,\n - continuation lines for multi-line matches rendered with a leading space,\n - optional `meta: NAME=value` lines when ast-grep captured metavariables.\n- If no matches are found, text is `No matches found` or `No matches found. Parse issues mean the query may be mis-scoped; narrow paths before concluding absence.` plus formatted parse issues.\n- If the wrapper truncates visible results, the text ends with `Result limit reached; narrow paths or increase limit.`\n- `details` includes counts and metadata, not full match payloads:\n - `matchCount`, `fileCount`, `filesSearched`, `limitReached`\n - optional `parseErrors`, `scopePath`, `files`, `fileMatches`, `displayContent`, `meta`\n- Native ranges (`byteStart`, `byteEnd`, `startLine`, `startColumn`, `endLine`, `endColumn`) exist only inside the native result; the wrapper does not emit them directly to the model.\n\n## Flow\n1. `AstGrepTool.execute()` validates `pat`, normalizes `skip`, and normalizes each `paths` entry in `packages/coding-agent/src/tools/ast-grep.ts`.\n2. Internal URLs are resolved through `session.internalRouter`; entries without `sourcePath` fail, and internal-URL globs fail early.\n3. For multiple path inputs, `partitionExistingPaths()` drops missing bases only when at least one surviving base remains; if all bases are missing the call fails.\n4. `parseSearchPath()` splits a single path into `basePath` plus optional `glob`. `resolveExplicitSearchPaths()` collapses multiple inputs into a common base plus a brace-union glob, or separate `targets` when the only common base is a filesystem root.\n5. The wrapper stats the resolved base path to decide whether output should be grouped as a directory result.\n6. Execution dispatches to either:\n - one native `astGrep(...)` call for a single resolved base, or\n - `runMultiTargetAstGrep(...)`, which calls the native binding once per target, rebases paths back to the common root, sorts globally, then applies `skip` and the wrapper limit.\n7. Native `ast_grep` in `crates/pi-natives/src/ast.rs`:\n - normalizes and deduplicates patterns,\n - resolves a `MatchStrictness` (`smart` by default),\n - collects candidate files from a file or gitignore-aware directory scan,\n - infers language per candidate from extension unless `lang` was provided,\n - compiles the pattern separately for each language present,\n - reads each file, reports syntax-error trees as parse issues, runs `find_all`, and optionally captures metavariable bindings.\n8. Native results are sorted by path and source position, then paged by `offset`/`limit`.\n9. The TS wrapper normalizes parse-error strings, deduplicates them, groups matches by formatted path, renders anchor lines, appends limit/parse notices, and returns `toolResult(...).text(...).done()`.\n\n## Modes / Variants\n- Single file: native path is the file; output is a flat list of rendered match lines.\n- Directory + optional glob: native scan walks the directory, then filters by compiled glob.\n- Multiple explicit paths/globs: wrapper unions them into one synthetic scope or runs per-target native calls when paths only meet at root.\n- Internal URL inputs: only supported when the router can resolve them to a backing file path.\n- Hashline output mode vs plain line-number mode: controlled by `resolveFileDisplayMode()`; hashline mode requires the edit tool and hashline edit mode, and per-file anchors additionally require a successful whole-file snapshot (`recordFileSnapshot()`) — over-cap or unreadable files fall back to plain output.\n\n## Side Effects\n- Filesystem\n - Stats input paths in the TS wrapper.\n - Native code reads matched files and scans directories through `fs_cache`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - None beyond normal tool transcript/result metadata.\n- Background work / cancellation\n - Native work runs on a blocking worker via `task::blocking(...)`.\n - Cancellation and optional native timeout are cooperative through `CancelToken::heartbeat()`.\n\n## Limits & Caps\n- Wrapper-visible result cap: `DEFAULT_AST_LIMIT = 50` in `packages/coding-agent/src/tools/ast-grep.ts`.\n - Single-target calls rely on the native default limit of 50 in `crates/pi-natives/src/ast.rs`.\n - Multi-target calls fetch `skip + 50 + 1` matches per target, then re-page after global sort.\n- Native `limit` is clamped to at least `1`; omitted `offset` defaults to `0` in `crates/pi-natives/src/ast.rs`.\n- Parse issues are rendered with at most `PARSE_ERRORS_LIMIT = 20` lines in `packages/coding-agent/src/tools/render-utils.ts`; `details.parseErrors` itself is only deduplicated, not capped.\n- Directory scans use `include_hidden: true`, `use_gitignore: true`, and skip `node_modules` unless the glob text explicitly mentions `node_modules` in `crates/pi-natives/src/ast.rs`.\n- No hard file-count cap is applied by the wrapper or native `ast_grep`; candidate count is whatever the resolved path/glob expands to after gitignore filtering.\n- Multi-path union deduplicates identical path inputs before resolution in `resolveExplicitSearchPaths()`.\n\n## Errors\n- TS wrapper throws `ToolError` for empty patterns, invalid `skip`, empty path entries, unsupported internal-URL globs, internal URLs without `sourcePath`, and missing paths.\n- Native code returns hard errors for:\n - unsupported explicit `lang`,\n - inability to infer language for a candidate when `lang` is not supplied,\n - invalid AST pattern compilation for every relevant language,\n - unreadable search roots or bad glob compilation,\n - cancellation (`Aborted: Signal`) or timeout (`Aborted: Timeout`).\n- File-level parse failures and many per-language pattern compile failures are non-fatal: they are accumulated in `parseErrors` and surfaced alongside successful matches.\n- `no matches` is not an error, even when parse issues were recorded.\n\n## Notes\n- `pat` is always wrapped into a one-element `patterns` array by the TS tool; the model cannot send multiple patterns through `ast_grep` even though the native binding supports it.\n- `ast_grep` can search mixed-language trees because native compilation happens per discovered language, but the prompt still tells the model to keep calls single-language when possible to reduce parse noise.\n- Pattern compilation is per language present in the candidate set. One pattern can succeed for some languages and generate per-file parse errors for others in the same run.\n- A file with tree-sitter error nodes still gets searched; the syntax warning is additive, not a skip condition.\n- For glob semantics, `*.ts` matches only direct children while `**/*.ts` recurses; this is covered by native tests in `crates/pi-natives/src/ast.rs`.\n- Output anchors are intended for follow-up tools, but the exact anchor format depends on session edit mode (`hashline` vs line-number mode).",
|
|
89
89
|
"tools/bash.md": "# bash\n\n> Execute a shell command in the session workspace, with optional PTY or background-job handling.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/bash.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/bash.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/bash-interactive.ts` — PTY/TUI execution path.\n - `packages/coding-agent/src/tools/bash-interceptor.ts` — blocks tool-better shell patterns.\n - `packages/coding-agent/src/tools/bash-skill-urls.ts` — expands internal URLs to paths.\n - `packages/coding-agent/src/exec/bash-executor.ts` — non-PTY shell execution.\n - `packages/coding-agent/src/session/streaming-output.ts` — tail buffer, truncation, artifact spill.\n - `packages/coding-agent/src/tools/tool-timeouts.ts` — timeout clamp bounds.\n - `packages/coding-agent/src/config/settings-schema.ts` — default interceptor rules.\n - `docs/bash-tool-runtime.md` — deeper executor/runtime notes; use as the companion doc for shell-session internals.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `command` | `string` | Yes | Shell command text to execute. A leading `cd <path> && ...` is rewritten into `cwd` only when `cwd` was omitted. |\n| `env` | `Record<string, string>` | No | Extra environment variables. Keys must match `^[A-Za-z_][A-Za-z0-9_]*$` or the tool throws. Values go through internal-URL expansion and are passed as environment values, not shell text. |\n| `timeout` | `number` | No | Timeout in seconds. Default `300`; clamped to `1..3600` by `clampTimeout(\"bash\", ...)`. |\n| `cwd` | `string` | No | Working directory, resolved against `session.cwd` via `resolveToCwd`. Must exist and be a directory. |\n| `pty` | `boolean` | No | Request PTY mode. Default `false`. PTY is used only when `pty: true`, `PI_NO_PTY !== \"1\"`, and the tool context has a UI. |\n| `async` | `boolean` | No | Background execution request. Present only when `async.enabled` is true for the session. Returns immediately with a job id instead of waiting. |\n\n## Outputs\nThe tool returns a single `text` content block plus optional `details`.\n\n- Success, foreground:\n - `content[0].text`: command output, or `(no output)` when the command produced nothing.\n - `details.timeoutSeconds`: effective timeout after clamping.\n - `details.requestedTimeoutSeconds`: present when the requested timeout differed from the effective timeout.\n - `details.wallTimeMs`: elapsed wall-clock milliseconds for completed local/client-terminal runs.\n - `details.terminalId`: present when execution was routed through a client terminal bridge.\n - `details.exitCode`: present when the command completed with a non-zero exit code.\n - `details.meta.truncation`: present when output was truncated in memory; includes `artifactId` when full output spilled to an artifact.\n - non-zero exits return a tool result marked `isError` with output plus `Command exited with code <n>`; they are not thrown.\n- Success, background start (`async: true` or auto-background):\n - `content[0].text`: optional preview tail, timeout notice if any, then `Background job <id> started: <label>` with follow-up instructions.\n - `details.async`: `{ state: \"running\", jobId, type: \"bash\" }`.\n- Background progress / completion:\n - delivered through `onUpdate` / async job manager, not the initial return.\n - running updates contain tail text and `details.async.state: \"running\"` only after the job is considered backgrounded.\n - completion/failure updates carry final text and `details.async.state: \"completed\" | \"failed\"`. A non-zero exit is recorded as a failed background job.\n- Failure:\n - unfinished execution (`cancelled`, timeout, missing exit status), validation failures, and intercepted commands throw `ToolError` / `ToolAbortError`.\n\nStdout and stderr are merged before the model sees them. Definite non-zero exit codes are appended to the returned error result text as `Command exited with code <n>`.\n\n## Flow\n1. `BashTool.execute()` in `packages/coding-agent/src/tools/bash.ts` reads `command`, normalizes `env`, and defaults `timeout` to `300`. When `bash.stripTrailingHeadTail` is enabled (default), `applyBashFixups()` from `packages/coding-agent/src/tools/bash-command-fixup.ts` first strips safe trailing `| head`/`| tail` pipes and redundant trailing `2>&1` from single-line commands.\n2. If `cwd` is absent, it rewrites a leading `cd <path> && ...` into the structured `cwd` field and strips that prefix from `command`.\n3. If `async: true` is requested while `async.enabled` is off, it throws `ToolError` before any execution.\n4. If `bashInterceptor.enabled` is on, `checkBashInterception()` runs against both the original command and the `cd`-stripped command. A matching enabled rule throws before URL expansion or execution.\n5. `expandInternalUrls()` rewrites supported internal URLs inside `command`, each `env` value, and protocol-looking `cwd` values. Command replacements are shell-escaped; `env` and `cwd` replacements use raw filesystem/string values because they are not interpolated into shell text.\n6. `resolveToCwd()` resolves `cwd` against `session.cwd`; `fs.stat()` verifies that the target exists and is a directory.\n7. `clampTimeout(\"bash\", requestedTimeoutSec)` enforces `TOOL_TIMEOUTS.bash` (`default: 300`, `min: 1`, `max: 3600`). When clamped, `#buildCompletedResult()` / `#buildBackgroundStartResult()` append a notice line.\n8. Execution path splits:\n 1. `async: true` -> `#startManagedBashJob()` registers a session async job and returns immediately.\n 2. Non-PTY with `bash.autoBackground.enabled` and an async job manager -> starts a managed job, waits up to `min(thresholdMs, timeoutMs - 1000)`, and either returns the completed result or converts the run into a background job.\n 3. Non-PTY client-terminal bridge, when the session advertises terminal capability and `pty` is false -> creates a remote terminal, streams/polls current output, and releases the terminal after completion.\n 4. Otherwise runs foreground execution.\n9. Foreground non-PTY without client terminal calls `executeBash()` from `packages/coding-agent/src/exec/bash-executor.ts`.\n10. Foreground PTY calls `runInteractiveBashPty()` from `packages/coding-agent/src/tools/bash-interactive.ts`.\n11. Local non-PTY and PTY paths allocate an output artifact first when `session.allocateOutputArtifact` is available. The artifact path/id are passed into the sink so large output can spill to disk.\n12. `executeBash()` loads shell settings, optional shell snapshot, and shell minimizer settings, then runs via a persistent native `Shell` session or one-shot `executeShell()`. `docs/bash-tool-runtime.md` covers that path in detail.\n13. `runInteractiveBashPty()` creates a `PtySession`, overlays an xterm-backed console UI, forwards user key input into the PTY, captures output through `OutputSink`, and kills the PTY on dismiss/dispose.\n14. Client-terminal bridge mode calls `session.getClientBridge().createTerminal(...)`, emits `terminalId` updates, polls output until exit/timeout/abort, maps signal exits to `137`, and releases the handle in `finally`.\n15. On completion, `#buildCompletedResult()` formats `(no output)` when needed, attaches truncation metadata from the output summary, appends wall-time/timeout/exit notices, and re-checks unfinished status before returning.\n16. On timeout, missing exit status, or cancellation, the tool throws with captured output included when available.\n\n## Modes / Variants\n1. Foreground non-PTY local\n - Default path when no client terminal bridge is available.\n - Uses `executeBash()`.\n - Streams tail-only updates through `streamTailUpdates()` and `TailBuffer(DEFAULT_MAX_BYTES)`.\n2. Foreground non-PTY client terminal\n - Used when `session.getClientBridge()?.capabilities.terminal` is true, `createTerminal` exists, and `pty` is false.\n - Streams current terminal output via polling updates with `details.terminalId`.\n - Enforces the same timeout and abort behavior, then releases the terminal handle.\n3. Foreground PTY\n - Requires `pty: true`, UI context, and `PI_NO_PTY !== \"1\"`.\n - Uses `runInteractiveBashPty()` and a `PtySession` overlay.\n - Supports interactive input; `Esc` kills the session from the overlay.\n4. Explicit background job\n - Requires `async: true` and `async.enabled`.\n - Registers a job with `session.asyncJobManager` and returns `{ state: \"running\", jobId }` immediately.\n5. Auto-backgrounded non-PTY job\n - Requires `bash.autoBackground.enabled`, no PTY, and an async job manager.\n - Starts like a foreground managed job, then backgrounds it when it outlives the wait window.\n6. Intercepted command\n - No subprocess created.\n - Returns a `ToolError` pointing the model at `read`, `search`, `find`, `edit`, or `write`.\n\n## Side Effects\n- Filesystem\n - Validates `cwd` with `fs.stat()`.\n - May allocate and write artifact files for full local output (`bash`) and minimizer-preserved raw output (`bash-original`).\n - `expandInternalUrls(..., { ensureLocalParentDirs: true })` creates parent directories for `local://` paths before execution.\n- Subprocesses / native bindings / client terminal\n - Non-PTY local execution uses native shell execution via `@oh-my-pi/pi-natives` (`Shell.run()` or `executeShell()`).\n - PTY uses native `PtySession.start()`.\n - Client-terminal mode delegates process execution to the connected client terminal capability.\n- Session state\n - Reads session settings for async, auto-background, interceptor, tool availability, and shell configuration.\n - Registers jobs with `session.asyncJobManager` for explicit/auto background runs.\n - Uses `session.getSessionId()` to isolate shell reuse and async session keys.\n - Uses `session.allocateOutputArtifact()` for spill files.\n- User-visible prompts / interactive UI\n - PTY mode opens a TUI overlay titled `Console` and forwards input to the PTY.\n - Background start messages direct the agent to the `job` tool (use `list: true` for a snapshot, or pass `poll: [id]` to wait).\n- Background work / cancellation\n - Async and auto-background jobs continue after the initial tool return.\n - Cancellation aborts the native run; PTY overlay dismissal also kills the PTY.\n\n## Limits & Caps\n- Default timeout: `300s` (`TOOL_TIMEOUTS.bash.default` in `packages/coding-agent/src/tools/tool-timeouts.ts`).\n- Timeout clamp: `1..3600s` (`TOOL_TIMEOUTS.bash.min/max`).\n- Auto-background default threshold: `60_000ms` (`DEFAULT_AUTO_BACKGROUND_THRESHOLD_MS` in `packages/coding-agent/src/tools/bash.ts`), further capped to `timeoutMs - 1000` by `#resolveAutoBackgroundWaitMs()`.\n- Non-PTY executor timeout: `executeBash()` arms a host-side timer at `max(1_000, timeoutMs)` that aborts the run and quarantines the persistent shell session; the same timeout is also passed to the native run as `timeoutMs` (`packages/coding-agent/src/exec/bash-executor.ts`).\n- In-memory output tail cap: `50 * 1024` bytes (`DEFAULT_MAX_BYTES` in `packages/coding-agent/src/session/streaming-output.ts`). Once exceeded, the sink keeps only the tail window in memory.\n- Streaming callback throttle in `executeBash()`: `50ms` between `onChunk` calls when streaming is enabled.\n- TUI collapsed preview: `10` visual lines (`BASH_DEFAULT_PREVIEW_LINES`) when rendered inline in the agent UI; this is a renderer cap, not a tool output cap.\n\n## Errors\n- Input validation:\n - invalid env key -> `ToolError(\"Invalid bash env name: <key>\")`.\n - async requested while disabled -> `ToolError(\"Async bash execution is disabled...\")`.\n - missing async job manager -> `ToolError(\"Async job manager unavailable for this session.\")`.\n - missing/bad `cwd` -> `ToolError(\"Working directory does not exist: ...\")` or `ToolError(\"Working directory is not a directory: ...\")`.\n- Interceptor:\n - matched command -> `ToolError` with `Blocked: <rule.message>` and the original command.\n - invalid interceptor regexes are silently skipped by `compileRules()`.\n- Internal URL expansion:\n - unsupported scheme, unknown skill, path traversal, missing router support, or router resolution failures all throw `ToolError` from `packages/coding-agent/src/tools/bash-skill-urls.ts`.\n- Execution:\n - non-zero exit -> returned tool result marked `isError`, with `details.exitCode` and text ending in `Command exited with code <n>`.\n - missing exit code -> thrown `ToolError` with `Command failed: missing exit status`.\n - timeout -> thrown `ToolError`; PTY/client-terminal modes use `Command timed out after <n> seconds`, non-PTY executor returns cancelled output that `BashTool` converts to an error.\n - user abort -> `ToolAbortError` when the caller signal is aborted.\n- Artifact allocation / artifact save failures are swallowed in `saveBashOriginalArtifact()` and `OutputSink.#createFileSink()`; execution continues without that artifact.\n\n## Notes\n- `strict = true` is set on `BashTool`; `concurrency` is resolved per call: `pty: true` is `\"exclusive\"` (it takes over the terminal UI), everything else is `\"shared\"`, so multiple non-pty bash calls in one assistant message run in parallel. When parallel calls overlap on the same shell session key, the first owns the persistent `Shell`; the rest run in isolated one-shot shells (see `shellSessionsInUse` in `bash-executor.ts`).\n- `command` URL expansions shell-escape replacements; `env` and `cwd` expansion use `noEscape: true` because they become environment values / filesystem paths, not shell text.\n- `checkBashInterception()` blocks only when the matching rule's `tool` name is present in `ctx.toolNames`; missing tools disable their corresponding rule.\n- Default interceptor rules come from `DEFAULT_BASH_INTERCEPTOR_RULES` in `packages/coding-agent/src/config/settings-schema.ts`:\n - `cat|head|tail|less|more` -> `read`\n - `grep|rg|ripgrep|ag|ack` -> `search`\n - `find|fd|locate` with name/type/glob flags -> `find`\n - `sed -i`, `perl -i`, `awk -i inplace` -> `edit`\n - `echo|printf|cat <<` with redirection -> `write`\n- PTY mode is ignored in non-UI contexts and when `PI_NO_PTY=1`; the tool silently falls back to non-PTY execution.\n- Non-PTY runs merge `NON_INTERACTIVE_ENV` with `env`; PTY runs also prepend `NON_INTERACTIVE_ENV` before custom env values.\n- When the shell minimizer rewrites output inside `executeBash()`, the visible output is replaced with minimized text and a `[raw output: artifact://<id>]` footer may be appended if `onMinimizedSave` persisted the original text.\n- The TUI renderer parses partial JSON to recover `env` assignments early in streaming previews; that behavior is display-only.\n- For executor internals that are not tool-specific — shell session reuse keys, snapshots, prefix handling, and native timeout behavior — see `docs/bash-tool-runtime.md`.\n",
|
|
90
90
|
"tools/browser.md": "# browser\n\n> Open, reuse, close, and script Puppeteer tabs against headless Chromium or CDP-attached apps.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/browser.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/browser.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/browser/tab-supervisor.ts` — global tab registry; worker lifecycle; run/close coordination.\n - `packages/coding-agent/src/tools/browser/tab-worker.ts` — executes `run` code; implements the `tab` helper API.\n - `packages/coding-agent/src/tools/browser/tab-worker-entry.ts` — worker-thread transport bootstrap.\n - `packages/coding-agent/src/tools/browser/registry.ts` — browser-handle registry keyed by browser kind.\n - `packages/coding-agent/src/tools/browser/launch.ts` — Puppeteer loading, Chromium resolution/download, headless launch, stealth injection.\n - `packages/coding-agent/src/tools/browser/attach.ts` — CDP attach/reuse, target picking, spawned-app process handling.\n - `packages/coding-agent/src/tools/browser/tab-protocol.ts` — worker init/run/result message schema.\n - `packages/coding-agent/src/tools/browser/readable.ts` — `tab.extract()` readability extraction.\n - `packages/coding-agent/src/tools/browser/render.ts` — TUI rendering for `open`/`close` status lines and `run` JS cells.\n - `packages/coding-agent/src/tools/puppeteer/00_stealth_tampering.txt` — mask patched functions/descriptors as native.\n - `packages/coding-agent/src/tools/puppeteer/01_stealth_activity.txt` — synthesize visibility/focus/scroll activity.\n - `packages/coding-agent/src/tools/puppeteer/02_stealth_hairline.txt` — fix Modernizr hairline detection.\n - `packages/coding-agent/src/tools/puppeteer/03_stealth_botd.txt` — spoof `navigator.webdriver`, `window.chrome`, and Chrome fingerprint surfaces.\n - `packages/coding-agent/src/tools/puppeteer/04_stealth_iframe.txt` — patch iframe `contentWindow`/`srcdoc` behavior.\n - `packages/coding-agent/src/tools/puppeteer/05_stealth_webgl.txt` — spoof WebGL vendor/renderer/precision.\n - `packages/coding-agent/src/tools/puppeteer/06_stealth_screen.txt` — normalize screen/viewport/device-pixel-ratio values.\n - `packages/coding-agent/src/tools/puppeteer/07_stealth_fonts.txt` — spoof local fonts and perturb canvas text rendering.\n - `packages/coding-agent/src/tools/puppeteer/08_stealth_audio.txt` — spoof audio latency/sample-rate and perturb offline rendering.\n - `packages/coding-agent/src/tools/puppeteer/09_stealth_locale.txt` — force locale/languages/timezone/date strings.\n - `packages/coding-agent/src/tools/puppeteer/10_stealth_plugins.txt` — synthesize `navigator.plugins`/`navigator.mimeTypes`.\n - `packages/coding-agent/src/tools/puppeteer/11_stealth_hardware.txt` — spoof `navigator.hardwareConcurrency`.\n - `packages/coding-agent/src/tools/puppeteer/12_stealth_codecs.txt` — spoof media codec support.\n - `packages/coding-agent/src/tools/puppeteer/13_stealth_worker.txt` — carry UA/platform spoofing into `Worker`/`SharedWorker`.\n\n## Inputs\n\n### Shared fields\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `action` | `\"open\" \\| \"close\" \\| \"run\"` | Yes | Dispatches to the open/close/run path. |\n| `name` | `string` | No | Tab id. Defaults to `\"main\"`. Tabs live in a process-global map, so the same name is reused across later calls and in-process subagents until closed. |\n| `timeout` | `number` | No | Tool wall-clock timeout in seconds. Defaults to `30`; clamped to the browser tool range before execution. |\n\n### `action: \"open\"`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `url` | `string` | No | Navigate after the tab is ready. Existing reusable tabs also navigate when `url` is supplied. |\n| `viewport` | `{ width: number; height: number; scale?: number }` | No | Requested viewport. For headless launch this becomes the initial viewport; for a page it is applied with `page.setViewport()`. `scale` maps to Puppeteer `deviceScaleFactor`. |\n| `wait_until` | `\"load\" \\| \"domcontentloaded\" \\| \"networkidle0\" \\| \"networkidle2\"` | No | Navigation wait condition. Defaults to `\"load\"` where omitted, including `open` navigation and later `tab.goto(...)`. |\n| `dialogs` | `\"accept\" \\| \"dismiss\"` | No | Installs a page `dialog` handler that auto-accepts or auto-dismisses dialogs. Omitted means no handler. |\n| `app` | `{ path?: string; cdp_url?: string; args?: string[]; target?: string }` | No | Selects browser kind. No `app` uses the session `browser.headless` setting. `app.path` is resolved against the session cwd and used as the executable path for spawn/attach reuse. `app.cdp_url` connects to an existing CDP endpoint. `args` are appended only when spawning `app.path`. `target` is only used for attached/spawned-app page selection. |\n\n### `action: \"close\"`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `all` | `boolean` | No | Close every known tab. Omitted closes only `name`. |\n| `kill` | `boolean` | No | When a tab release drops a spawned-app browser handle to refcount 0, also terminate its process tree. Has no effect on headless shutdown and only disconnects connected CDP browsers. |\n\n### `action: \"run\"`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `code` | `string` | Yes | Async-function body executed in a VM context with `page`, `browser`, `tab`, `display`, `assert`, `wait`, `console`, timers, `URL`, `TextEncoder`, `TextDecoder`, and `Buffer` in scope. |\n\n## Outputs\nThe tool returns one result per call; no streaming partial output is emitted from the browser implementation itself.\n\n- `open`: text content with `Opened` or `Reused`, browser description, URL, and optional title. `details` includes `action`, `name`, `browser`, `url`, `viewport`, and the same text in `details.result`.\n- `close`: text content with either `Closed ...` or `No tab named ...`. `details` includes `action`, `name`, and `details.result`.\n- `run`: ordered `content` array built as:\n 1. every `display(value)` call in execution order,\n 2. final return value, JSON-stringified unless already a string,\n 3. or `Ran code on tab \"...\"` if nothing else was produced.\n- `display(value)` coercion in `packages/coding-agent/src/tools/browser/tab-worker.ts`:\n - `{ type: \"image\", data: string, mimeType: string }` becomes image content,\n - `string` becomes text content,\n - other values become pretty JSON text when serializable, else `String(value)`.\n- `tab.screenshot()` also appends text plus an image content item unless `silent: true`; `details.screenshots` records persisted screenshot metadata `{ dest, mimeType, bytes, width, height }`.\n- `run` `details` includes `action`, `name`, current `browser`/`url` when the tab exists, optional `screenshots`, and `details.result` containing only the concatenated text outputs. Combined run text is capped at the inline byte limit via `enforceInlineByteCap()`; over-cap text is saved as a session artifact (`saveBrowserOutputArtifact()`) and the capped text replaces it in content and `details.result`.\n\n## Flow\n1. `BrowserTool.execute()` (`packages/coding-agent/src/tools/browser.ts`) abort-checks, clamps `timeout` via `clampTimeout(\"browser\", ...)`, defaults `name` to `\"main\"`, and dispatches on `action`.\n2. `open` resolves browser kind with `resolveBrowserKind()`:\n - `app.cdp_url` → `{ kind: \"connected\" }` after trimming trailing slashes.\n - `app.path` → `{ kind: \"spawned\" }` after resolving against session cwd.\n - otherwise → `{ kind: \"headless\", headless: session.settings.get(\"browser.headless\") }`.\n3. `open` rejects reusing the same tab name across different browser kinds (`sameBrowserKind()`); callers must close first.\n4. `open` acquires a browser handle through `acquireBrowser()` (`packages/coding-agent/src/tools/browser/registry.ts`):\n - existing connected handle is reused by browser-kind key;\n - stale disconnected handles are disposed and recreated;\n - headless launches via `launchHeadlessBrowser()`;\n - `connected` waits for `${cdpUrl}/json/version`, then `puppeteer.connect()`;\n - `spawned` first tries `findReusableCdp()`, else kills same-path processes, allocates a free loopback port, spawns the executable with `--remote-debugging-port=<port>`, waits for CDP, then connects.\n5. `open` acquires a tab through `acquireTab()` (`packages/coding-agent/src/tools/browser/tab-supervisor.ts`):\n - same-name + same-browser + alive tab is reused unless `dialogs` changed;\n - same-name but different browser handle, dead state, or changed dialog policy forces release and recreation;\n - reusing with a new `url` navigates by issuing `await tab.goto(...)` through the worker, defaulting to `waitUntil: \"load\"` when `wait_until` is omitted.\n6. New tabs build a `WorkerInitPayload` in `buildInitPayload()`:\n - headless mode sends `url`, `waitUntil`, `viewport`, `dialogs`, and timeout; the worker defaults missing `waitUntil` to `\"load\"`.\n - attach mode resolves a page with `pickElectronTarget()`, gets its target id, and sends `targetId` plus `dialogs`.\n7. `acquireTab()` spawns a dedicated Bun `Worker` from `tab-worker-entry.ts`; if that fails it falls back to inline execution in the main thread (`spawnInlineWorker()`), preserving behavior but losing protection against synchronous infinite loops.\n8. `WorkerCore.#init()` (`packages/coding-agent/src/tools/browser/tab-worker.ts`) connects back to the browser websocket endpoint. Headless mode opens a new page, applies stealth patches, applies viewport, installs dialog handling if requested, and optionally navigates. Attach mode resolves the requested target page and optionally installs dialog handling.\n9. On success the worker sends `ready` with `{ url, title, viewport, targetId }`; the supervisor stores a `TabSession`, increments browser-handle refcount with `holdBrowser()`, and keeps the tab in a process-global `Map<string, TabSession>`.\n10. `run` requires non-empty `code`, looks up the tab with `getTab()`, then delegates to `runInTab()`.\n11. `runInTabWithSnapshot()` rejects dead tabs and concurrent runs (`Tab ... is busy`), captures session cwd plus optional `browser.screenshotDir`, registers an abort hook, sends a `run` message to the worker, and races the result against `timeoutMs + 750` ms. Timeouts force-kill the tab worker and, for headless tabs, close the orphaned page target.\n12. `WorkerCore.#run()` creates a VM context, exposes the raw Puppeteer `page`/`browser` plus a synthetic `tab` API, and executes `(async () => { ...code... })()` via `vm.runInContext()`.\n13. The `tab` helper API implemented in `#createTabApi()` is:\n - `tab.name: string`\n - `tab.page: Page`\n - `tab.signal?: AbortSignal`\n - `tab.url(): string`\n - `tab.title(): Promise<string>`\n - `tab.goto(url, { waitUntil? })`\n - `tab.observe({ includeAll?, viewportOnly? })`\n - `tab.screenshot({ selector?, fullPage?, save?, silent? })`\n - `tab.extract(format = \"markdown\")`\n - `tab.click(selector)`\n - `tab.type(selector, text)`\n - `tab.fill(selector, value)`\n - `tab.press(key, { selector? })`\n - `tab.scroll(deltaX, deltaY)`\n - `tab.drag(from, to)`\n - `tab.waitFor(selector)`\n - `tab.evaluate(fn, ...args)`\n - `tab.scrollIntoView(selector)`\n - `tab.select(selector, ...values)`\n - `tab.uploadFile(selector, ...filePaths)`\n - `tab.waitForUrl(pattern, { timeout? })`\n - `tab.waitForResponse(pattern, { timeout? })`\n - `tab.id(n)`\n14. Selector handling in `normalizeSelector()` accepts plain CSS and Puppeteer query handlers, and rewrites legacy Playwright-style prefixes `p-text/`, `p-xpath/`, `p-pierce/`, `p-aria/`; other `p-*` prefixes throw a `ToolError`.\n15. `tab.observe()` clears the element cache, takes a Puppeteer accessibility snapshot, filters to interactive nodes unless `includeAll`, optionally filters to viewport-visible nodes, assigns numeric ids, caches `ElementHandle`s, and returns URL/title/viewport/scroll metadata plus `elements`.\n16. `tab.id(n)` resolves the cached `ElementHandle`, verifies `el.isConnected`, and throws a stale-id error after cache invalidation if the DOM changed or the cache was cleared.\n17. `tab.goto()` clears the cached element ids before navigating. Any new `tab.observe()` also clears and rebuilds the cache.\n18. `tab.click()` uses a custom retry loop for `text/...` selectors to find an actionable visible match; other selectors use `page.locator(...).click()` with the run timeout.\n19. `tab.screenshot()` captures either the whole page or a selector PNG, downsizes a copy for model output, chooses a persistence path, writes the image to disk, records metadata, and optionally emits text + image display entries.\n20. `display()` calls accumulate in an array. After code finishes, the worker posts `{ displays, returnValue, screenshots }`; `BrowserTool.#run()` appends the return value as trailing text content when not `undefined`.\n21. `close` releases one tab or all tabs via `releaseTab()` / `releaseAllTabs()`. Each tab aborts pending runs, asks the worker to close, waits up to `750` ms for a `closed` ack, terminates the worker, decrements browser refcount, and disposes the browser handle when refcount reaches zero.\n\n## Modes / Variants\n- **Action dispatch**\n - `open` — acquire/reuse browser + tab.\n - `close` — release one tab or all tabs.\n - `run` — execute JS inside the tab worker.\n- **Browser kind**\n - **Headless**: launches local Chromium with Puppeteer, applies stealth patches, and creates a fresh page per tab.\n - **Spawned app (`app.path`)**: reuses an existing CDP-enabled process for that executable when possible; otherwise kills same-path processes, spawns the executable with remote debugging enabled, then attaches. No stealth patches are injected.\n - **Connected browser (`app.cdp_url`)**: attaches to an already-running CDP endpoint. No process ownership; close only disconnects.\n- **Target selection for attached/spawned browsers**\n - With `app.target`, `pickElectronTarget()` returns the first page whose URL or title contains the case-insensitive substring.\n - Without `app.target`, it skips titles/URLs matching `request handler|devtools|background page|background host|service worker` and otherwise falls back to the first page.\n- **Worker mode**\n - **Dedicated worker**: normal path; user code runs off the main thread and can be aborted even when it blocks synchronously.\n - **Inline fallback**: activated when Bun worker spawn fails; behavior matches, but synchronous infinite loops on user code cannot be interrupted.\n- **Dialog policy**\n - No `dialogs` field: no auto-handler.\n - `accept`/`dismiss`: page `dialog` events are handled automatically.\n - Changing dialog policy on an existing live tab forces tab recreation instead of mutating the worker in place.\n- **Screenshot persistence**\n - `save` provided: persist full-resolution PNG at the resolved cwd-relative or absolute path.\n - `browser.screenshotDir` session setting set: persist full-resolution PNG under that directory with a timestamped filename.\n - Neither set: persist the resized image to a temp-file path under the OS temp dir.\n\n## Side Effects\n- Filesystem\n - `loadPuppeteer()` writes `{}` to `<puppeteer-safe-dir>/package.json` before importing `puppeteer-core`.\n - First headless launch may download Chromium into the Puppeteer cache directory returned by `getPuppeteerDir()`.\n - `tab.screenshot()` creates parent directories and writes image files.\n - `tab.uploadFile()` resolves supplied paths against the session cwd.\n- Network\n - CDP attach paths poll `http://127.0.0.1:<port>/json/version` or the supplied `cdp_url` `/json/version`.\n - Headless/browser-attach sessions create CDP websocket connections.\n - Headless first-use Chromium download uses `@puppeteer/browsers`.\n - User `page` / `tab` operations perform normal browser network traffic.\n- Subprocesses / native bindings\n - Headless mode launches Chromium through Puppeteer.\n - `app.path` mode may spawn the target executable via `Bun.spawn()`.\n - `killExistingByPath()` / `gracefulKillTreeOnce()` use `@oh-my-pi/pi-natives` process inspection/termination.\n - Worker mode uses Bun `Worker`; fallback mode does not.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Browser handles are cached in a process-global `Map` keyed by browser kind in `packages/coding-agent/src/tools/browser/registry.ts`.\n - Tabs are cached in a process-global `Map` keyed by `name` in `packages/coding-agent/src/tools/browser/tab-supervisor.ts`.\n - `run` captures session cwd and optional `browser.screenshotDir` for screenshot/save path resolution.\n - `restartForModeChange()` drops only headless tabs.\n- User-visible prompts / interactive UI\n - None beyond normal tool output. Dialog auto-handling is invisible unless it fails and emits debug logs.\n- Background work / cancellation\n - `open`, `run`, CDP waits, and browser actions thread through abort signals.\n - A timed-out `run` aborts the worker execution path and can tear down the tab.\n\n## Limits & Caps\n- Tool timeout clamp: default `30` s, min `1` s, max `300` s (`TOOL_TIMEOUTS.browser` in `packages/coding-agent/src/tools/tool-timeouts.ts`).\n- Supervisor grace period around init/run/close: `750` ms (`GRACE_MS` in `packages/coding-agent/src/tools/browser/tab-supervisor.ts`).\n- Puppeteer protocol timeout for launch/connect operations: `60_000` ms (`BROWSER_PROTOCOL_TIMEOUT_MS` in `packages/coding-agent/src/tools/browser/launch.ts`).\n- Connected-browser CDP readiness wait: `5_000` ms before `puppeteer.connect()` (`packages/coding-agent/src/tools/browser/registry.ts`).\n- Spawned-app CDP readiness wait after spawn: `30_000` ms (`packages/coding-agent/src/tools/browser/registry.ts`).\n- CDP polling cadence: 150 ms in `waitForCdp()` (`packages/coding-agent/src/tools/browser/attach.ts`).\n- Headless default viewport: `1365x768` at `deviceScaleFactor: 1.25` (`DEFAULT_VIEWPORT` in `packages/coding-agent/src/tools/browser/launch.ts`).\n- Screenshot model-attachment resize cap: `maxWidth 1024`, `maxHeight 1024`, `maxBytes 150 * 1024`, `jpegQuality 70` (`packages/coding-agent/src/tools/browser/tab-worker.ts`).\n- `tab.waitForUrl()` polling interval: `200` ms (`packages/coding-agent/src/tools/browser/tab-worker.ts`).\n- Drag simulation uses `12` mouse-move steps (`packages/coding-agent/src/tools/browser/tab-worker.ts`).\n\n## Errors\n- `BrowserTool.execute()` converts DOM-style `AbortError` into `ToolAbortError`; other errors propagate.\n- `run` hard-fails on missing code: `Missing required parameter 'code' for action 'run'.`\n- `open` fails when reusing a name across browser kinds: `Tab \"...\" is bound to a different browser (...). Close it first.`\n- `runInTabWithSnapshot()` fails when the tab is absent/dead (`Tab \"...\" is not alive. Reopen it.`) or already running (`Tab \"...\" is busy`).\n- Worker init failures and run failures are serialized through `RunErrorPayload`; `ToolError` and abort state are reconstructed on the host side by `errorFromPayload()`.\n- Attached-target mismatches surface as:\n - `No page targets available on the attached browser`\n - `No page target matched \"...\". Available pages:\\n...`\n - `Target ... is no longer available on the attached browser`\n- Spawned-app path validation requires an absolute executable path after cwd resolution, not an app bundle directory path.\n- Spawn/attach failures are wrapped into `ToolError`s such as `Timed out waiting for CDP endpoint ...`, `Failed to attach to ...`, or `Connected to ... but puppeteer.connect failed: ...`.\n- `tab` helper errors are user-visible `ToolError`s, including unsupported selector prefix, stale/unknown element id, invalid drag target, missing upload files, non-`<select>` for `tab.select()`, non-file-input for `tab.uploadFile()`, and screenshot selector misses.\n- On run timeout, the worker reports `Browser code execution timed out after <ms>ms`; the supervisor may escalate to `Browser code execution hung past grace; tab killed` if the worker does not respond after the grace window.\n\n## Notes\n- `loadPuppeteer()` and `loadPuppeteerInWorker()` temporarily redirect `cwd` to a safe Puppeteer directory before importing `puppeteer-core`, because Puppeteer probes the current working directory during module load.\n- Headless launch prefers a detected system Chrome/Chromium, then `PUPPETEER_EXECUTABLE_PATH`, and only then downloads Chromium.\n- Headless launch always passes `--no-sandbox`, `--disable-setuid-sandbox`, `--disable-blink-features=AutomationControlled`, and a `--window-size=...` matching the initial viewport. It also ignores Puppeteer default args `--disable-extensions`, `--disable-default-apps`, and `--disable-component-extensions-with-background-pages`.\n- Proxy-related env vars only affect headless launch: `PUPPETEER_PROXY`, `PUPPETEER_PROXY_BYPASS_LOOPBACK`, and `PUPPETEER_PROXY_IGNORE_CERT_ERRORS`.\n- Stealth patches are applied only in headless mode. Spawned or externally connected browsers are intentionally left untouched.\n- `applyStealthPatches()` also strips Puppeteer's `//# sourceURL=__puppeteer_evaluation_script__` suffix from CDP `Runtime.evaluate` / `Runtime.callFunctionOn` payloads.\n- `tab.extract()` reads `page.content()`, runs Readability first, then falls back to `main article`/`article`/`main`/`[role='main']`/`body`, and returns `null` if neither extraction path yields content.\n- `close(all: true, kill: false)` disconnects from spawned/connected browsers when the last tab closes but leaves spawned app processes running.\n- Headless orphan cleanup is best-effort: if a worker dies before closing its page, the supervisor searches browser targets by `targetId` and closes that page.\n- Console methods inside `run` do not appear in tool output; they are forwarded as debug/warn/error logs through the worker transport.",
|
|
91
91
|
"tools/checkpoint.md": "# checkpoint\n\n> Mark the current top-level conversation state so later `rewind` can collapse exploratory context into a report.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/checkpoint.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/checkpoint.md`\n- Key collaborators:\n - `packages/coding-agent/src/session/agent-session.ts` — captures the active checkpoint after tool success.\n - `packages/coding-agent/src/session/session-manager.ts` — persists the normal session entry stream; not the active checkpoint marker.\n - `packages/coding-agent/src/tools/index.ts` — registers the tool and gates it behind `checkpoint.enabled`.\n - `packages/coding-agent/src/config/settings-schema.ts` — defines the disabled-by-default feature flag.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `goal` | `string` | Yes | Investigation goal. Required by the schema and echoed in the tool result. |\n\n## Outputs\nThe tool returns a single text result plus structured details:\n\n- text body:\n - `Checkpoint created.`\n - `Goal: <goal>`\n - `Run your investigation, then call rewind with a concise report.`\n- `details`:\n - `goal: string`\n - `startedAt: string` — ISO timestamp created inside `CheckpointTool.execute()`\n\nNo checkpoint ID, artifact URI, job handle, file path, or restore token is returned.\n\n## Flow\n1. `CheckpointTool.createIf()` in `packages/coding-agent/src/tools/checkpoint.ts` returns `null` for subagents by checking `session.taskDepth`; only top-level sessions can see the tool.\n2. `CheckpointTool.execute()` rejects subagent calls again with `ToolError(\"Checkpoint not available in subagents.\")`.\n3. It rejects nested checkpoints with `ToolError(\"Checkpoint already active.\")` when `session.getCheckpointState?.()` is already set.\n4. It creates `startedAt = new Date().toISOString()` and returns a normal `toolResult()` payload. The tool itself does not persist anything.\n5. On the later `tool_execution_end` event, `AgentSession` in `packages/coding-agent/src/session/agent-session.ts` detects successful `checkpoint` execution and captures three in-memory fields:\n - `checkpointMessageCount` — current `agent.state.messages.length`, after the checkpoint tool result has already been appended\n - `checkpointEntryId` — `sessionManager.getEntries().at(-1)?.id ?? null`, i.e. the last persisted session entry ID at checkpoint time\n - `startedAt` — copied from tool details or regenerated\n6. `AgentSession` stores that object in its private `#checkpointState` field and clears `#pendingRewindReport`.\n\n## Side Effects\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Sets `AgentSession.#checkpointState` in memory.\n - Records the checkpoint boundary as a message count plus a session entry ID.\n - Enables the later yield guard: if a checkpoint is active and no rewind report is pending, `#enforceRewindBeforeYield()` injects a developer-role warning and schedules another turn.\n- User-visible prompts / interactive UI\n - The tool result tells the model to call `rewind` after the investigation.\n - If the agent tries to `yield` first, `AgentSession` injects:\n\n```text\n<system-warning>\nYou are in an active checkpoint. You MUST call rewind with your investigation findings before yielding. Do NOT yield without completing the checkpoint.\n</system-warning>\n```\n\n## Limits & Caps\n- Availability is gated by `checkpoint.enabled`, default `false`, in `packages/coding-agent/src/config/settings-schema.ts`.\n- The tool is registered as discoverable in `packages/coding-agent/src/tools/index.ts`.\n- Only one active checkpoint is allowed per top-level session.\n- Checkpoint state is not persisted as a dedicated session entry. If the process exits, a resumed session can reload the conversation history, but not the live `#checkpointState` guard.\n- Session persistence still applies to the ordinary checkpoint tool call message. Global session persistence truncation is `MAX_PERSIST_CHARS = 500_000` in `packages/coding-agent/src/session/session-manager.ts`.\n\n## Errors\n- `ToolError(\"Checkpoint not available in subagents.\")` — thrown for subagent sessions.\n- `ToolError(\"Checkpoint already active.\")` — thrown when a prior checkpoint has not been rewound or cleared.\n- The tool body has no local `try/catch`; unexpected exceptions propagate.\n\n## Notes\n- Despite the summary string `Create a git-based checkpoint to save and restore session state`, the implementation does not call git and does not snapshot filesystem state.\n- Captured state is conversation/session metadata only:\n - in-memory message count\n - session entry ID in the session tree\n - timestamp\n- Not captured:\n - working tree contents\n - staged changes\n - artifacts\n - blob-store contents\n - SQLite history rows from `packages/coding-agent/src/session/history-storage.ts`\n - auth or agent records from `packages/coding-agent/src/session/agent-storage.ts`\n",
|
|
@@ -114,7 +114,7 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
|
|
|
114
114
|
"tools/write.md": "# write\n\n> Create or overwrite a file, writable internal resource, archive entry, SQLite row, or merge-conflict resolution.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/write.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/write.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/archive-reader.ts` — parse `archive.ext:entry` selectors.\n - `packages/coding-agent/src/tools/sqlite-reader.ts` — detect SQLite paths and perform row insert/update/delete.\n - `packages/coding-agent/src/lsp/index.ts` — format-on-write and diagnostics writethrough.\n - `packages/coding-agent/src/tools/auto-generated-guard.ts` — block overwriting generated files.\n - `packages/coding-agent/src/tools/fs-cache-invalidation.ts` — invalidate shared FS scan caches after writes.\n - `packages/coding-agent/src/tools/plan-mode-guard.ts` — resolve paths and enforce plan-mode write policy.\n\n## Inputs\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `path` | `string` | Yes | Target path. Plain file path writes a filesystem file. Writable internal URLs are delegated to their handler. `archive.ext:inner/path` writes an archive entry for `.tar`, `.tar.gz`, `.tgz`, or `.zip`. `db.sqlite:table` inserts a row. `db.sqlite:table:key` updates or deletes a row. `conflict://<id>` resolves a recorded merge conflict; `conflict://*` bulk-resolves every registered conflict. |\n| `content` | `string` | Yes | Full replacement file content, archive entry content, internal-resource content, conflict replacement, or SQLite row payload. SQLite non-delete writes must parse as a JSON5 object. Empty or whitespace-only content deletes a SQLite row when `path` includes a row key. |\n\nWorked examples:\n\n```text\npath: \"src/generated/config.json\"\ncontent: \"{\\n \\\"enabled\\\": true\\n}\\n\"\n```\n\n```text\npath: \"fixtures/archive.zip:templates/email.txt\"\ncontent: \"hello\\n\"\n```\n\n```text\npath: \"data/app.sqlite:users:42\"\ncontent: \"{name: 'Ada', active: true}\"\n```\n\n## Outputs\nSingle-shot result.\n\n- Success always returns a text block.\n - Plain file write: `Successfully wrote <chars> bytes to <relative-path>` (the count is `cleanContent.length`, not encoded byte length).\n - Internal URL write: `Successfully wrote <chars> bytes to <url>`.\n - Archive write: `Successfully wrote <chars> bytes to <relative-archive-path>:<entry-path>`.\n - SQLite write: one of `Inserted row into <table>`, `Updated row '<key>' in <table>`, `No row updated ...`, `Deleted row ...`, `No row deleted ...`.\n - Conflict resolution: conflict-specific success text, with fresh hashline snapshot headers when applicable.\n- If hashline prefixes were copied from `read` output and stripped first, the first text block gets an extra note.\n- In hashline display mode, plain file writes (including ACP bridge writes) and conflict resolutions prepend a fresh `[<relative-path>#TAG]` header so the next `edit` has a current snapshot tag without an extra `read`. Bulk conflict resolutions append a `Snapshots:` block listing one header per successfully written file.\n- Plain file writes may also return `details.diagnostics` plus `details.meta.diagnostics` when LSP diagnostics-on-write is enabled, and `details.madeExecutable` when a newly written shebang file is chmodded executable.\n- SQLite writes use `toolResult(...).sourcePath(...)`, so `details.meta.sourcePath` points at the database file.\n- Archive writes set `details.resolvedPath` to the archive's absolute path; internal URL writes return empty `details`.\n\n## Flow\n1. `WriteTool.execute()` in `packages/coding-agent/src/tools/write.ts` strips pasted `[PATH#HASH]` headers and `LINE:` hashline prefixes from `content` when the session is in hashline display mode.\n2. If `path` is an internal URL whose handler exposes `write`, the tool delegates directly to `handler.write(...)` and returns.\n3. `conflict://...` paths are handled next by the merge-conflict resolver. Scope reads such as `conflict://<id>/ours` are rejected as read-only; writable conflict URIs must omit the scope.\n4. It calls `#resolveArchiveWritePath()` next. That uses `parseArchivePathCandidates()` from `packages/coding-agent/src/tools/archive-reader.ts`, checks candidate archive files on disk, and falls back to the longest matching archive suffix even when the archive file does not exist yet.\n5. Archive writes call `enforcePlanModeWrite(..., { op: exists ? \"update\" : \"create\" })`, then `#writeArchiveEntry()`.\n - The parent directory of the archive file is created with `fs.mkdir(..., { recursive: true })`.\n - `.zip` archives are read with `fflate.unzipSync()`, the target entry is replaced in an in-memory map, and the archive is rewritten with `fflate.zipSync()` + `Bun.write()`.\n - `.tar`, `.tar.gz`, and `.tgz` archives are read with `Bun.Archive`, existing entries are copied into an object map, the target entry is replaced, and `Bun.Archive.write()` rewrites the archive.\n - `invalidateFsScanAfterWrite()` runs on the archive file path.\n6. If the path is not treated as an archive, `execute()` calls `#resolveSqliteWritePath()`. That uses `parseSqlitePathCandidates()` and `isSqliteFile()` from `packages/coding-agent/src/tools/sqlite-reader.ts`. Existing non-SQLite files suppress the SQLite path interpretation.\n7. SQLite writes call `enforcePlanModeWrite(..., { op: \"update\" })`, then `#writeSqliteRow()`.\n - The database must already exist; missing DBs throw `SQLite database '<path>' not found`.\n - The tool opens `new Database(..., { create: false, strict: true })` and sets `PRAGMA busy_timeout = 3000`.\n - Whitespace-only `content` with a row key deletes a row.\n - Non-empty `content` is parsed with `Bun.JSON5.parse()`, must be a JSON object, and is routed to insert/update helpers from `packages/coding-agent/src/tools/sqlite-reader.ts`.\n - `invalidateFsScanAfterWrite()` runs on the DB path and the connection is closed in `finally`.\n8. Otherwise the tool treats `path` as a plain filesystem file.\n - `enforcePlanModeWrite(..., { op: \"create\" })` runs before path resolution.\n - Existing files are checked by `assertEditableFile()` to block overwriting detected generated files.\n - ACP bridge writeTextFile is tried first when available; otherwise the session’s writethrough callback writes content. With LSP enabled and `lsp.formatOnWrite` / `lsp.diagnosticsOnWrite` settings on, `createLspWritethrough()` may format content, sync it through LSP servers, save it, and collect diagnostics. Otherwise `writethroughNoop()` writes directly with `Bun.write()` or `file.write()`.\n - `maybeMarkExecutableForShebang()` may chmod the file executable when content starts with `#!`.\n - `invalidateFsScanAfterWrite()` runs on the file path.\n9. The tool returns a text result and optional diagnostics / executable metadata.\n\n## Modes / Variants\n### Plain file path\n- Target is any path that does not resolve as an archive selector and does not resolve as an existing-or-new SQLite selector.\n- Existing files are overwritten.\n- `write.ts` does not call `fs.mkdir()` on this path; explicit parent-directory creation only exists in the archive branch, but `Bun.write()` itself creates missing parent directories for plain file writes.\n\nExample:\n\n```text\npath: \"tmp/output.txt\"\ncontent: \"hello\\n\"\n```\n\n### Archive entry write\n- Selector syntax: `archive.ext:inner/path`.\n- Supported archive suffixes come from `parseArchivePathCandidates()`: `.tar`, `.tar.gz`, `.tgz`, `.zip`.\n- The inner path is normalized to `/`, strips empty and `.` segments, rejects `..`, and rejects directory targets ending in `/`.\n- Rewrites the whole archive file after replacing one entry.\n- Creates the parent directory for the archive file if needed.\n\nExample:\n\n```text\npath: \"build/assets.tar.gz:css/app.css\"\ncontent: \"body { color: black; }\\n\"\n```\n\n### SQLite table insert\n- Selector syntax: `db.sqlite:table`.\n- `content` must parse as a JSON5 object.\n- Empty object is allowed and becomes `INSERT INTO <table> DEFAULT VALUES`.\n- Query parameters are rejected for SQLite writes.\n\nExample:\n\n```text\npath: \"data/app.db:users\"\ncontent: \"{name: 'Ada', active: true}\"\n```\n\n### SQLite row update / delete\n- Selector syntax: `db.sqlite:table:key`.\n- Non-empty `content` updates the row.\n- Empty or whitespace-only `content` deletes the row.\n- Row lookup uses the single-column primary key if present; otherwise it falls back to `rowid`. Composite primary keys and `WITHOUT ROWID` tables are rejected for key-based writes.\n\nExample update:\n\n```text\npath: \"data/app.sqlite:users:42\"\ncontent: \"{email: 'ada@example.com'}\"\n```\n\nExample delete:\n\n```text\npath: \"data/app.sqlite:users:42\"\ncontent: \"\"\n```\n\n## Side Effects\n- Filesystem\n - Creates or overwrites plain files.\n - Rewrites entire archive files when writing an archive entry.\n - Explicitly creates parent directories (via `fs.mkdir`) for archive files only; plain file writes get parent directories from `Bun.write()`.\n - Mutates existing SQLite databases; never creates a new SQLite DB.\n - Resolves conflict markers in files for `conflict://...` writes.\n - May chmod a shebang file executable after a successful plain-file write.\n- Subprocesses / native bindings\n - Uses Bun SQLite bindings via `bun:sqlite`.\n - Uses Bun archive APIs and lazily imports `fflate` for ZIP reads/writes.\n - May talk to configured LSP servers through `packages/coding-agent/src/lsp/index.ts`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Invalidates shared filesystem scan cache entries through `invalidateFsScanAfterWrite()`.\n - Enforces plan-mode write restrictions before mutating the target.\n- Background work / cancellation\n - Marks the tool `concurrency = \"exclusive\"` in `WriteTool`.\n - LSP writethrough can schedule deferred diagnostics fetches after a timeout, but plain `write.ts` only consumes the immediate return value.\n\n## Limits & Caps\n- `WriteTool` itself exposes no byte cap beyond storing `content` in memory and, for archives, rebuilding the archive in memory.\n- Generated-file detection reads at most `CHECK_BYTE_COUNT = 1024` bytes and `HEADER_LINE_LIMIT = 40` header lines from an existing file in `packages/coding-agent/src/tools/auto-generated-guard.ts`.\n- SQLite writes set `PRAGMA busy_timeout = 3000`.\n- LSP writethrough uses a `5_000` ms operation timeout in `runLspWritethrough()` and may schedule a deferred diagnostics fetch with `AbortSignal.timeout(25_000)` in `scheduleDeferredDiagnosticsFetch()`.\n- Shebang executable handling depends on host filesystem chmod support.\n\n## Errors\n- Invalid archive subpaths throw `ToolError` with messages such as:\n - `Archive write path must target a file inside the archive`\n - `Archive write path must target a file, not a directory`\n - `Archive path cannot contain '..'`\n- SQLite path parsing throws on unsupported forms:\n - `SQLite write paths do not support query parameters`\n - `SQLite write path must target a table`\n - `SQLite row writes require a non-empty row key`\n- Missing SQLite DBs surface as `SQLite database '<path>' not found`.\n- SQLite content errors are model-visible `ToolError`s, including invalid JSON5, non-object payloads, unknown columns, non-scalar values, empty update objects, composite primary keys, and `WITHOUT ROWID` tables.\n- Existing plain files may be rejected by `assertEditableFile()` when they look generated.\n- Conflict scope writes such as `conflict://<id>/ours` are rejected as read-only; invalid conflict IDs or missing conflict history surface as `ToolError`s from the conflict resolver.\n- Archive read/write failures and unexpected SQLite exceptions are wrapped in `ToolError(error.message)`.\n- If no LSP server matches or LSP formatting/diagnostics times out, file writes still fall back to writing content; diagnostics may be omitted.\n\n## Notes\n- Archive path detection runs before SQLite detection. A path that matches an archive selector is never treated as SQLite.\n- SQLite detection declines when an existing file with a `.sqlite` / `.db` suffix is present but does not have SQLite magic bytes; then the path falls back to a plain file write.\n- ZIP entry content is encoded with `new TextEncoder().encode(content)` in `#writeArchiveEntry()`. Non-ZIP archive writes pass the string directly to `Bun.Archive.write()`.\n- The prompt forbids two common anti-patterns: using `write` for routine edits that should use `edit`, and creating `*.md` / `README` files unless explicitly requested. It also forbids emojis unless requested.\n- Plain file and internal URL writes report `cleanContent.length` as “bytes”, which is UTF-16 code units in JS, not an on-disk byte measurement.\n- `stripWriteContent()` only removes hashline prefixes when the session’s file display mode has `hashLines` enabled; otherwise content is written unchanged.\n",
|
|
115
115
|
"tree.md": "# `/tree` Command Reference\n\n`/tree` opens the interactive **Session Tree** navigator. It lets you jump to any entry in the current session file and continue from that point.\n\nThis is an in-file leaf move, not a new session export.\n\n## What `/tree` does\n\n- Builds a tree from current session entries (`SessionManager.getTree()`)\n- Opens `TreeSelectorComponent` with keyboard navigation, filters, and search\n- On selection, calls `AgentSession.navigateTree(targetId, { summarize, customInstructions })`\n- Rebuilds visible chat from the new leaf path\n- Optionally prefills editor text when selecting a user/custom message\n\nPrimary implementation:\n\n- `src/slash-commands/builtin-registry.ts` (`/tree`, `/branch` command routing)\n- `src/modes/controllers/input-controller.ts` (keybinding wiring, double-escape behavior)\n- `src/modes/controllers/selector-controller.ts` (tree UI launch + summary prompt flow)\n- `src/modes/components/tree-selector.ts` (navigation, filters, search, labels, rendering)\n- `src/session/agent-session.ts` (`navigateTree` leaf switching + optional summary)\n- `src/session/session-manager.ts` (`getTree`, `branch`, `branchWithSummary`, `resetLeaf`, label persistence)\n\n## How to open it\n\nAny of the following opens the same selector:\n\n- `/tree`\n- configured keybinding for the `app.session.tree` action\n- double-escape on empty editor when `doubleEscapeAction = \"tree\"` (default)\n- `/branch` when `doubleEscapeAction = \"tree\"` (routes to tree selector instead of user-only branch picker)\n\n## Tree UI model\n\nThe tree is rendered from session entry parent pointers (`id` / `parentId`).\n\n- Children are sorted by timestamp ascending (older first, newer lower)\n- Active branch (path from root to current leaf) is marked with a bullet\n- Labels (if present) render as `[label]` before node text\n- If multiple roots exist (orphaned/broken parent chains), they are shown under a virtual branching root\n\n```text\nExample tree view (active path marked with •):\n\n├─ user: \"Start task\"\n│ └─ assistant: \"Plan\"\n│ ├─ • user: \"Try approach A\"\n│ │ └─ • assistant: \"A result\"\n│ │ └─ • [milestone] user: \"Continue A\"\n│ └─ user: \"Try approach B\"\n│ └─ assistant: \"B result\"\n```\n\nThe selector recenters around current selection and shows up to:\n\n- `max(5, floor(terminalHeight / 2))` rows\n\n## Keybindings inside tree selector\n\n- `Up` / `Down`: move selection (wraps)\n- `Left` / `Right`: page up / page down\n- `Enter`: select node\n- `Esc`: clear search if active; otherwise close selector\n- `Ctrl+C`: close selector\n- `Type`: append to search query\n- `Backspace`: delete search character\n- `Shift+L`: edit/clear label on selected entry\n- `Ctrl+O`: cycle filter forward\n- `Shift+Ctrl+O`: cycle filter backward\n- `Alt+D/T/U/L/A`: jump directly to specific filter mode\n\n## Filters and search semantics\n\nFilter modes (`TreeList`):\n\n1. `default`\n2. `no-tools`\n3. `user-only`\n4. `labeled-only`\n5. `all`\n\n### `default`\n\nShows conversational nodes plus any entry types not explicitly suppressed. It hides these setting/bookkeeping entry types:\n\n- `label`\n- `custom`\n- `model_change`\n- `thinking_level_change`\n\nOther internal entry types that are not rendered specially may appear as blank rows in current code.\n\n### `no-tools`\n\nSame as `default`, plus hides `toolResult` messages.\n\n### `user-only`\n\nOnly `message` entries where role is `user`.\n\n### `labeled-only`\n\nOnly entries that currently resolve to a label.\n\n### `all`\n\nEverything in the session tree, including bookkeeping/custom entries.\n\n### Tool-only assistant node behavior\n\nAssistant messages that contain **only tool calls** (no text) are hidden by default in all filtered views unless:\n\n- message is error/aborted (`stopReason` not `stop`/`toolUse`), or\n- it is the current leaf (always kept visible)\n\n### Search behavior\n\n- Query is tokenized by spaces\n- Matching is case-insensitive\n- All tokens must match (AND semantics)\n- Searchable text includes label, role, and type-specific content (message text, branch summary text, custom type, tool command snippets, etc.)\n\n## Selection outcomes (important)\n\n`navigateTree` computes new leaf behavior from selected entry type:\n\n### Selecting `user` message\n\n- New leaf becomes selected entry’s `parentId`\n- If parent is `null` (root user message), leaf resets to root (`resetLeaf()`)\n- Selected message text is copied to editor for editing/resubmit\n\n### Selecting `custom_message`\n\n- Same leaf rule as user messages (`parentId`)\n- Text content is extracted and copied to editor\n\n### Selecting non-user node (assistant/tool/summary/compaction/custom bookkeeping/etc.)\n\n- New leaf becomes selected node id\n- Editor is not prefilled\n\n### Selecting current leaf\n\n- No-op; selector closes with “Already at this point”\n\n```text\nSelection decision (simplified):\n\nselected node\n │\n ├─ is current leaf? ── yes ──> close selector (no-op)\n │\n ├─ is user/custom_message? ── yes ──> leaf := parentId (or resetLeaf for root)\n │ + prefill editor text\n │\n └─ otherwise ──> leaf := selected node id\n + no editor prefill\n```\n\n## Summary-on-switch flow\n\nSummary prompt is controlled by `branchSummary.enabled` (default: `false`).\n\nWhen enabled, after picking a node the UI asks:\n\n- `No summary`\n- `Summarize`\n- `Summarize with custom prompt`\n\nFlow details:\n\n- Escape in summary prompt reopens tree selector\n- Custom prompt cancellation returns to summary choice loop\n- During summarization, UI shows loader and binds `Esc` to `abortBranchSummary()`\n- If summarization aborts, tree selector reopens and no move is applied\n\n`navigateTree` internals:\n\n- Collects abandoned-branch entries from old leaf to common ancestor\n- Emits `session_before_tree` (extensions can cancel or inject summary)\n- Uses default summarizer only if requested and needed\n- Applies move with:\n - `branchWithSummary(...)` when summary exists\n - `branch(newLeafId)` for non-root move without summary\n - `resetLeaf()` for root move without summary\n- Replaces agent conversation with rebuilt session context\n- Emits `session_tree`\n\nNote: if user requests summary but there is nothing to summarize, navigation proceeds without creating a summary entry.\n\n## Labels\n\nLabel edits in tree UI call `appendLabelChange(targetId, label)`.\n\n- non-empty label sets/updates resolved label\n- empty label clears it\n- labels are stored as append-only `label` entries\n- tree nodes display resolved label state, not raw label-entry history\n\n## `/tree` vs adjacent operations\n\n| Operation | Scope | Result |\n| --------- | ------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| `/tree` | Current session file | Moves leaf to selected point (same file) |\n| `/branch` | Usually current session file -> new session file | By default branches from selected **user** message into a new session file; if `doubleEscapeAction = \"tree\"`, `/branch` opens tree navigation UI instead |\n| `/fork` | Whole current session | Duplicates session into a new persisted session file |\n| `/resume` | Session list | Switches to another session file |\n\nKey distinction: `/tree` is a navigation/repositioning tool inside one session file. `/branch`, `/fork`, and `/resume` all change session-file context.\n\n## Operator workflows\n\n### Re-run from an earlier user prompt without losing current branch\n\n1. `/tree`\n2. search/select earlier user message\n3. choose `No summary` (or summarize if needed)\n4. edit prefilled text in editor\n5. submit\n\nEffect: new branch grows from selected point within same session file.\n\n### Leave current branch with context breadcrumb\n\n1. enable `branchSummary.enabled`\n2. `/tree` and select target node\n3. choose `Summarize` (or custom prompt)\n\nEffect: a `branch_summary` entry is appended at the target position before continuing.\n\n### Investigate hidden bookkeeping entries\n\n1. `/tree`\n2. press `Alt+A` (all)\n3. search for `model`, `thinking`, `custom`, or labels\n\nEffect: inspect full internal timeline, not just conversational nodes.\n\n### Bookmark pivot points for later jumps\n\n1. `/tree`\n2. move to entry\n3. `Shift+L` and set label\n4. later use `Alt+L` (`labeled-only`) to jump quickly\n\nEffect: fast navigation among durable branch landmarks.\n",
|
|
116
116
|
"ttsr-injection-lifecycle.md": "# TTSR Injection Lifecycle\n\nThis document covers the current Time Traveling Stream Rules (TTSR) runtime path from rule discovery to stream interruption, retry injection, extension notifications, and session-state handling.\n\n## Implementation files\n\n- [`../src/sdk.ts`](../packages/coding-agent/src/sdk.ts)\n- [`../src/export/ttsr.ts`](../packages/coding-agent/src/export/ttsr.ts)\n- [`../src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts)\n- [`../src/session/session-manager.ts`](../packages/coding-agent/src/session/session-manager.ts)\n- [`../src/prompts/system/ttsr-interrupt.md`](../packages/coding-agent/src/prompts/system/ttsr-interrupt.md)\n- [`../src/capability/index.ts`](../packages/coding-agent/src/capability/index.ts)\n- [`../src/extensibility/extensions/types.ts`](../packages/coding-agent/src/extensibility/extensions/types.ts)\n- [`../src/extensibility/hooks/types.ts`](../packages/coding-agent/src/extensibility/hooks/types.ts)\n- [`../src/extensibility/custom-tools/types.ts`](../packages/coding-agent/src/extensibility/custom-tools/types.ts)\n- [`../src/modes/controllers/event-controller.ts`](../packages/coding-agent/src/modes/controllers/event-controller.ts)\n\n## 1. Discovery feed and rule registration\n\nAt session creation, `createAgentSession()` loads discovered rules, constructs a `TtsrManager`, and buckets rules through `bucketRules(...)`:\n\n```ts\nconst ttsrSettings = settings.getGroup(\"ttsr\");\nconst ttsrManager = new TtsrManager(ttsrSettings);\nconst rulesResult = await loadCapability<Rule>(ruleCapability.id, { cwd });\nconst { rulebookRules, alwaysApplyRules } = bucketRules(\n rulesResult.items,\n ttsrManager,\n {\n builtinRules: ttsrSettings.builtinRules,\n disabledRules: ttsrSettings.disabledRules,\n },\n);\n```\n\n`bucketRules(...)` drops names listed in `ttsr.disabledRules`, drops embedded `builtin-defaults` rules when `ttsr.builtinRules === false`, registers accepted TTSR rules, and then routes the remaining rules to always-apply/rulebook buckets.\n\n### Pre-registration dedupe behavior\n\n`loadCapability(\"rules\")` deduplicates by `rule.name` with first-wins semantics (higher provider priority first). Shadowed duplicates are removed before TTSR registration.\n\n### `TtsrManager.addRule()` behavior\n\nRegistration is skipped when:\n\n- TTSR is disabled (`ttsr.enabled === false`)\n- both `rule.condition` (regex) and `rule.astCondition` (ast-grep patterns) are absent, or every regex condition fails to compile and there are no AST conditions\n- a rule with the same `rule.name` was already registered in this manager\n- the rule scope excludes all monitored streams\n\nInvalid regex conditions and unreachable scopes are logged as warnings and ignored; session startup continues. If a TTSR rule defines `globs`, those globs are compiled as a global file-path gate for matching.\n\n### AST conditions (`astCondition`)\n\nA rule may carry `astCondition`: a list of [ast-grep](https://ast-grep.github.io/) patterns (OR'd, same as regex `condition`), matched structurally instead of textually. A repeated metavariable inside one pattern requires both occurrences to be equal (`if ($X) clearTimeout($X)` matches but `if ($X) clearTimeout($Y)` does not).\n\nAST conditions only evaluate on **edit/write tool-argument streams** — they need a language, which is inferred from the file extension on the tool's path argument, and they match against the tool's reconstructed source snapshot (`matcherDigest`), not the raw wire delta. Matching is performed in memory by the native `astMatch` engine (no temp files) with Smart strictness. Streams without a usable file path (prose, thinking, path-less tool calls) skip AST conditions entirely. A rule may mix `condition` and `astCondition`; the regex paths keep working on every scope while AST paths apply only to those tool streams.\n\n### Setting gating\n\n`TtsrSettings.enabled` gates the manager: when `ttsr.enabled === false`, `addRule()` refuses registration and `checkDelta()`/`checkSnapshot()`/`checkAstSnapshot()`/`hasRules()`/`hasAstRules()` all return empty/false, so no matching runs.\n\n## 2. Streaming monitor lifecycle\n\nTTSR detection runs inside `AgentSession.#handleAgentEvent`.\n\n### Turn start\n\nOn `turn_start`, the stream buffer is reset:\n\n- `ttsrManager.resetBuffer()`\n\n### During stream (`message_update`)\n\nWhen assistant updates arrive and rules exist:\n\n- monitor `text_delta`, `thinking_delta`, and `toolcall_delta`\n- for tools exposing `matcherDigest` (edit/write), replace the scoped buffer with the reconstructed source snapshot and call `checkSnapshot(snapshot, matchContext)`; otherwise append the delta into a source/tool scoped manager buffer and call `checkDelta(delta, matchContext)` (synchronous regex matching either way)\n- for edit/write tool streams, when `hasAstRules()` is true, `await checkAstSnapshot(snapshot, matchContext)` (asynchronous AST matching)\n\n`checkDelta()`/`checkSnapshot()` iterate registered rules and return all matching rules that pass scope, global path-glob, regex condition, and repeat policy checks. `checkAstSnapshot()` applies the same scope/path/repeat gates, then runs each candidate rule's `astCondition` patterns against the snapshot via the native `astMatch` engine. It is throttled per stream key: an identical consecutive snapshot (common when only non-source arguments change between deltas) is skipped without re-running the matcher. Both paths feed their matches through the same trigger-decision handler.\n\n## 3. Trigger decision and immediate abort path\n\nWhen one or more rules match and at least one matched rule allows interruption:\n\n1. Matched rules are deduplicated into `#pendingTtsrInjections`.\n2. `#ttsrAbortPending = true` and a TTSR resume gate is created.\n3. `agent.abort()` is called immediately.\n4. `ttsr_triggered` event is emitted asynchronously (fire-and-forget).\n5. retry work is scheduled via the post-prompt task scheduler with a 50ms delay.\n\nAbort is not blocked on extension callbacks.\n\n## 4. Retry scheduling, context mode, and reminder injection\n\nAfter the 50ms timeout:\n\n1. `#ttsrAbortPending = false`\n2. read `ttsrManager.getSettings().contextMode`\n3. if `contextMode === \"discard\"`, drop the targeted partial assistant output with `agent.replaceMessages(...slice(0, targetAssistantIndex))`\n4. build injection content from pending rules using `ttsr-interrupt.md` template\n5. append and persist a hidden `custom_message`/runtime custom message with `customType: \"ttsr-injection\"` and `details.rules`\n6. mark those rule names injected, persist a `ttsr_injection` entry, and call `agent.continue()` to retry generation\n\nTemplate payload is:\n\n```xml\n<system-interrupt reason=\"rule_violation\" rule=\"{{name}}\" path=\"{{path}}\">\n...\n{{content}}\n</system-interrupt>\n```\n\nPending injections are cleared after content generation.\n\n### `contextMode` behavior on partial output\n\n- `discard`: partial/aborted assistant message is removed before retry.\n- `keep`: partial assistant output remains in conversation state; reminder is appended after it.\n\n### Non-interrupting matches\n\nNon-interrupting matches split by `matchContext.source`:\n\n- **`source === \"tool\"` (tool-source match).** The rule is bucketed into `#perToolTtsrInjections`, keyed by the matched tool call's `id`. There is **no** deferred follow-up turn and the stream is not aborted. When the tool actually produces a result, the `afterToolCall` hook prepends a rendered `ttsr-tool-reminder.md` block to `ctx.result.content` (a single `text` block inserted ahead of the tool's own content), and persists a `ttsr_injection` entry with the consumed rule names. The template payload is:\n\n ```xml\n <system-reminder reason=\"rule_violation\" rule=\"{{name}}\" path=\"{{path}}\">\n ...\n {{content}}\n </system-reminder>\n ```\n\n- **`source === \"text\"` / `\"thinking\"` (prose-source match).** Behavior is unchanged: the rule is queued in `#pendingTtsrInjections` and, after a successful non-error, non-aborted assistant message, `AgentSession` injects the hidden `ttsr-injection` custom message as a follow-up and schedules continuation.\n\nWithin a single matching batch, each rule is attached to exactly one sibling tool call — if multiple sibling tool calls would satisfy the same rule, deduplication picks one and the others are left untouched. Multiple distinct rules can still fold onto the same tool call.\n\n#### Implications for tool authors and transcript readers\n\n- The tool's own `toolResult` content is preserved verbatim; the reminder is **prepended** as an additional leading text block. Renderers that assume `content[0]` is the tool's primary output must scan past any block whose text begins with `<system-reminder reason=\"rule_violation\"` (or filter on the wrapper tag) to find the real payload.\n- The reminder is in-band on the tool result, not a separate `custom_message`/`ttsr-injection` entry. Transcript readers looking for non-interrupting TTSR activity on tool-source rules MUST inspect tool results (and the persisted `ttsr_injection` entry list), not just synthetic injection entries.\n- A single tool result may carry reminders for several rules concatenated with a blank line between rendered templates.\n- If the assistant message ends with `stopReason === \"aborted\"` or `\"error\"` before the matched tools run, the pending per-tool buckets are cleared — those rules are **not** persisted as injected and remain eligible to re-trigger on a future turn (subject to repeat policy).\n\n## 5. Repeat policy and gap logic\n\n`TtsrManager` tracks `#messageCount` and per-rule `lastInjectedAt`.\n\n### `repeatMode: \"once\"`\n\nA rule can trigger only once after it has an injection record.\n\n### `repeatMode: \"after-gap\"`\n\nA rule can re-trigger only when:\n\n- `messageCount - lastInjectedAt >= repeatGap`\n\n`messageCount` increments on `turn_end`, so gap is measured in completed turns, not stream chunks.\n\n## 6. Event emission and extension/hook surfaces\n\n### Session event\n\n`AgentSessionEvent` includes:\n\n```ts\n{ type: \"ttsr_triggered\"; rules: Rule[] }\n```\n\n### Extension runner\n\n`#emitSessionEvent()` routes the event to:\n\n- extension listeners (`ExtensionRunner.emit({ type: \"ttsr_triggered\", rules })`)\n- local session subscribers\n\n### Hook and custom-tool typing\n\n- extension API exposes `on(\"ttsr_triggered\", ...)`\n- hook API exposes `on(\"ttsr_triggered\", ...)`\n- custom tools receive `onSession({ reason: \"ttsr_triggered\", rules })`\n\n### Interactive-mode rendering difference\n\nInteractive mode uses `session.isTtsrAbortPending` to suppress showing the aborted assistant stop reason as a visible failure during TTSR interruption, and renders a `TtsrNotificationComponent` when the event arrives.\n\n## 7. Persistence and resume state (current implementation)\n\n`SessionManager` persists injected-rule state:\n\n- entry type: `ttsr_injection`\n- append API: `appendTtsrInjection(ruleNames)`\n- query API: `getInjectedTtsrRules()`\n- context reconstruction includes `SessionContext.injectedTtsrRules`\n\n`TtsrManager` supports restoration via `restoreInjected(ruleNames)`.\n\n### Current wiring status\n\nIn the current runtime path:\n\n- interrupted injections append a hidden `custom_message` with `customType: \"ttsr-injection\"` and append a `ttsr_injection` entry via `appendTtsrInjection(...)`\n- deferred non-interrupting prose-source injections are marked/persisted when their queued custom message reaches `message_end`\n- non-interrupting tool-source injections are marked at match time and persisted via `appendTtsrInjection(...)` from the `afterToolCall` hook when the matched tool's result is produced\n- `createAgentSession()` restores `existingSession.injectedTtsrRules` into `ttsrManager`\n\nNet effect: injected-rule suppression is persisted/restored across session reload/resume for the current branch path.\n\n## 8. Race boundaries and ordering guarantees\n\n### Abort vs retry callback\n\n- abort is synchronous from TTSR handler perspective (`agent.abort()` called immediately)\n- retry is deferred by timer (`50ms`)\n- extension notification is asynchronous and intentionally not awaited before abort/retry scheduling\n\n### Multiple matches in same stream window\n\n`checkDelta()` returns all currently matching eligible rules for that scoped buffer. Pending injections are deduplicated by rule name before injection.\n\n### Between abort and continue\n\nDuring the timer window, state can change (user interruption, mode actions, additional events). The retry call is best-effort: `agent.continue()` is awaited in a try/catch; on failure the error is swallowed and the TTSR resume gate is resolved.\n\n## 9. Edge cases summary\n\n- Invalid `condition` regex: skipped with warning; other conditions/rules continue.\n- Duplicate rule names at capability layer: lower-priority duplicates are shadowed before registration.\n- Duplicate names at manager layer: second registration is ignored.\n- `ttsr.disabledRules`: listed names are dropped before TTSR registration and are not surfaced through always-apply/rulebook buckets.\n- `ttsr.builtinRules: false`: embedded `builtin-defaults` rules are dropped before TTSR registration; user/project rules still load.\n- `globs` on a TTSR rule require the stream match context to include at least one matching file path.\n- `contextMode: \"keep\"`: partial violating output can remain in context before reminder retry.\n- `interruptMode: \"never\"`: prose-source matches queue a deferred hidden injection after a successful assistant message; tool-source matches fold an in-band `<system-reminder>` into the matched tool call's `toolResult` content via the `afterToolCall` hook (no mid-stream abort, no separate follow-up turn).\n- Tool-source non-interrupting buckets are cleared when the parent assistant message ends with `stopReason === \"aborted\"` or `\"error\"`, so rules whose target tool never produced a result remain eligible to re-trigger.\n- Repeat-after-gap depends on turn count increments at `turn_end`; mid-turn chunks do not advance gap counters.\n",
|
|
117
|
-
"tui-core-renderer.md": "# TUI core renderer — the append-only contract\n\nWhat you are dealing with before you touch the rendering engine. This is the\ncompanion to [`tui-runtime-internals.md`](./tui-runtime-internals.md): that doc\nmaps the *flow* (input → component tree → render); this doc explains the\n**render contract, why it is shaped this way, and the invariants you must not\nviolate**. Scope is the core engine only:\n\n- [`packages/tui/src/tui.ts`](../packages/tui/src/tui.ts) — frame pipeline, commit ledger, window math, emitters, cursor placement.\n- [`packages/tui/src/terminal.ts`](../packages/tui/src/terminal.ts) — `ProcessTerminal`, capability probes, private-CSI reassembly.\n- [`packages/tui/src/terminal-capabilities.ts`](../packages/tui/src/terminal-capabilities.ts) — `TERMINAL` profile, sync-output / DECCARA / image detection.\n- [`packages/tui/src/stdin-buffer.ts`](../packages/tui/src/stdin-buffer.ts) — escape-sequence reassembly.\n- [`packages/tui/src/utils.ts`](../packages/tui/src/utils.ts) — width/slice/wrap (the width model).\n- [`packages/tui/src/kitty-graphics.ts`](../packages/tui/src/kitty-graphics.ts) + [`components/image.ts`](../packages/tui/src/components/image.ts) — inline images.\n- [`packages/tui/src/deccara.ts`](../packages/tui/src/deccara.ts) — rectangular-fill optimizer.\n\nApplication-layer renderers (transcript, tool calls, session tree, editor,\nwidgets) are **out of scope** — they live in `packages/coding-agent`. The one\napp-layer file that is load-bearing for this contract is\n[`transcript-container.ts`](../packages/coding-agent/src/modes/components/transcript-container.ts),\nwhich implements the commit-boundary seam described below.\n\n---\n\n## 1. The one thing to understand first\n\n> **The renderer cannot observe the terminal's scroll position** (ConPTY's\n> probe lies; POSIX has no API at all). The previous engine tried to *guess*\n> when it was safe to rewrite native scrollback, and every policy choice over\n> that unobservable variable traded one failure family for another (yank ↔\n> flash ↔ corruption ↔ invisible-until-resize — see the git history of this\n> file for the full war journal). The current engine removes the guess\n> entirely: **native scrollback is append-only.**\n\nWe keep the transcript on the **normal screen** (native scrollback, native\nselection, transcript persists after exit). The engine maintains one ledger:\n\n- **`committedRows` (C)** — frame rows `[0, C)` have been physically scrolled\n into terminal history. They are **immutable**: the engine never rewrites\n them, and components must never change them.\n- **`windowTopRow` (W)** — the frame row mapped to grid row 0. The visible\n window is frame rows `[W, W + height)`, repainted in place with relative\n cursor moves.\n- **commit boundary** — reported by the component tree per frame\n (`NativeScrollbackLiveRegion`) as two nested ends:\n - **byte-stable end (B)** — `commitSafeEnd ?? liveRegionStart ?? frame.length`.\n Rows below B are asserted never to re-layout and stay under the\n committed-prefix audit.\n - **durable end (D)** — `max(B, snapshotSafeEnd ?? B)`. Rows in `[B, D)` may\n still drift bytes later (a streaming markdown table re-aligning columns) but\n are *durable* — their current snapshot is permanent content, so dropping them\n when they scroll off is forbidden. They commit **audit-exempt**: later drift\n becomes a frozen stale row in history, never a re-anchor.\n\nPer ordinary frame: `W = max(C, L − height)`, `C' = max(C, min(D, W))`, and the\nonly bytes that ever touch history are the **chunk** `frame[C, C')` written at\nthe scrollback seam. The engine also tracks **`auditRows` (A ≤ C)** — the\nbyte-stable leading prefix `[0, A)`; the committed-prefix audit (§2) samples only\nthat prefix, so the durable suffix `[A, C)` drifting never triggers a re-anchor.\nScrollback therefore equals `frame[0..C)` — every row exactly once, in order,\nwith its content at commit time. There is nothing to guess, nothing to defer,\nand nothing to reconcile: the scroll position is irrelevant because ordinary\nupdates never rewrite anything a scrolled reader could be looking at.\n\n### What this costs (the accepted tradeoffs)\n\n- A block that has scrolled past the window top cannot reflow in place. A\n byte-stable block stays in the live region (below B) until final; a durable\n block (below D) commits its scroll-off snapshot, so a late layout change of an\n already-committed row is a frozen stale row in history (duplication never loss),\n not a dropped row.\n- A component tree that reports **no seam** gets shell semantics: whatever\n scrolls off is final. Shrinking such a frame into its committed prefix\n re-anchors the window and leaves the stale copy in history (§3).\n- Inside multiplexers, a resize leaves the pane history wrapped at the old\n width (same as any shell output).\n\n---\n\n## 2. The frame pipeline (what you are editing)\n\n`#doRender` per frame:\n\n1. Compose the frame (`render(width)`), collecting `liveRegionStart` /\n `commitSafeEnd` from the root children (absolute row indices).\n2. **Audit the committed prefix** (`findCommittedPrefixResync`, skipped on\n geometry frames). Components must never re-layout rows below C, but real\n flows violate it (a TTSR rewind truncating a streamed block, an image-cap\n demotion shrinking a committed image) and the violation must not become\n content loss. The detector samples the prefix *tail* (up to 8 non-blank\n rows in the last 24, SGR-stripped): an in-place edit or restyle disturbs\n only the touched rows (≤1 mismatch ⇒ aligned ⇒ ignored — stale styling in\n history is the accepted artifact), while any insertion/deletion shifts\n every row below it including the tail (⇒ re-anchor C at the first changed\n row and recommit from there: history keeps the stale copy and gains a\n fresh one — **duplication, never loss**).\n3. Classify: **fullPaint** (first paint, `clearScrollback` session replace, or\n geometry change outside a multiplexer — all user gestures) or **update**.\n4. Window math as in §1. Two special rules:\n - **Overlays freeze commits** (`C' = C`): composited rows must never enter\n history; the hidden gap backfills via the chunk after the overlay closes.\n - **Shrink into the committed prefix** (`L ≤ C`): re-anchor\n `W = max(0, L − height)`, reset `C = min(B, W)`, keep the stale history\n above (no gesture, no erase).\n5. Extract the cursor marker (strip-first: markers never reach the terminal,\n the prefix ledger, or the audit), prepare lines (width fitting), slice the\n window, composite overlays **into the window slice only** (screen\n coordinates — an overlay never touches the frame or the ledger).\n6. Emit:\n\n| Emitter | Bytes | When |\n|---|---|---|\n| `#emitFullPaint` | clears + `frame[0, C')` + window rows | gestures only. `clearScrollback` ⇒ `\\x1b[2J\\x1b[H\\x1b[3J`; otherwise ED22 (when supported) + `\\x1b[2J\\x1b[H` |\n| `#emitUpdate` scroll-append | `\\r\\n` + new bottom rows + changed-row range | the rows leaving the screen are exactly the chunk, content untouched since painted |\n| `#emitUpdate` in-window diff | relative move + changed-row range rewrite | nothing scrolls, nothing commits (cursor-only when nothing changed) |\n| `#emitUpdate` seam rewrite | chunk rows + full window rewrite | commit advance, window re-anchor, hidden-gap backfill, mux resize |\n\n**ED3 (`CSI 3 J`) is emitted in exactly one place** — `#emitFullPaint` with\n`clearScrollback: true` — and is reached only by user gestures: session\nreplace/branch/resume (`requestRender(true, { clearScrollback: true })`),\nresize outside a multiplexer, `resetDisplay()` (Ctrl+L). A gesture pins the\nuser to the tail, so the snap is acceptable; multiplexers never get ED3 (it is\na no-op there and a replay would duplicate pane history).\n\nThe ordinary update path never emits ED2/ED3 or an absolute cursor home —\nseveral terminal families snap a scrolled reader to the bottom on those.\n\n### The commit-boundary seam (the load-bearing app contract)\n\n`NativeScrollbackLiveRegion` (tui.ts) is how a component keeps mutable rows out\nof history:\n\n- `getNativeScrollbackLiveRegionStart()` — first row that may still mutate\n (everything below it, including root chrome rendered after it, stays in the\n window).\n- `getNativeScrollbackCommitSafeEnd()` — optional **byte-stable** deeper boundary\n (B): the append-only prefix of the live region (a streaming assistant message's\n settled rows), asserted never to re-layout, so it stays under the audit.\n- `getNativeScrollbackSnapshotSafeEnd()` — optional **durable** deeper boundary\n (D ≥ B): rows whose current snapshot is permanent but may still drift bytes\n (a streaming markdown table whose columns keep re-aligning). They commit on\n scroll-off (never dropped) but **audit-exempt** — drift after commit freezes a\n stale row in history rather than re-anchoring the audit and spraying duplicate\n snapshots. Without it, a commit-stable block that perpetually re-lays-out an\n interior row (a table taller than the window) had no byte-stable prefix past\n the table head, so its scrolled-off rows were committed nowhere and repainted\n nowhere — silent content loss as the reply streamed.\n\n`TranscriptContainer` implements this for the coding agent: finalized blocks\nfreeze (their render is snapshotted, so their content can never drift after\nthe engine may have committed it), still-mutating blocks\n(`isTranscriptBlockFinalized?.() === false`) anchor the live region, and\n`deriveLiveCommitState` derives the byte-stable commit-safe end of the first\nlive block from two independent signals:\n\n- **append-only detection** — a block observed growing without visibly\n rewriting an interior row commits its full body; a rewrite suspends this\n for `VOLATILE_REARM_FRAMES` clean frames.\n- **stable-prefix ratchet** — rows that stayed visibly identical for a full\n `STABLE_PREFIX_COMMIT_FRAMES` window commit even while the block's tail\n keeps rewriting (a task tool's static prompt above a ticking progress\n tree). Without it, one perpetually animating row holds the whole block out\n of history, so a block taller than the window reads as cut off (head\n neither committed nor on screen) for the entire run. The ratchet tracks the\n window-minimum common prefix; a rewrite above the promoted run retreats it\n to the divergence, and rows that already committed are the engine audit's\n problem (recommit → duplication, never loss). That retreat also arms a\n permanent **rewrite floor** at the divergence: a row that mutates *after*\n surviving a full promotion window is a slow ticker (an agent row's tool/cost\n counter updating every few seconds), not settling content — without the\n floor, every quiet stretch re-promoted it and every later tick forced an\n audit recommit, spraying stale snapshots of the block into scrollback for\n the whole run. Rows at/after the floor never re-promote while the block\n lives (the floor index travels with append-shaped insertions above it);\n one-off re-layouts before any promotion never arm it, and the append-only\n path commits the full block regardless.\n\nThe byte-stable end gates audited commits; the **durable snapshot end** is the\nseparate floor that guarantees no loss. `TranscriptContainer` reports the whole\nbody of a still-live **commit-stable** block (`isTranscriptBlockCommitStable?.()\n!== false`) as the snapshot-safe end, so its scrolled-off rows always reach\nhistory even while its interior re-lays-out. Provisional blocks\n(`isTranscriptBlockCommitStable?.() === false`: a collapsing tool/edit preview\nwhose head is a throwaway tail window) report no snapshot-safe end, so their\nhead is correctly dropped rather than stranded as stale history.\n\nFreezing is unconditional — it is the engine's required guarantee, not a\nper-terminal optimization.\n\n---\n\n## 3. Invariants — MUST / NEVER\n\n1. **NEVER add a new `CSI 3 J` (ED3) callsite.** ED3 flows only through\n `#emitFullPaint({ clearScrollback: true })`, only for gestures, never inside\n multiplexers.\n2. **NEVER rewrite a committed row.** No emitter may touch frame rows `< C`,\n and `W ≥ C` always (re-showing a committed row on the grid duplicates it\n for a scrolling reader — the historical corruption family). When a\n *component* violates immutability, the audit (§2) degrades to duplication —\n never silently skip rows, never erase history.\n3. **Commits are exactly the chunk.** Any byte shape that scrolls the screen\n must scroll *only* rows accounted for by `C' − C` — that is what makes\n scrollback provably `frame[0..C)`.\n4. **NEVER probe the viewport position or fork on platform in the update\n path.** win32 behaves like POSIX. The probe APIs are gone; do not\n reintroduce them.\n5. **Mutable content stays below the commit boundary.** App-layer renderers\n must finalize-before-commit; the engine trusts B and clamps, it does not\n verify content.\n6. **Park the hardware cursor at real content bottom**, not the padded window\n bottom, or height shrinks scroll live rows into history and duplicate them\n per resize step.\n7. **Cursor writes live inside the synchronized-output frame**, before ESU —\n never as a second frame after it.\n8. **NEVER throw in the render hot path.** Clamp over-wide lines\n (`truncateToWidth`); a width mismatch is cosmetic, not fatal.\n9. **Multiplexers get no destructive clear and no history rewrap on resize** —\n repaint the window in place; pane history keeps its old wrap.\n10. **Any change to the ledger math, the emitters, or the seam must be\n validated by the stress harness (§6)** across its full scenario matrix,\n not by a single-terminal smoke test.\n\n---\n\n## 4. Terminal capability detection\n\n`TERMINAL` (`terminal-capabilities.ts`) is resolved once at import from\n`TERMINAL_ID` plus environment sniffing; detection helpers are pure over\n`(env, platform)` and unit-testable.\n\n- `shouldEnableSynchronizedOutputByDefault(env, id)` → DEC 2026 default.\n Precedence: user opt-out (`PI_NO_SYNC_OUTPUT`/`PI_TUI_SYNC_OUTPUT=0`) → user\n force-on (`PI_FORCE_SYNC_OUTPUT=1`/`PI_TUI_SYNC_OUTPUT=1`) → `TERM_FEATURES`\n advertises `Sy` → `WT_SESSION` → known direct terminals → off for risky\n multiplexers and unknowns. Reconciled at runtime by the DECRQM mode-2026\n report; a user override still wins.\n- `detectRectangularSgrSupport(id, env)` → DECCARA fills: kitty only, off in\n multiplexers and under `PI_NO_DECCARA`.\n- `supportsScreenToScrollback` → kitty's ED22 (used once, on the initial\n paint, to preserve the pre-existing shell screen).\n\nThe old ED3-risk classifier (`eagerEraseScrollbackRisk`, `PI_TUI_ED3_SAFE`,\n`submitPinsViewportToTail`) is gone: behavior no longer depends on which\nterminal is rendering, so there is no risk class to detect. Env sniffing now\nonly selects *optimizations* (sync output, DECCARA, images), where a miss is\ncosmetic, not corrupting.\n\n---\n\n## 5. Width model\n\n`visibleWidth` / `truncateToWidth` / `sliceByColumn` / `wrapTextWithAnsi`\n(`utils.ts`) all route through **one native UAX#11 engine**\n(`@oh-my-pi/pi-natives`, Rust `unicode-width`). `Bun.stringWidth` was dropped\ndeliberately — mixing two width models in measure-vs-slice produced crashes.\n\n- Fast path: printable ASCII is one cell per code unit.\n- ZWJ pictographic emoji take the `visibleWidthByGrapheme` override.\n- OSC 66 sized text takes the native path.\n\n**Rule:** any new measuring code routes through these helpers, and the hot\npath clamps instead of throwing. Known residual: combining-heavy scripts\n(Arabic harakat) survive painting verbatim, but ghostty-web's cell readback can\nmigrate non-spacing marks across cells — the stress harness compares those rows\nwith marks stripped (`sameLinesAllowingMarkDrift`).\n\n---\n\n## 6. The fidelity gate (use it)\n\n`packages/tui/test/render-stress-harness.ts` drives the renderer's **real\nemitted ANSI** into a ghostty-web `VirtualTerminal` across randomized op\nsequences and parameterized terminal shapes, and validates the contract with a\n**shadow commit ledger**: an independent reimplementation of §1's math, fed\nonly by observed frames (a `render` wrap) and observed bytes (a `write` wrap).\nPer op it asserts:\n\n- the whole tape (scrollback + grid) equals `shadowTape + window slice`, row\n for row, including across resizes;\n- scrolled readers stay pinned and visible history rows are never rewritten;\n- multiplexer pane history grows by exactly the committed chunk;\n- sync-output/autowrap bracket discipline, cursor parking, background columns,\n duplicate accounting.\n\nRun it — plus `render-regressions.test.ts`,\n`streaming-scrollback-defer.test.ts`, and the `issue-*-repro.test.ts` files —\nbefore changing ledger math, emitters, or the seam. A change that passes one\nterminal and one seed is not verified.\n\n---\n\n## 7. Capability probes & stdin reassembly\n\n`ProcessTerminal` fuses capability queries with a bare DA1 (`CSI c`) sentinel so\na non-answering terminal is detected when DA1 returns first. Replies can arrive\n**split across a stdin flush**, so:\n\n- `#privateCsiResponseBuffer` accumulates `\\x1b[?…` partials while a sentinel is\n outstanding, rejoins on the terminator byte, then runs the handlers on the\n **complete** reply. A new `\\x1b` mid-reassembly or >256 bytes abandons the\n partial so real keys still reach input.\n- `#da1SentinelOwners` is a **typed FIFO** discriminated by `kind` so a\n keyboard DA1 cannot be mistaken for an OSC 11 / DECRQM / graphics-probe\n sentinel.\n- DECRQM probes (2026/2048/2031) drive runtime feature gating.\n\n**Rule:** any new probe must own a typed sentinel and survive a split reply\n(feed the reply byte-by-byte in a test and assert nothing leaks to input).\n\n---\n\n## 8. Inline images & memory\n\nKitty images are **transmit-once, place-many** (`kitty-graphics.ts`).\n`ImageBudget` keeps only the most-recent N images live; when the cap is\nexceeded the demoted image's pixels are deleted by id (`a=d,d=I`) and its\nvisible rows re-render as the text fallback through the ordinary window diff —\n**no destructive replay**. A demoted placement already committed to history\nsimply loses its pixels (committed rows are immutable), and the text fallback\nis **height-preserving** once a graphic has rendered (reserved rows + fallback\nline), so demotion never shrinks the block and never shifts committed content\nbelow it.\n\n**Rule:** never re-emit full base64 per frame. Kitty Unicode placeholders are\ndefault-on only for kitty/ghostty (`PI_NO_KITTY_PLACEHOLDERS` /\n`PI_KITTY_PLACEHOLDERS`).\n\n---\n\n## 9. Escape hatches (env vars)\n\n| Var | Effect |\n|---|---|\n| `PI_NO_SYNC_OUTPUT=1` | Disable DEC 2026 BSU/ESU wrappers (autowrap discipline stays on). |\n| `PI_TUI_SYNC_OUTPUT=0\\|1` / `PI_FORCE_SYNC_OUTPUT=1` | Force sync output off / on. |\n| `PI_NO_DECCARA` | Disable Kitty DECCARA rectangular-fill optimization. |\n| `PI_FORCE_IMAGE_PROTOCOL=kitty\\|iterm2\\|sixel\\|off` | Override image protocol detection. |\n| `PI_NO_KITTY_PLACEHOLDERS=1` / `PI_KITTY_PLACEHOLDERS=1` | Force Kitty Unicode placeholders off / on. |\n| `PI_HARDWARE_CURSOR=1` | Show the real hardware cursor instead of a rendered one. |\n| `PI_NOTIFICATIONS=off\\|0\\|false` | Suppress terminal notifications. |\n| `PI_DEBUG_REDRAW=1` | Log the chosen render intent + ledger state per frame to the debug log. |\n\nRemoved with the old engine: `PI_TUI_ED3_SAFE` (no ED3-risk lever exists),\n`PI_CLEAR_ON_SHRINK` (shrinks always clear exactly), `PI_TUI_DEBUG` (per-render\ndump superseded by `PI_DEBUG_REDRAW` ledger logging and the stress harness\nreplay/reduce tooling).\n\n---\n\n## 10. Before you touch the render core — checklist\n\n- [ ] Are you about to emit `CSI 3 J` anywhere other than the gesture-driven\n `clearScrollback` full paint? **Stop.**\n- [ ] Could any code path rewrite, or re-show on the grid, a frame row below\n `committedRows`? **Stop.**\n- [ ] Does your byte shape scroll rows that are not the commit chunk? That\n breaks `scrollback == frame[0..C)`.\n- [ ] Are you adding a viewport probe, a platform fork, or a terminal-brand\n branch to the update path? The contract exists so none are needed.\n- [ ] New mutable UI above the editor? It must report (or live inside) the\n live-region seam, or it will freeze at first commit.\n- [ ] Did you run the stress harness and the repro suite across the full\n scenario matrix — not just one terminal and one seed?\n- [ ] New probe? Typed sentinel owner + split-reply test.\n- [ ] New width path? Routed through the shared native engine, clamped (never\n thrown) in the hot path.\n",
|
|
118
|
-
"tui-runtime-internals.md": "# TUI runtime internals\n\nThis document maps the non-theme runtime path from terminal input to rendered output in interactive mode. It focuses on behavior in `packages/tui` and its integration from `packages/coding-agent` controllers.\n\n> **Editing the rendering engine itself?** Read\n> [`tui-core-renderer.md`](./tui-core-renderer.md) first — it documents the\n> failure modes (yank / corruption / flash / width crashes) and the invariants\n> the render planner, native-scrollback bookkeeping, and capability detection\n> must not violate.\n\n## Runtime layers and ownership\n\n- **`packages/tui` engine**: terminal lifecycle, stdin normalization, focus routing, render scheduling, differential painting, overlay composition, hardware cursor placement.\n- **`packages/coding-agent` interactive mode**: builds component tree, binds editor callbacks and keymaps, reacts to agent/session events, and translates domain state (streaming, tool execution, retries, plan mode) into UI components.\n\nBoundary rule: the TUI engine is message-agnostic. It only knows `Component.render(width)`, `handleInput(data)`, focus, and overlays. Agent semantics stay in interactive controllers.\n\n## Implementation files\n\n- [`packages/coding-agent/src/modes/interactive-mode.ts`](../packages/coding-agent/src/modes/interactive-mode.ts)\n- [`packages/coding-agent/src/modes/controllers/event-controller.ts`](../packages/coding-agent/src/modes/controllers/event-controller.ts)\n- [`packages/coding-agent/src/modes/controllers/input-controller.ts`](../packages/coding-agent/src/modes/controllers/input-controller.ts)\n- [`packages/coding-agent/src/modes/components/custom-editor.ts`](../packages/coding-agent/src/modes/components/custom-editor.ts)\n- [`packages/tui/src/tui.ts`](../packages/tui/src/tui.ts)\n- [`packages/tui/src/terminal.ts`](../packages/tui/src/terminal.ts)\n- [`packages/tui/src/editor-component.ts`](../packages/tui/src/editor-component.ts)\n- [`packages/tui/src/stdin-buffer.ts`](../packages/tui/src/stdin-buffer.ts)\n- [`packages/tui/src/components/loader.ts`](../packages/tui/src/components/loader.ts)\n\n## Boot and component tree assembly\n\n`InteractiveMode` constructs `TUI(new ProcessTerminal(), settings.get(\"showHardwareCursor\"))`, applies `tui.maxInlineImages` and Kitty text-sizing settings, then creates persistent containers:\n\n- `chatContainer`\n- `pendingMessagesContainer`\n- `statusContainer`\n- `todoContainer`\n- `subagentContainer`\n- `btwContainer`\n- `omfgContainer`\n- `errorBannerContainer`\n- `statusLine`\n- `hookWidgetContainerAbove`\n- `editorContainer` (holds `CustomEditor`)\n- `hookWidgetContainerBelow`\n\n`init()` wires the tree in that order after any startup warnings/welcome/changelog, focuses the editor, registers input handlers via `InputController`, starts TUI, pushes terminal title state, updates the editor border, and requests a forced render.\nA forced render (`requestRender(true)`) queues a viewport repaint or explicit session replacement; it does **not** throw away previous-line history by default.\n\n## Terminal lifecycle and stdin normalization\n\n`ProcessTerminal.start()`:\n\n1. Enables raw mode and bracketed paste.\n2. Attaches resize handler and refreshes dimensions.\n3. Enables Windows VT input mode when running on win32.\n4. Creates a `StdinBuffer` to split partial escape chunks into complete sequences.\n5. Queries Kitty keyboard protocol support (`CSI ? u`), then enables protocol flags if supported; otherwise enables modifyOtherKeys fallback after a short timeout.\n6. Queries OSC 11 background color and Mode 2031 appearance notifications for dark/light theme detection.\n7. Queries OSC 99 notification capabilities.\n8. Starts periodic OSC 11 polling only where safe, then probes DEC private modes 2026/2048/2031 via DECRQM.\n\n`StdinBuffer` behavior:\n\n- Buffers fragmented escape sequences (CSI/OSC/DCS/APC/SS3).\n- Emits `data` only when a sequence is complete or timeout-flushed.\n- Detects bracketed paste and emits a `paste` event with raw pasted text.\n\nThis prevents partial escape chunks from being misinterpreted as normal keypresses.\n\n## Input routing and focus model\n\nInput path:\n\n`stdin -> ProcessTerminal -> StdinBuffer -> TUI.#handleInput -> focusedComponent.handleInput`\n\nRouting details:\n\n1. TUI runs registered input listeners first (`addInputListener`), allowing consume/transform behavior.\n2. TUI handles global debug shortcut (`shift+ctrl+d`) before component dispatch.\n3. If focused component belongs to an overlay that is now hidden/invisible, TUI reassigns focus to next visible overlay or saved pre-overlay focus.\n4. Key release events are filtered unless focused component sets `wantsKeyRelease = true`.\n5. After dispatch, TUI schedules render.\n\n`setFocus()` also toggles `Focusable.focused`, which controls whether components emit `CURSOR_MARKER` for hardware cursor placement.\n\n## Key handling split: editor vs controller\n\n`CustomEditor` intercepts high-priority combos first (escape, ctrl-c/d/z, ctrl-v, ctrl-p variants, ctrl-t, alt-up, extension custom keys) and delegates the rest to base `Editor` behavior (text editing, history, autocomplete, cursor movement).\n\n`InputController.setupKeyHandlers()` then binds editor callbacks to mode actions:\n\n- cancellation / mode exits on `Escape`\n- shutdown on double `Ctrl+C` or empty-editor `Ctrl+D`\n- suspend/resume on `Ctrl+Z`\n- slash-command and selector hotkeys\n- follow-up/dequeue toggles and expansion toggles\n\nThis keeps key parsing/editor mechanics in `packages/tui` and mode semantics in coding-agent controllers.\n\n## Render loop and the append-only contract\n\n`TUI.requestRender()` coalesces render requests and rate-limits ordinary frames:\n\n- forced renders (`requestRender(true, ...)`) schedule an immediate frame and force a full window rewrite; with `clearScrollback`, they trigger a destructive full paint (ED3 outside multiplexers)\n- ordinary renders schedule through `#scheduleRender()` and respect `TUI.#MIN_RENDER_INTERVAL_MS`\n- repeated requests while a render is pending collapse into the same scheduled frame\n- `requestComponentRender(component)` requests on behalf of a single self-contained change (spinner frame, blink): when every request in the coalesced frame is component-scoped and the frame is quiet (no resize, overlays, inline images, forced repaint, or root-list change), compose re-renders only the root subtrees containing the requesting components and reuses every other root child's previous rows and seam report; any unsafe condition or concurrent full request downgrades to a full compose\n\n`#doRender()` pipeline:\n\n1. Render root component tree, collecting the commit-boundary seam (`NativeScrollbackLiveRegion`) from the children.\n2. Advance the append-only ledger: `windowTop = max(committedRows, frame.length - height)`, commit chunk = settled rows crossing the window top (never past the seam).\n3. Extract and strip `CURSOR_MARKER`, normalize lines, slice the visible window, composite overlays into the window slice (screen coordinates; overlays freeze commits).\n4. Emit one of: gesture-driven full paint (initial / session replace / resize), scroll-append (chunk rows only), in-window row diff, or seam rewrite (chunk + full window).\n\nNative scrollback always equals the committed frame prefix — rows enter history exactly once, in order, when the seam says they are final. There are no viewport probes and no deferred reconciliation; see [`tui-core-renderer.md`](./tui-core-renderer.md).\n\nRender writes use synchronized output mode (`CSI ? 2026 h/l`) when enabled; capability detection, DECRQM, or `PI_NO_SYNC_OUTPUT` can disable the wrappers while leaving autowrap discipline on.\n\n## Render safety constraints\n\nCritical safety checks in `TUI`:\n\n- Non-image rendered lines are expected to fit terminal width; the differential path truncates overwide lines as a last-resort guard and can write debug diagnostics when redraw debugging is enabled.\n- Overlay compositing includes defensive truncation and post-composite width guarding.\n- Width changes force repaint/rebuild planning because wrapping semantics change.\n- Cursor position is clamped before movement.\n\nThese constraints are runtime guards plus component conventions; renderers should still return width-safe lines rather than rely on truncation.\n\nThe deeper reasons these guards exist — why the renderer cannot observe scroll\nposition, why ED3 (`CSI 3 J`) is confined to one path, and why the hot path\nclamps instead of throwing — are documented in\n[`tui-core-renderer.md`](./tui-core-renderer.md).\n\n## Resize handling\n\nResize events are event-driven from `ProcessTerminal` to `TUI.requestRender()`.\n\nEffects:\n\n- A resize is an explicit user gesture: outside multiplexers the engine erases and replays (`ED3` + full paint) so history rewraps at the new geometry; the commit ledger restarts from the replayed frame.\n- Inside terminal multiplexers, resize repaints the visible window in place after a settle debounce (issue #2088); pane history keeps its old wrap, like any shell output, because pane scrollback cannot be erased safely.\n- Overlay visibility can depend on terminal dimensions (`OverlayOptions.visible`); focus is corrected when overlays become non-visible after resize.\n\n## Streaming and incremental UI updates\n\n`EventController` subscribes to `AgentSessionEvent` and updates UI incrementally:\n\n- `agent_start`: starts loader in `statusContainer`.\n- `message_start` assistant: creates `streamingComponent` and mounts it.\n- `message_update`: updates streaming assistant content; creates/updates tool execution components as tool calls appear.\n- `tool_execution_update/end`: updates tool result components and completion state.\n- `message_end`: finalizes assistant stream, handles aborted/error annotations, marks pending tool args complete on normal stop.\n- `agent_end`: stops loaders, clears transient stream state, flushes deferred model switch, issues completion notification if backgrounded.\n\nRead-tool grouping is intentionally stateful (`#lastReadGroup`) to coalesce consecutive read tool calls into one visual block until a non-read break occurs.\n\n## Status and loader orchestration\n\nStatus lane ownership:\n\n- `statusContainer` holds transient loaders (`loadingAnimation`, `autoCompactionLoader`, `retryLoader`).\n- `statusLine` renders persistent status/hooks/plan indicators and drives editor top border updates.\n\nLoader behavior:\n\n- `Loader` advances its spinner every 80ms (animated message colorizers redraw at ~30fps) and requests a component-scoped render each frame (`requestComponentRender`), so idle spinner ticks repaint without re-walking the transcript.\n- Escape handlers are temporarily overridden during auto-compaction and auto-retry to cancel those operations.\n- On end/cancel paths, controllers restore prior escape handlers and stop/clear loader components.\n\n## Mode transitions and backgrounding\n\n### Bash/Python input modes\n\nInput text prefixes toggle editor border mode flags:\n\n- `!` -> bash mode\n- `$` (non-template literal prefix) -> python mode\n\nEscape exits inactive mode by clearing editor text and restoring border color; when execution is active, escape aborts the running task instead.\n\n### Plan mode\n\n`InteractiveMode` tracks plan mode flags, status-line state, active tools, and model switching. Enter/exit updates session mode entries and status/UI state, including deferred model switch if streaming is active.\n\n### Suspend/resume (`Ctrl+Z`)\n\n`InputController.handleCtrlZ()`:\n\n1. Registers one-shot `SIGCONT` handler to restart TUI and force render.\n2. Stops TUI before suspend.\n3. Sends `SIGTSTP` to process group.\n\n## Cancellation paths\n\nPrimary cancellation inputs:\n\n- `Escape` during active stream loader: restores queued messages to editor and aborts agent.\n- `Escape` during bash/python execution: aborts running command.\n- `Escape` during auto-compaction/retry: invokes dedicated abort methods through temporary escape handlers.\n- `Ctrl+C` single press: clear editor; double press within 500ms: shutdown.\n\nCancellation is state-conditional; same key can mean abort, mode-exit, selector trigger, or no-op depending on runtime state.\n\n## Event-driven vs throttled behavior\n\nEvent-driven updates:\n\n- Agent session events (`EventController`)\n- Key input callbacks (`InputController`)\n- terminal resize callback\n- terminal appearance callbacks, SIGWINCH theme reevaluation, and git branch watchers in `InteractiveMode`\n\nThrottled/debounced paths:\n\n- TUI rendering is tick-debounced (`requestRender` coalescing).\n- Loader animation is interval-driven (80ms spinner advance; ~30fps when the message colorizer is animated), each frame requesting a component-scoped render.\n- Editor autocomplete updates (inside `Editor`) use debounce timers, reducing recompute churn during typing.\n\nThe runtime therefore mixes event-driven state transitions with bounded render cadence to keep interactivity responsive without repaint storms.\n",
|
|
117
|
+
"tui-core-renderer.md": "# TUI core renderer — the append-only contract\n\nWhat you are dealing with before you touch the rendering engine. This is the\ncompanion to [`tui-runtime-internals.md`](./tui-runtime-internals.md): that doc\nmaps the *flow* (input → component tree → render); this doc explains the\n**render contract, why it is shaped this way, and the invariants you must not\nviolate**. Scope is the core engine only:\n\n- [`packages/tui/src/tui.ts`](../packages/tui/src/tui.ts) — frame pipeline, commit ledger, window math, emitters, cursor placement.\n- [`packages/tui/src/terminal.ts`](../packages/tui/src/terminal.ts) — `ProcessTerminal`, capability probes, private-CSI reassembly.\n- [`packages/tui/src/terminal-capabilities.ts`](../packages/tui/src/terminal-capabilities.ts) — `TERMINAL` profile, sync-output / DECCARA / image detection.\n- [`packages/tui/src/stdin-buffer.ts`](../packages/tui/src/stdin-buffer.ts) — escape-sequence reassembly.\n- [`packages/tui/src/utils.ts`](../packages/tui/src/utils.ts) — width/slice/wrap (the width model).\n- [`packages/tui/src/kitty-graphics.ts`](../packages/tui/src/kitty-graphics.ts) + [`components/image.ts`](../packages/tui/src/components/image.ts) — inline images.\n- [`packages/tui/src/deccara.ts`](../packages/tui/src/deccara.ts) — rectangular-fill optimizer.\n\nApplication-layer renderers (transcript, tool calls, session tree, editor,\nwidgets) are **out of scope** — they live in `packages/coding-agent`. The one\napp-layer file that is load-bearing for this contract is\n[`transcript-container.ts`](../packages/coding-agent/src/modes/components/transcript-container.ts),\nwhich implements the commit-boundary seam described below.\n\n---\n\n## 1. The one thing to understand first\n\n> **The renderer cannot observe the terminal's scroll position** (ConPTY's\n> probe lies; POSIX has no API at all). The previous engine tried to *guess*\n> when it was safe to rewrite native scrollback, and every policy choice over\n> that unobservable variable traded one failure family for another (yank ↔\n> flash ↔ corruption ↔ invisible-until-resize — see the git history of this\n> file for the full war journal). The current engine removes the guess\n> entirely: **native scrollback is append-only.**\n\nWe keep the transcript on the **normal screen** (native scrollback, native\nselection, transcript persists after exit). The engine maintains one ledger:\n\n- **`committedRows` (C)** — frame rows `[0, C)` have been physically scrolled\n into terminal history. They are **immutable**: the engine never rewrites\n them, and components must never change them.\n- **`windowTopRow` (W)** — the frame row mapped to grid row 0. The visible\n window is frame rows `[W, W + height)`, repainted in place with relative\n cursor moves.\n- **commit boundary** — reported by the component tree per frame\n (`NativeScrollbackLiveRegion`) as two nested ends:\n - **byte-stable end (B)** — `commitSafeEnd ?? liveRegionStart ?? frame.length`.\n Rows below B are asserted never to re-layout and stay under the\n committed-prefix audit.\n - **durable end (D)** — `max(B, snapshotSafeEnd ?? B)`. Rows in `[B, D)` may\n still drift bytes later (a streaming markdown table re-aligning columns) but\n are *durable* — their current snapshot is permanent content, so dropping them\n when they scroll off is forbidden. They commit **audit-exempt**: later drift\n becomes a frozen stale row in history, never a re-anchor.\n\nPer ordinary frame: `W = max(C, L − height)`, `C' = max(C, min(D, W))`, and the\nonly bytes that ever touch history are the **chunk** `frame[C, C')` written at\nthe scrollback seam. The engine also tracks **`auditRows` (A ≤ C)** — the\nbyte-stable leading prefix `[0, A)`; the committed-prefix audit (§2) samples only\nthat prefix, so the durable suffix `[A, C)` drifting never triggers a re-anchor.\nScrollback therefore equals `frame[0..C)` — every row exactly once, in order,\nwith its content at commit time. There is nothing to guess, nothing to defer,\nand nothing to reconcile: the scroll position is irrelevant because ordinary\nupdates never rewrite anything a scrolled reader could be looking at.\n\n### What this costs (the accepted tradeoffs)\n\n- A block that has scrolled past the window top cannot reflow in place. A\n byte-stable block stays in the live region (below B) until final; a durable\n block (below D) commits its scroll-off snapshot, so a late layout change of an\n already-committed row is a frozen stale row in history (duplication never loss),\n not a dropped row.\n- A component tree that reports **no seam** gets shell semantics: whatever\n scrolls off is final. Shrinking such a frame into its committed prefix\n re-anchors the window and leaves the stale copy in history (§3).\n- Inside multiplexers, a resize leaves the pane history wrapped at the old\n width (same as any shell output).\n\n---\n\n## 2. The frame pipeline (what you are editing)\n\n`#doRender` per frame:\n\n1. Compose the frame (`render(width)`), collecting `liveRegionStart` /\n `commitSafeEnd` from the root children (absolute row indices).\n2. **Audit the committed prefix** (`findCommittedPrefixResync`, skipped on\n geometry frames). Components must never re-layout rows below C, but real\n flows violate it (a TTSR rewind truncating a streamed block, an image-cap\n demotion shrinking a committed image) and the violation must not become\n content loss. The detector samples the prefix *tail* (up to 8 non-blank\n rows in the last 24, SGR-stripped): an in-place edit or restyle disturbs\n only the touched rows (≤1 mismatch ⇒ aligned ⇒ ignored — stale styling in\n history is the accepted artifact), while any insertion/deletion shifts\n every row below it including the tail (⇒ re-anchor C at the first changed\n row and recommit from there: history keeps the stale copy and gains a\n fresh one — **duplication, never loss**).\n3. Classify: **fullPaint** (first paint, `clearScrollback` session replace, or\n geometry change outside a multiplexer — all user gestures) or **update**.\n4. Window math as in §1. Two special rules:\n - **Overlays freeze commits** (`C' = C`): composited rows must never enter\n history; the hidden gap backfills via the chunk after the overlay closes.\n - **Shrink into the committed prefix** (`L ≤ C`): re-anchor\n `W = max(0, L − height)`, reset `C = min(B, W)`, keep the stale history\n above (no gesture, no erase).\n5. Extract the cursor marker (strip-first: markers never reach the terminal,\n the prefix ledger, or the audit), prepare lines (width fitting), slice the\n window, composite overlays **into the window slice only** (screen\n coordinates — an overlay never touches the frame or the ledger).\n6. Emit:\n\n| Emitter | Bytes | When |\n|---|---|---|\n| `#emitFullPaint` | clears + `frame[0, C')` + window rows | gestures only. `clearScrollback` ⇒ `\\x1b[2J\\x1b[H\\x1b[3J`; otherwise ED22 (when supported) + `\\x1b[2J\\x1b[H` |\n| `#emitUpdate` scroll-append | `\\r\\n` + new bottom rows + changed-row range | the rows leaving the screen are exactly the chunk, content untouched since painted |\n| `#emitUpdate` in-window diff | relative move + changed-row range rewrite | nothing scrolls, nothing commits (cursor-only when nothing changed) |\n| `#emitUpdate` seam rewrite | chunk rows + full window rewrite | commit advance, window re-anchor, hidden-gap backfill, mux resize |\n\n**ED3 (`CSI 3 J`) is emitted in exactly one place** — `#emitFullPaint` with\n`clearScrollback: true` — and is reached only by user gestures: session\nreplace/branch/resume (`requestRender(true, { clearScrollback: true })`),\nresize outside a multiplexer, `resetDisplay()` (Ctrl+L). A gesture pins the\nuser to the tail, so the snap is acceptable; multiplexers never get ED3 (it is\na no-op there and a replay would duplicate pane history).\n\nThe ordinary update path never emits ED2/ED3 or an absolute cursor home —\nseveral terminal families snap a scrolled reader to the bottom on those.\n\n### The commit-boundary seam (the load-bearing app contract)\n\n`NativeScrollbackLiveRegion` (tui.ts) is how a component keeps mutable rows out\nof history:\n\n- `getNativeScrollbackLiveRegionStart()` — first row that may still mutate\n (everything below it, including root chrome rendered after it, stays in the\n window).\n- `getNativeScrollbackCommitSafeEnd()` — optional **byte-stable** deeper boundary\n (B): the append-only prefix of the live region (a streaming assistant message's\n settled rows), asserted never to re-layout, so it stays under the audit.\n- `getNativeScrollbackSnapshotSafeEnd()` — optional **durable** deeper boundary\n (D ≥ B): rows whose current snapshot is permanent but may still drift bytes\n (a streaming markdown table whose columns keep re-aligning). They commit on\n scroll-off (never dropped) but **audit-exempt** — drift after commit freezes a\n stale row in history rather than re-anchoring the audit and spraying duplicate\n snapshots. Without it, a commit-stable block that perpetually re-lays-out an\n interior row (a table taller than the window) had no byte-stable prefix past\n the table head, so its scrolled-off rows were committed nowhere and repainted\n nowhere — silent content loss as the reply streamed.\n\n`TranscriptContainer` implements this for the coding agent: finalized blocks\nfreeze (their render is snapshotted, so their content can never drift after\nthe engine may have committed it), still-mutating blocks\n(`isTranscriptBlockFinalized?.() === false`) anchor the live region, and\n`deriveLiveCommitState` derives the byte-stable commit-safe end of the first\nlive block from two independent signals:\n\n- **append-only detection** — a block observed growing without visibly\n rewriting an interior row commits its full body; a rewrite suspends this\n for `VOLATILE_REARM_FRAMES` clean frames.\n- **stable-prefix ratchet** — rows that stayed visibly identical for a full\n `STABLE_PREFIX_COMMIT_FRAMES` window commit even while the block's tail\n keeps rewriting (a task tool's static prompt above a ticking progress\n tree). Without it, one perpetually animating row holds the whole block out\n of history, so a block taller than the window reads as cut off (head\n neither committed nor on screen) for the entire run. The ratchet tracks the\n window-minimum common prefix; a rewrite above the promoted run retreats it\n to the divergence, and rows that already committed are the engine audit's\n problem (recommit → duplication, never loss). That retreat also arms a\n permanent **rewrite floor** at the divergence: a row that mutates *after*\n surviving a full promotion window is a slow ticker (an agent row's tool/cost\n counter updating every few seconds), not settling content — without the\n floor, every quiet stretch re-promoted it and every later tick forced an\n audit recommit, spraying stale snapshots of the block into scrollback for\n the whole run. Rows at/after the floor never re-promote while the block\n lives (the floor index travels with append-shaped insertions above it);\n one-off re-layouts before any promotion never arm it, and the append-only\n path commits the full block regardless.\n\nThe byte-stable end gates audited commits; the **durable snapshot end** is the\nseparate floor that guarantees no loss. `TranscriptContainer` reports the whole\nbody of a still-live **commit-stable** block (`isTranscriptBlockCommitStable?.()\n!== false`) as the snapshot-safe end, so its scrolled-off rows always reach\nhistory even while its interior re-lays-out. Provisional blocks\n(`isTranscriptBlockCommitStable?.() === false`: a collapsing tool/edit preview\nwhose head is a throwaway tail window) report no snapshot-safe end, so their\nhead is correctly dropped rather than stranded as stale history.\n\nFreezing is unconditional — it is the engine's required guarantee, not a\nper-terminal optimization.\n\n---\n\n## 3. Invariants — MUST / NEVER\n\n1. **NEVER add a new `CSI 3 J` (ED3) callsite.** ED3 flows only through\n `#emitFullPaint({ clearScrollback: true })`, only for gestures, never inside\n multiplexers.\n2. **NEVER rewrite a committed row.** No emitter may touch frame rows `< C`,\n and `W ≥ C` always (re-showing a committed row on the grid duplicates it\n for a scrolling reader — the historical corruption family). When a\n *component* violates immutability, the audit (§2) degrades to duplication —\n never silently skip rows, never erase history.\n3. **Commits are exactly the chunk.** Any byte shape that scrolls the screen\n must scroll *only* rows accounted for by `C' − C` — that is what makes\n scrollback provably `frame[0..C)`.\n4. **NEVER probe the viewport position or fork on platform in the update\n path.** win32 behaves like POSIX. The probe APIs are gone; do not\n reintroduce them.\n5. **Mutable content stays below the commit boundary.** App-layer renderers\n must finalize-before-commit; the engine trusts B and clamps, it does not\n verify content.\n6. **Park the hardware cursor at real content bottom**, not the padded window\n bottom, or height shrinks scroll live rows into history and duplicate them\n per resize step.\n7. **Cursor writes live inside the synchronized-output frame**, before ESU —\n never as a second frame after it.\n8. **NEVER throw in the render hot path.** Clamp over-wide lines\n (`truncateToWidth`); a width mismatch is cosmetic, not fatal.\n9. **Multiplexers get no destructive clear and no history rewrap on resize** —\n repaint the window in place; pane history keeps its old wrap.\n10. **Any change to the ledger math, the emitters, or the seam must be\n validated by the stress harness (§6)** across its full scenario matrix,\n not by a single-terminal smoke test.\n\n---\n\n## 4. Terminal capability detection\n\n`TERMINAL` (`terminal-capabilities.ts`) is resolved once at import from\n`TERMINAL_ID` plus environment sniffing; detection helpers are pure over\n`(env, platform)` and unit-testable.\n\n- `shouldEnableSynchronizedOutputByDefault(env, id)` → DEC 2026 default.\n Precedence: user opt-out (`PI_NO_SYNC_OUTPUT`/`PI_TUI_SYNC_OUTPUT=0`) → user\n force-on (`PI_FORCE_SYNC_OUTPUT=1`/`PI_TUI_SYNC_OUTPUT=1`) → `TERM_FEATURES`\n advertises `Sy` → `WT_SESSION` → known direct terminals → off for risky\n multiplexers and unknowns. Reconciled at runtime by the DECRQM mode-2026\n report; a user override still wins.\n- `detectRectangularSgrSupport(id, env)` → DECCARA fills: kitty only, off in\n multiplexers and under `PI_NO_DECCARA`.\n- `supportsScreenToScrollback` → kitty's ED22 (used once, on the initial\n paint, to preserve the pre-existing shell screen).\n\nThe old ED3-risk classifier (`eagerEraseScrollbackRisk`, `PI_TUI_ED3_SAFE`,\n`submitPinsViewportToTail`) is gone: behavior no longer depends on which\nterminal is rendering, so there is no risk class to detect. Env sniffing now\nonly selects *optimizations* (sync output, DECCARA, images), where a miss is\ncosmetic, not corrupting.\n\n---\n\n## 5. Width model\n\n`visibleWidth` / `truncateToWidth` / `sliceByColumn` / `wrapTextWithAnsi`\n(`utils.ts`) all route through **one native UAX#11 engine**\n(`@oh-my-pi/pi-natives`, Rust `unicode-width`). `Bun.stringWidth` was dropped\ndeliberately — mixing two width models in measure-vs-slice produced crashes.\n\n- Fast path: printable ASCII is one cell per code unit.\n- ZWJ pictographic emoji take the `visibleWidthByGrapheme` override.\n- OSC 66 sized text takes the native path.\n\n**Rule:** any new measuring code routes through these helpers, and the hot\npath clamps instead of throwing. Known residual: combining-heavy scripts\n(Arabic harakat) survive painting verbatim, but ghostty-web's cell readback can\nmigrate non-spacing marks across cells — the stress harness compares those rows\nwith marks stripped (`sameLinesAllowingMarkDrift`).\n\n---\n\n## 6. The fidelity gate (use it)\n\n`packages/tui/test/render-stress-harness.ts` drives the renderer's **real\nemitted ANSI** into a ghostty-web `VirtualTerminal` across randomized op\nsequences and parameterized terminal shapes, and validates the contract with a\n**shadow commit ledger**: an independent reimplementation of §1's math, fed\nonly by observed frames (a `render` wrap) and observed bytes (a `write` wrap).\nPer op it asserts:\n\n- the whole tape (scrollback + grid) equals `shadowTape + window slice`, row\n for row, including across resizes;\n- scrolled readers stay pinned and visible history rows are never rewritten;\n- multiplexer pane history grows by exactly the committed chunk;\n- sync-output/autowrap bracket discipline, cursor parking, background columns,\n duplicate accounting.\n\nRun it — plus `render-regressions.test.ts`,\n`streaming-scrollback-defer.test.ts`, and the `issue-*-repro.test.ts` files —\nbefore changing ledger math, emitters, or the seam. A change that passes one\nterminal and one seed is not verified.\n\n---\n\n## 7. Capability probes & stdin reassembly\n\n`ProcessTerminal` fuses capability queries with a bare DA1 (`CSI c`) sentinel so\na non-answering terminal is detected when DA1 returns first. Replies can arrive\n**split across a stdin flush**, so:\n\n- `#privateCsiResponseBuffer` accumulates `\\x1b[?…` partials while a sentinel is\n outstanding, rejoins on the terminator byte, then runs the handlers on the\n **complete** reply. A new `\\x1b` mid-reassembly or >256 bytes abandons the\n partial so real keys still reach input.\n- `#da1SentinelOwners` is a **typed FIFO** discriminated by `kind` so a\n keyboard DA1 cannot be mistaken for an OSC 11 / DECRQM / graphics-probe\n sentinel.\n- DECRQM probes (2026/2048/2031) drive runtime feature gating.\n\n**Rule:** any new probe must own a typed sentinel and survive a split reply\n(feed the reply byte-by-byte in a test and assert nothing leaks to input).\n\n---\n\n## 8. Inline images & memory\n\nKitty images are **transmit-once, place-many** (`kitty-graphics.ts`).\n`ImageBudget` keeps only the most-recent N images live; when the cap is\nexceeded the demoted image's pixels are deleted by id (`a=d,d=I`) and its\nvisible rows re-render as the text fallback through the ordinary window diff —\n**no destructive replay**. A demoted placement already committed to history\nsimply loses its pixels (committed rows are immutable), and the text fallback\nis **height-preserving** once a graphic has rendered (reserved rows + fallback\nline), so demotion never shrinks the block and never shifts committed content\nbelow it.\n\n**Rule:** never re-emit full base64 per frame. Kitty Unicode placeholders are\ndefault-on only for kitty/ghostty (`PI_NO_KITTY_PLACEHOLDERS` /\n`PI_KITTY_PLACEHOLDERS`).\n\n---\n\n## 9. Escape hatches (env vars)\n\n| Var | Effect |\n|---|---|\n| `PI_NO_SYNC_OUTPUT=1` | Disable DEC 2026 BSU/ESU wrappers (autowrap discipline stays on). |\n| `PI_TUI_SYNC_OUTPUT=0\\|1` / `PI_FORCE_SYNC_OUTPUT=1` | Force sync output off / on. |\n| `PI_NO_DECCARA` | Disable Kitty DECCARA rectangular-fill optimization. |\n| `PI_FORCE_IMAGE_PROTOCOL=kitty\\|iterm2\\|sixel\\|off` | Override image protocol detection. |\n| `PI_NO_KITTY_PLACEHOLDERS=1` / `PI_KITTY_PLACEHOLDERS=1` | Force Kitty Unicode placeholders off / on. |\n| `PI_HARDWARE_CURSOR=1` | Show the real hardware cursor instead of a rendered one. |\n| `PI_NOTIFICATIONS=off\\|0\\|false` | Suppress terminal notifications. |\n| `PI_DEBUG_REDRAW=1` | Log the chosen render intent + ledger state per frame to the debug log. |\n| `PI_TUI_RESIZE_IN_PLACE=1\\|0` | Force resize to repaint in place (no alt-screen borrow, no ED3 rewrap) on / off. Default-on for terminals that re-report size on alt-screen toggles (Warp). |\n\nRemoved with the old engine: `PI_TUI_ED3_SAFE` (no ED3-risk lever exists),\n`PI_CLEAR_ON_SHRINK` (shrinks always clear exactly), `PI_TUI_DEBUG` (per-render\ndump superseded by `PI_DEBUG_REDRAW` ledger logging and the stress harness\nreplay/reduce tooling).\n\n---\n\n## 10. Before you touch the render core — checklist\n\n- [ ] Are you about to emit `CSI 3 J` anywhere other than the gesture-driven\n `clearScrollback` full paint? **Stop.**\n- [ ] Could any code path rewrite, or re-show on the grid, a frame row below\n `committedRows`? **Stop.**\n- [ ] Does your byte shape scroll rows that are not the commit chunk? That\n breaks `scrollback == frame[0..C)`.\n- [ ] Are you adding a viewport probe, a platform fork, or a terminal-brand\n branch to the update path? The contract exists so none are needed.\n- [ ] New mutable UI above the editor? It must report (or live inside) the\n live-region seam, or it will freeze at first commit.\n- [ ] Did you run the stress harness and the repro suite across the full\n scenario matrix — not just one terminal and one seed?\n- [ ] New probe? Typed sentinel owner + split-reply test.\n- [ ] New width path? Routed through the shared native engine, clamped (never\n thrown) in the hot path.\n",
|
|
118
|
+
"tui-runtime-internals.md": "# TUI runtime internals\n\nThis document maps the non-theme runtime path from terminal input to rendered output in interactive mode. It focuses on behavior in `packages/tui` and its integration from `packages/coding-agent` controllers.\n\n> **Editing the rendering engine itself?** Read\n> [`tui-core-renderer.md`](./tui-core-renderer.md) first — it documents the\n> failure modes (yank / corruption / flash / width crashes) and the invariants\n> the render planner, native-scrollback bookkeeping, and capability detection\n> must not violate.\n\n## Runtime layers and ownership\n\n- **`packages/tui` engine**: terminal lifecycle, stdin normalization, focus routing, render scheduling, differential painting, overlay composition, hardware cursor placement.\n- **`packages/coding-agent` interactive mode**: builds component tree, binds editor callbacks and keymaps, reacts to agent/session events, and translates domain state (streaming, tool execution, retries, plan mode) into UI components.\n\nBoundary rule: the TUI engine is message-agnostic. It only knows `Component.render(width)`, `handleInput(data)`, focus, and overlays. Agent semantics stay in interactive controllers.\n\n## Implementation files\n\n- [`packages/coding-agent/src/modes/interactive-mode.ts`](../packages/coding-agent/src/modes/interactive-mode.ts)\n- [`packages/coding-agent/src/modes/controllers/event-controller.ts`](../packages/coding-agent/src/modes/controllers/event-controller.ts)\n- [`packages/coding-agent/src/modes/controllers/input-controller.ts`](../packages/coding-agent/src/modes/controllers/input-controller.ts)\n- [`packages/coding-agent/src/modes/components/custom-editor.ts`](../packages/coding-agent/src/modes/components/custom-editor.ts)\n- [`packages/tui/src/tui.ts`](../packages/tui/src/tui.ts)\n- [`packages/tui/src/terminal.ts`](../packages/tui/src/terminal.ts)\n- [`packages/tui/src/editor-component.ts`](../packages/tui/src/editor-component.ts)\n- [`packages/tui/src/stdin-buffer.ts`](../packages/tui/src/stdin-buffer.ts)\n- [`packages/tui/src/components/loader.ts`](../packages/tui/src/components/loader.ts)\n\n## Boot and component tree assembly\n\n`InteractiveMode` constructs `TUI(new ProcessTerminal(), settings.get(\"showHardwareCursor\"))`, applies `tui.maxInlineImages` and Kitty text-sizing settings, then creates persistent containers:\n\n- `chatContainer`\n- `pendingMessagesContainer`\n- `statusContainer`\n- `todoContainer`\n- `subagentContainer`\n- `btwContainer`\n- `omfgContainer`\n- `errorBannerContainer`\n- `statusLine`\n- `hookWidgetContainerAbove`\n- `editorContainer` (holds `CustomEditor`)\n- `hookWidgetContainerBelow`\n\n`init()` wires the tree in that order after any startup warnings/welcome/changelog, focuses the editor, registers input handlers via `InputController`, starts TUI, pushes terminal title state, updates the editor border, and requests a forced render.\nA forced render (`requestRender(true)`) queues a viewport repaint or explicit session replacement; it does **not** throw away previous-line history by default.\n\n## Terminal lifecycle and stdin normalization\n\n`ProcessTerminal.start()`:\n\n1. Enables raw mode and bracketed paste.\n2. Attaches resize handler and refreshes dimensions.\n3. Enables Windows VT input mode when running on win32.\n4. Creates a `StdinBuffer` to split partial escape chunks into complete sequences.\n5. Queries Kitty keyboard protocol support (`CSI ? u`), then enables protocol flags if supported; otherwise enables modifyOtherKeys fallback after a short timeout.\n6. Queries OSC 11 background color and Mode 2031 appearance notifications for dark/light theme detection.\n7. Queries OSC 99 notification capabilities.\n8. Starts periodic OSC 11 polling only where safe, then probes DEC private modes 2026/2048/2031 via DECRQM.\n\n`StdinBuffer` behavior:\n\n- Buffers fragmented escape sequences (CSI/OSC/DCS/APC/SS3).\n- Emits `data` only when a sequence is complete or timeout-flushed.\n- Detects bracketed paste and emits a `paste` event with raw pasted text.\n\nThis prevents partial escape chunks from being misinterpreted as normal keypresses.\n\n## Input routing and focus model\n\nInput path:\n\n`stdin -> ProcessTerminal -> StdinBuffer -> TUI.#handleInput -> focusedComponent.handleInput`\n\nRouting details:\n\n1. TUI runs registered input listeners first (`addInputListener`), allowing consume/transform behavior.\n2. TUI handles global debug shortcut (`shift+ctrl+d`) before component dispatch.\n3. If focused component belongs to an overlay that is now hidden/invisible, TUI reassigns focus to next visible overlay or saved pre-overlay focus.\n4. Key release events are filtered unless focused component sets `wantsKeyRelease = true`.\n5. After dispatch, TUI schedules render.\n\n`setFocus()` also toggles `Focusable.focused`, which controls whether components emit `CURSOR_MARKER` for hardware cursor placement.\n\n## Key handling split: editor vs controller\n\n`CustomEditor` intercepts high-priority combos first (escape, ctrl-c/d/z, ctrl-v, ctrl-p variants, ctrl-t, alt-up, extension custom keys) and delegates the rest to base `Editor` behavior (text editing, history, autocomplete, cursor movement).\n\n`InputController.setupKeyHandlers()` then binds editor callbacks to mode actions:\n\n- cancellation / mode exits on `Escape`\n- shutdown on double `Ctrl+C` or empty-editor `Ctrl+D`\n- suspend/resume on `Ctrl+Z`\n- slash-command and selector hotkeys\n- follow-up/dequeue toggles and expansion toggles\n\nThis keeps key parsing/editor mechanics in `packages/tui` and mode semantics in coding-agent controllers.\n\n## Render loop and the append-only contract\n\n`TUI.requestRender()` coalesces render requests and rate-limits ordinary frames:\n\n- forced renders (`requestRender(true, ...)`) schedule an immediate frame and force a full window rewrite; with `clearScrollback`, they trigger a destructive full paint (ED3 outside multiplexers)\n- ordinary renders schedule through `#scheduleRender()` and respect `TUI.#MIN_RENDER_INTERVAL_MS`\n- repeated requests while a render is pending collapse into the same scheduled frame\n- `requestComponentRender(component)` requests on behalf of a single self-contained change (spinner frame, blink): when every request in the coalesced frame is component-scoped and the frame is quiet (no resize, overlays, inline images, forced repaint, or root-list change), compose re-renders only the root subtrees containing the requesting components and reuses every other root child's previous rows and seam report; any unsafe condition or concurrent full request downgrades to a full compose\n\n`#doRender()` pipeline:\n\n1. Render root component tree, collecting the commit-boundary seam (`NativeScrollbackLiveRegion`) from the children.\n2. Advance the append-only ledger: `windowTop = max(committedRows, frame.length - height)`, commit chunk = settled rows crossing the window top (never past the seam).\n3. Extract and strip `CURSOR_MARKER`, normalize lines, slice the visible window, composite overlays into the window slice (screen coordinates; overlays freeze commits).\n4. Emit one of: gesture-driven full paint (initial / session replace / resize), scroll-append (chunk rows only), in-window row diff, or seam rewrite (chunk + full window).\n\nNative scrollback always equals the committed frame prefix — rows enter history exactly once, in order, when the seam says they are final. There are no viewport probes and no deferred reconciliation; see [`tui-core-renderer.md`](./tui-core-renderer.md).\n\nRender writes use synchronized output mode (`CSI ? 2026 h/l`) when enabled; capability detection, DECRQM, or `PI_NO_SYNC_OUTPUT` can disable the wrappers while leaving autowrap discipline on.\n\n## Render safety constraints\n\nCritical safety checks in `TUI`:\n\n- Non-image rendered lines are expected to fit terminal width; the differential path truncates overwide lines as a last-resort guard and can write debug diagnostics when redraw debugging is enabled.\n- Overlay compositing includes defensive truncation and post-composite width guarding.\n- Width changes force repaint/rebuild planning because wrapping semantics change.\n- Cursor position is clamped before movement.\n\nThese constraints are runtime guards plus component conventions; renderers should still return width-safe lines rather than rely on truncation.\n\nThe deeper reasons these guards exist — why the renderer cannot observe scroll\nposition, why ED3 (`CSI 3 J`) is confined to one path, and why the hot path\nclamps instead of throwing — are documented in\n[`tui-core-renderer.md`](./tui-core-renderer.md).\n\n## Resize handling\n\nResize events are event-driven from `ProcessTerminal` to `TUI.requestRender()`.\n\nEffects:\n\n- A resize is an explicit user gesture: outside multiplexers the engine erases and replays (`ED3` + full paint) so history rewraps at the new geometry; the commit ledger restarts from the replayed frame.\n- Inside terminal multiplexers, resize repaints the visible window in place after a settle debounce (issue #2088); pane history keeps its old wrap, like any shell output, because pane scrollback cannot be erased safely.\n- Terminals that re-report their size when the alternate screen buffer is toggled (Warp reports a height one row different for the alt buffer) take the in-place path too. The non-multiplexer fast path borrows the alternate screen for drag frames, so on these terminals each alt enter/leave emits a fresh resize event, which re-enters the fast path — a self-sustaining loop that floods ED3 full repaints with stable geometry. `resizeRepaintsInPlace()` (covering multiplexers and these terminals; overridable via `PI_TUI_RESIZE_IN_PLACE`) routes them through the in-place repaint, which never touches the alt buffer.\n- Overlay visibility can depend on terminal dimensions (`OverlayOptions.visible`); focus is corrected when overlays become non-visible after resize.\n\n## Streaming and incremental UI updates\n\n`EventController` subscribes to `AgentSessionEvent` and updates UI incrementally:\n\n- `agent_start`: starts loader in `statusContainer`.\n- `message_start` assistant: creates `streamingComponent` and mounts it.\n- `message_update`: updates streaming assistant content; creates/updates tool execution components as tool calls appear.\n- `tool_execution_update/end`: updates tool result components and completion state.\n- `message_end`: finalizes assistant stream, handles aborted/error annotations, marks pending tool args complete on normal stop.\n- `agent_end`: stops loaders, clears transient stream state, flushes deferred model switch, issues completion notification if backgrounded.\n\nRead-tool grouping is intentionally stateful (`#lastReadGroup`) to coalesce consecutive read tool calls into one visual block until a non-read break occurs.\n\n## Status and loader orchestration\n\nStatus lane ownership:\n\n- `statusContainer` holds transient loaders (`loadingAnimation`, `autoCompactionLoader`, `retryLoader`).\n- `statusLine` renders persistent status/hooks/plan indicators and drives editor top border updates.\n\nLoader behavior:\n\n- `Loader` advances its spinner every 80ms (animated message colorizers redraw at ~30fps) and requests a component-scoped render each frame (`requestComponentRender`), so idle spinner ticks repaint without re-walking the transcript.\n- Escape handlers are temporarily overridden during auto-compaction and auto-retry to cancel those operations.\n- On end/cancel paths, controllers restore prior escape handlers and stop/clear loader components.\n\n## Mode transitions and backgrounding\n\n### Bash/Python input modes\n\nInput text prefixes toggle editor border mode flags:\n\n- `!` -> bash mode\n- `$` (non-template literal prefix) -> python mode\n\nEscape exits inactive mode by clearing editor text and restoring border color; when execution is active, escape aborts the running task instead.\n\n### Plan mode\n\n`InteractiveMode` tracks plan mode flags, status-line state, active tools, and model switching. Enter/exit updates session mode entries and status/UI state, including deferred model switch if streaming is active.\n\n### Suspend/resume (`Ctrl+Z`)\n\n`InputController.handleCtrlZ()`:\n\n1. Registers one-shot `SIGCONT` handler to restart TUI and force render.\n2. Stops TUI before suspend.\n3. Sends `SIGTSTP` to process group.\n\n## Cancellation paths\n\nPrimary cancellation inputs:\n\n- `Escape` during active stream loader: restores queued messages to editor and aborts agent.\n- `Escape` during bash/python execution: aborts running command.\n- `Escape` during auto-compaction/retry: invokes dedicated abort methods through temporary escape handlers.\n- `Ctrl+C` single press: clear editor; double press within 500ms: shutdown.\n\nCancellation is state-conditional; same key can mean abort, mode-exit, selector trigger, or no-op depending on runtime state.\n\n## Event-driven vs throttled behavior\n\nEvent-driven updates:\n\n- Agent session events (`EventController`)\n- Key input callbacks (`InputController`)\n- terminal resize callback\n- terminal appearance callbacks, SIGWINCH theme reevaluation, and git branch watchers in `InteractiveMode`\n\nThrottled/debounced paths:\n\n- TUI rendering is tick-debounced (`requestRender` coalescing).\n- Loader animation is interval-driven (80ms spinner advance; ~30fps when the message colorizer is animated), each frame requesting a component-scoped render.\n- Editor autocomplete updates (inside `Editor`) use debounce timers, reducing recompute churn during typing.\n\nThe runtime therefore mixes event-driven state transitions with bounded render cadence to keep interactivity responsive without repaint storms.\n",
|
|
119
119
|
"tui.md": "# TUI integration for extensions and custom tools\n\nThis document covers the **current** TUI contract used by `packages/coding-agent` and `packages/tui` for extension UI, custom tool UI, and custom renderers.\n\n## What this subsystem is\n\nThe runtime has two layers:\n\n- **Rendering engine (`packages/tui`)**: differential terminal renderer, input dispatch, focus, overlays, cursor placement.\n- **Integration layer (`packages/coding-agent`)**: mounts extension/custom-tool components, wires keybindings/theme, and restores editor state.\n\n## Runtime behavior by mode\n\n| Mode | `ctx.ui.custom(...)` availability | Notes |\n| ------------------- | --------------------------------- | ------------------------------------------------------------------------------------------------------------------------------ |\n| Interactive TUI | Supported | Component is mounted in the editor area or overlay, focused, and must call `done(result)` to resolve. |\n| Background/headless | Not interactive | UI context is no-op (`hasUI === false`). |\n| RPC mode | Not mounted | `custom()` is implemented as unsupported UI and returns `undefined as never`; do not depend on interactive UI in RPC handlers. |\n\nIf your extension/tool can run in non-interactive mode, guard with `ctx.hasUI` / `pi.hasUI`.\n\n## Core component contract (`@oh-my-pi/pi-tui`)\n\n`packages/tui/src/tui.ts` defines:\n\n```ts\nexport interface Component {\n render(width: number): readonly string[];\n handleInput?(data: string): void;\n wantsKeyRelease?: boolean;\n invalidate?(): void;\n dispose?(): void;\n}\n```\n\nRender results are component-owned and immutable to callers; a component that did not change should return the **same array reference** it returned last time (reference equality is what enables the renderer's memoization and row virtualization), and must return a new array whenever its content changed.\n\n`Focusable` is separate:\n\n```ts\nexport interface Focusable {\n focused: boolean;\n}\n```\n\nCursor behavior uses `CURSOR_MARKER` (not `getCursorPosition`). Focused components emit the marker in rendered text; `TUI` extracts it and positions the hardware cursor.\n\n## Rendering constraints (terminal safety)\n\nYour `render(width)` output must be terminal-safe:\n\n1. **Do not intentionally exceed `width` on any line**. The renderer truncates overwide non-image lines as a last-resort guard, but components should still return width-safe output.\n2. **Measure visual width**, not string length: use `visibleWidth()`.\n3. **Truncate/wrap ANSI-aware text** with `truncateToWidth()` / `wrapTextWithAnsi()`.\n4. **Sanitize tabs/content** from external sources using `replaceTabs()` (and higher-level sanitizers in coding-agent render paths).\n\nMinimal pattern:\n\n```ts\nimport { replaceTabs, truncateToWidth } from \"@oh-my-pi/pi-tui\";\n\nrender(width: number): readonly string[] {\n return this.lines.map(line => truncateToWidth(replaceTabs(line), width));\n}\n```\n\n## Input handling and keybindings\n\n### Raw key matching\n\nUse `matchesKey(data, \"...\")` for navigation keys and combos.\n\n### Match app keybinding actions\n\nExtension UI factories receive a `KeybindingsManager` (interactive mode; an in-memory instance carrying the default bindings, not the user's `keybindings.yml`) so you can match action ids instead of hardcoding keys:\n\n```ts\nif (keybindings.matches(data, \"app.interrupt\")) {\n done(undefined);\n return;\n}\n```\n\n### Key release/repeat events\n\nKey release events are filtered unless your component sets:\n\n```ts\nwantsKeyRelease = true;\n```\n\nThen use `isKeyRelease()` / `isKeyRepeat()` if needed.\n\n## Focus, overlays, and cursor\n\n- `TUI.setFocus(component)` routes input to that component.\n- Overlay APIs exist in `TUI` (`showOverlay`, `OverlayHandle`). In interactive extension/custom UI, `custom(..., { overlay: true })` mounts your component through `TUI.showOverlay(...)`; without `overlay`, it replaces the editor component area directly.\n- Overlay custom UI is anchored at `bottom-center` with full terminal width/max height and is removed through the returned overlay handle when `done(...)` closes the flow.\n\n## Mount points and return contracts\n\n## 1) Extension UI (`ExtensionUIContext`)\n\nCurrent signature (`extensibility/extensions/types.ts`):\n\n```ts\ncustom<T>(\n factory: (\n tui: TUI,\n theme: Theme,\n keybindings: KeybindingsManager,\n done: (result: T) => void,\n ) => (Component & { dispose?(): void }) | Promise<Component & { dispose?(): void }>,\n options?: { overlay?: boolean },\n): Promise<T>\n```\n\nBehavior in interactive mode (`extension-ui-controller.ts`):\n\n- Saves editor text.\n- Without `options.overlay`, replaces the editor component with your component.\n- With `options.overlay`, mounts your component as a bottom-centered overlay instead of replacing the editor.\n- Focuses your component.\n- On `done(result)`: calls `component.dispose?.()`, hides the overlay if present, restores editor + text for non-overlay flows, focuses editor, resolves promise.\n So `done(...)` is mandatory for completion.\n\n## 2) Hook/custom-tool UI context (legacy typing)\n\n`HookUIContext.custom` is typed as `(tui, theme, done)` in hook/custom-tool types.\nUnderlying interactive implementation calls factories with `(tui, theme, keybindings, done)`. JS consumers can use the extra arg; type-level compatibility still reflects the 3-arg legacy signature.\n\nCustom tools typically use the same UI entrypoint via the factory-scoped `pi.ui` object, then return the selected value in normal tool content:\n\n```ts\nasync execute(toolCallId, params, onUpdate, ctx, signal) {\n if (!pi.hasUI) {\n return { content: [{ type: \"text\", text: \"UI unavailable\" }] };\n }\n\n const picked = await pi.ui.custom<string | undefined>((tui, theme, done) => {\n const component = new MyPickerComponent(done, signal);\n return component;\n });\n\n return { content: [{ type: \"text\", text: picked ? `Picked: ${picked}` : \"Cancelled\" }] };\n}\n```\n\n## 3) Custom tool call/result renderers\n\nCustom tools and extension tools can return components from:\n\n- `renderCall(args, options, theme)`\n- `renderResult(result, options, theme, args?)`\n\n`options` currently includes:\n\n- `expanded: boolean`\n- `isPartial: boolean`\n- `spinnerFrame?: number`\n\nThese renderers are mounted by `ToolExecutionComponent`.\n\n## Lifecycle and cancellation\n\n- `dispose()` is optional at type level but should be implemented when you own timers, subprocesses, watchers, sockets, or overlays.\n- `done(...)` should be called exactly once from your component flow.\n- For cancellable long-running UI, pair `CancellableLoader` with `AbortSignal` and call `done(...)` from `onAbort`.\n\nExample cancellation pattern:\n\n```ts\nconst loader = new CancellableLoader(\n tui,\n theme.fg(\"accent\"),\n theme.fg(\"muted\"),\n \"Working...\",\n);\nloader.onAbort = () => done(undefined);\nvoid doWork(loader.signal).then((result) => done(result));\nreturn loader;\n```\n\n## Realistic custom component example (extension command)\n\n```ts\nimport type { Component } from \"@oh-my-pi/pi-tui\";\nimport {\n SelectList,\n matchesKey,\n replaceTabs,\n truncateToWidth,\n} from \"@oh-my-pi/pi-tui\";\nimport {\n getSelectListTheme,\n type ExtensionAPI,\n} from \"@oh-my-pi/pi-coding-agent\";\n\nclass Picker implements Component {\n list: SelectList;\n keybindings: any;\n done: (value: string | undefined) => void;\n\n constructor(\n items: Array<{ value: string; label: string }>,\n keybindings: any,\n done: (value: string | undefined) => void,\n ) {\n this.list = new SelectList(items, 8, getSelectListTheme());\n this.keybindings = keybindings;\n this.done = done;\n this.list.onSelect = (item) => this.done(item.value);\n this.list.onCancel = () => this.done(undefined);\n }\n\n handleInput(data: string): void {\n if (this.keybindings.matches(data, \"app.interrupt\")) {\n this.done(undefined);\n return;\n }\n this.list.handleInput(data);\n }\n\n render(width: number): readonly string[] {\n return this.list\n .render(width)\n .map((line) => truncateToWidth(replaceTabs(line), width));\n }\n\n invalidate(): void {\n this.list.invalidate();\n }\n}\n\nexport default function extension(pi: ExtensionAPI): void {\n pi.registerCommand(\"pick-model\", {\n description: \"Pick a model profile\",\n handler: async (_args, ctx) => {\n if (!ctx.hasUI) return;\n\n const selected = await ctx.ui.custom<string | undefined>(\n (tui, theme, keybindings, done) => {\n const items = [\n { value: \"fast\", label: theme.fg(\"accent\", \"Fast\") },\n { value: \"balanced\", label: \"Balanced\" },\n { value: \"quality\", label: \"Quality\" },\n ];\n return new Picker(items, keybindings, done);\n },\n );\n\n if (selected) ctx.ui.notify(`Selected profile: ${selected}`, \"info\");\n },\n });\n}\n```\n\n## Key implementation files\n\n- `packages/tui/src/tui.ts` — `Component`, `Focusable`, cursor marker, focus, overlay, input dispatch.\n- `packages/tui/src/utils.ts` — width/truncation/sanitization primitives.\n- `packages/tui/src/keys.ts` / `keybindings.ts` — key parsing and configurable action mapping.\n- `packages/coding-agent/src/modes/controllers/extension-ui-controller.ts` — interactive mounting/unmounting for extension/hook/custom-tool UI.\n- `packages/coding-agent/src/extensibility/extensions/types.ts` — extension UI and renderer contracts.\n- `packages/coding-agent/src/extensibility/hooks/types.ts` — hook UI contract (legacy custom signature).\n- `packages/coding-agent/src/extensibility/custom-tools/types.ts` — custom tool execute/render contracts.\n- `packages/coding-agent/src/modes/components/tool-execution.ts` — mounting `renderCall`/`renderResult` components and partial-state options.\n- `packages/coding-agent/src/tools/context.ts` — tool UI context propagation (`hasUI`, `ui`).\n",
|
|
120
120
|
};
|