@oh-my-pi/pi-coding-agent 15.13.1 → 15.13.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/CHANGELOG.md +25 -0
  2. package/dist/cli.js +957 -214
  3. package/dist/types/config/model-registry.d.ts +1 -0
  4. package/dist/types/config/models-config-schema.d.ts +3 -0
  5. package/dist/types/config/models-config.d.ts +3 -0
  6. package/dist/types/config/settings-schema.d.ts +66 -0
  7. package/dist/types/edit/hashline/block-resolver.d.ts +1 -1
  8. package/dist/types/edit/index.d.ts +2 -0
  9. package/dist/types/modes/components/welcome.d.ts +1 -0
  10. package/dist/types/modes/controllers/input-controller.d.ts +4 -4
  11. package/dist/types/modes/rpc/rpc-types.d.ts +2 -1
  12. package/dist/types/sdk.d.ts +3 -0
  13. package/dist/types/session/session-dump-format.d.ts +2 -1
  14. package/dist/types/system-prompt.d.ts +11 -0
  15. package/dist/types/tools/ask.d.ts +2 -0
  16. package/dist/types/tools/ast-edit.d.ts +2 -0
  17. package/dist/types/tools/ast-grep.d.ts +2 -0
  18. package/dist/types/tools/browser.d.ts +2 -0
  19. package/dist/types/tools/debug.d.ts +2 -0
  20. package/dist/types/tools/eval.d.ts +2 -0
  21. package/dist/types/tools/find.d.ts +2 -0
  22. package/dist/types/tools/inspect-image.d.ts +2 -1
  23. package/dist/types/tools/irc.d.ts +2 -0
  24. package/dist/types/tools/ssh.d.ts +2 -0
  25. package/dist/types/tools/todo.d.ts +2 -0
  26. package/dist/types/tui/tree-list.d.ts +1 -0
  27. package/package.json +12 -12
  28. package/src/config/model-registry.ts +10 -0
  29. package/src/config/models-config-schema.ts +2 -0
  30. package/src/config/models-config.ts +1 -0
  31. package/src/config/settings-schema.ts +53 -0
  32. package/src/edit/hashline/block-resolver.ts +1 -1
  33. package/src/edit/hashline/execute.ts +1 -6
  34. package/src/edit/index.ts +48 -0
  35. package/src/eval/__tests__/js-context-manager.test.ts +41 -1
  36. package/src/eval/js/context-manager.ts +92 -26
  37. package/src/eval/js/worker-core.ts +1 -1
  38. package/src/internal-urls/docs-index.generated.ts +9 -2
  39. package/src/modes/components/welcome.ts +14 -4
  40. package/src/modes/controllers/input-controller.ts +21 -38
  41. package/src/modes/rpc/rpc-mode.ts +1 -0
  42. package/src/modes/rpc/rpc-types.ts +2 -2
  43. package/src/prompts/system/system-prompt.md +17 -21
  44. package/src/prompts/tools/ask.md +0 -8
  45. package/src/prompts/tools/ast-edit.md +0 -15
  46. package/src/prompts/tools/ast-grep.md +0 -13
  47. package/src/prompts/tools/browser.md +0 -21
  48. package/src/prompts/tools/debug.md +0 -13
  49. package/src/prompts/tools/eval.md +0 -9
  50. package/src/prompts/tools/find.md +0 -13
  51. package/src/prompts/tools/inspect-image.md +0 -9
  52. package/src/prompts/tools/irc.md +0 -15
  53. package/src/prompts/tools/patch.md +0 -13
  54. package/src/prompts/tools/ssh.md +0 -9
  55. package/src/prompts/tools/todo.md +1 -19
  56. package/src/sdk.ts +19 -0
  57. package/src/session/agent-session.ts +125 -19
  58. package/src/session/session-dump-format.ts +10 -31
  59. package/src/system-prompt.ts +31 -0
  60. package/src/tools/ask.ts +41 -0
  61. package/src/tools/ast-edit.ts +46 -0
  62. package/src/tools/ast-grep.ts +24 -0
  63. package/src/tools/browser.ts +52 -0
  64. package/src/tools/debug.ts +17 -0
  65. package/src/tools/eval.ts +20 -1
  66. package/src/tools/find.ts +24 -0
  67. package/src/tools/inspect-image.ts +27 -1
  68. package/src/tools/irc.ts +41 -0
  69. package/src/tools/ssh.ts +16 -0
  70. package/src/tools/todo.ts +82 -3
  71. package/src/tui/tree-list.ts +68 -19
@@ -1,6 +1,6 @@
1
1
  // Auto-generated by scripts/generate-docs-index.ts - DO NOT EDIT
2
2
 
3
- export const EMBEDDED_DOC_FILENAMES: readonly string[] = ["ERRATA-GPT5-HARMONY.md","adding-a-provider.md","ai-schema-normalize.md","approval-mode.md","auth-broker-gateway.md","bash-tool-runtime.md","blob-artifact-architecture.md","collab.md","compaction.md","config-usage.md","context-files.md","custom-tools.md","environment-variables.md","extension-loading.md","extensions.md","fs-scan-cache-architecture.md","gemini-manifest-extensions.md","handoff-generation-pipeline.md","hooks.md","install-id.md","keybindings.md","local-models.md","lsp-config.md","macos-signing-notarization.md","marketplace.md","mcp-config.md","mcp-protocol-transports.md","mcp-runtime-lifecycle.md","mcp-server-tool-authoring.md","memory.md","mnemosyne-memory-backend.md","models.md","natives-addon-loader-runtime.md","natives-architecture.md","natives-binding-contract.md","natives-build-release-debugging.md","natives-media-system-utils.md","natives-rust-task-cancellation.md","natives-shell-pty-process.md","natives-text-search-pipeline.md","non-compaction-retry-policy.md","notebook-tool-runtime.md","plugin-manager-installer-plumbing.md","porting-from-pi-mono.md","porting-to-natives.md","provider-streaming-internals.md","providers.md","python-repl.md","render-mermaid.md","resolve-tool-runtime.md","rpc.md","rulebook-matching-pipeline.md","sdk.md","secrets.md","session-operations-export-share-fork-resume.md","session-switching-and-recent-listing.md","session-tree-plan.md","session.md","settings.md","skills.md","skills/authoring-extensions.md","skills/authoring-hooks.md","skills/authoring-marketplaces.md","skills/examples/hello-extension/README.md","skills/examples/mini-marketplace/README.md","skills/examples/safety-hook/README.md","slash-command-internals.md","system-prompt-customization.md","task-agent-discovery.md","theme.md","tools/ask.md","tools/ast-edit.md","tools/ast-grep.md","tools/bash.md","tools/browser.md","tools/checkpoint.md","tools/debug.md","tools/edit.md","tools/eval.md","tools/find.md","tools/github.md","tools/inspect_image.md","tools/irc.md","tools/job.md","tools/lsp.md","tools/read.md","tools/recall.md","tools/reflect.md","tools/render_mermaid.md","tools/resolve.md","tools/retain.md","tools/rewind.md","tools/search.md","tools/search_tool_bm25.md","tools/ssh.md","tools/task.md","tools/todo.md","tools/web_search.md","tools/write.md","tree.md","ttsr-injection-lifecycle.md","tui-core-renderer.md","tui-runtime-internals.md","tui.md"];
3
+ export const EMBEDDED_DOC_FILENAMES: readonly string[] = ["ERRATA-GPT5-HARMONY.md","adding-a-provider.md","ai-schema-normalize.md","approval-mode.md","auth-broker-gateway.md","bash-tool-runtime.md","blob-artifact-architecture.md","collab.md","compaction.md","config-usage.md","context-files.md","custom-tools.md","environment-variables.md","extension-loading.md","extensions.md","fs-scan-cache-architecture.md","gemini-manifest-extensions.md","handoff-generation-pipeline.md","hooks.md","install-id.md","keybindings.md","local-models.md","lsp-config.md","macos-signing-notarization.md","marketplace.md","mcp-config.md","mcp-protocol-transports.md","mcp-runtime-lifecycle.md","mcp-server-tool-authoring.md","memory.md","mnemosyne-memory-backend.md","models.md","natives-addon-loader-runtime.md","natives-architecture.md","natives-binding-contract.md","natives-build-release-debugging.md","natives-media-system-utils.md","natives-rust-task-cancellation.md","natives-shell-pty-process.md","natives-text-search-pipeline.md","non-compaction-retry-policy.md","notebook-tool-runtime.md","plugin-manager-installer-plumbing.md","porting-from-pi-mono.md","porting-to-natives.md","provider-streaming-internals.md","providers.md","python-repl.md","render-mermaid.md","resolve-tool-runtime.md","rpc.md","rulebook-matching-pipeline.md","sdk.md","secrets.md","session-operations-export-share-fork-resume.md","session-switching-and-recent-listing.md","session-tree-plan.md","session.md","settings.md","skills.md","skills/authoring-extensions.md","skills/authoring-hooks.md","skills/authoring-marketplaces.md","skills/examples/hello-extension/README.md","skills/examples/mini-marketplace/README.md","skills/examples/safety-hook/README.md","slash-command-internals.md","system-prompt-customization.md","task-agent-discovery.md","theme.md","toolconv/anthropic.md","toolconv/deepseek.md","toolconv/glm-4.5.md","toolconv/harmony.md","toolconv/kimi-k2.md","toolconv/pi-native.md","toolconv/qwen3.md","tools/ask.md","tools/ast-edit.md","tools/ast-grep.md","tools/bash.md","tools/browser.md","tools/checkpoint.md","tools/debug.md","tools/edit.md","tools/eval.md","tools/find.md","tools/github.md","tools/inspect_image.md","tools/irc.md","tools/job.md","tools/lsp.md","tools/read.md","tools/recall.md","tools/reflect.md","tools/render_mermaid.md","tools/resolve.md","tools/retain.md","tools/rewind.md","tools/search.md","tools/search_tool_bm25.md","tools/ssh.md","tools/task.md","tools/todo.md","tools/web_search.md","tools/write.md","tree.md","ttsr-injection-lifecycle.md","tui-core-renderer.md","tui-runtime-internals.md","tui.md"];
4
4
 
5
5
  export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
6
6
  "ERRATA-GPT5-HARMONY.md": "# ERRATA — GPT-5 Harmony-Header Leakage\n\nHistorical research note, not a current runtime contract. The statistics below\ncome from the named local stats database snapshot, not from checked-in tests or\nruntime code.\n\n## 1. The problem\n\nOpenAI frames tool calls in the Harmony chat protocol:\n\n```\n<|start|>assistant<|channel|>commentary to=functions.<NAME><|message|>{ARGS}<|call|>\n```\n\n`<|channel|>commentary to=functions.NAME` is the **routing header** —\ncontrol tokens consumed by the runtime to dispatch the call. These\ntokens never appear as content under normal operation; the runtime\nstrips them.\n\nThe defect: gpt-5 models occasionally emit, **as ordinary content\ninside `{ARGS}`**, the **plain-text shadow** of these routing tokens —\nthe same characters without the `<|…|>` brackets — and continue\nproducing more pseudo-routing structure (channel name, body marker,\nmultilingual spam, fake tool-result framing). The contamination lives\ninside the visible tool argument and is dispatched to the tool as if it\nwere intended content.\n\n**Critical detail.** The actual `<|start|>` / `<|channel|>` /\n`<|message|>` / `<|call|>` special tokens almost never appear in tool\nargs. What leaks is the bracket-less spelling — `analysis to=functions.X\ncode …` — because OpenAI applies a logit mask suppressing the\ncontrol-token IDs inside the args region. The mass that would have gone\nto those special tokens redistributes onto the un-bracketed plain-text\nrepresentation the model also learned. This makes the leak structurally\ninvisible to the routing parser and lands it in the tool input verbatim.\n\nManifestation in tool args (real corpus example):\n\n```\n~ add_function(iso, ctx, ns, \"installSystemChangeObserver\",\n os_install_system_change_observer);】【\"】【analysis to=functions.edit\n code above เงินไทยฟรีuser to=functions.edit code …\n```\n\nThe leading code is real and intended. Everything after the first\nnon-Latin token through the next clean structural boundary is corruption.\n\n---\n\n## 2. Observed statistics & failure modes\n\nSource: `~/.omp/stats.db` (`ss_tool_calls`, `ss_assistant_msgs`), through\n2026-05-10. 1.05M tool calls scanned.\n\n### 2.1 Rate\n\n| Model | Leaks in tool args | Calls | per million |\n| ------------- | -----------------: | ------: | ----------: |\n| gpt-5.4 | 37 | 226,957 | 163 |\n| gpt-5.3-codex | 17 | 112,243 | 151 |\n| gpt-5.5 | 2 | 80,750 | 25 |\n| gpt-5.2-codex | 0 | — | — |\n\nPlus 15 hits in assistant visible text / thinking blobs.\n\n### 2.2 Tool distribution\n\n| Tool | Hits |\n| ------------------------------ | -----: |\n| `edit` | 38 |\n| `eval` | 11 |\n| `report_tool_issue` | 3 |\n| `grep`/`read`/`search`/`yield` | 1 each |\n\nConcentrated in tools with free-form (non-JSON-schema) argument formats.\n\n### 2.3 Leak shape (deterministic)\n\n```\nLEAK ::= JUNK_PREFIX MARKER CHANNEL_BODY (LEAK)?\nMARKER ::= \"to=functions.\" TOOL_NAME\nCHANNEL_BODY ::= \" code \" (SPAM | reasoning_prose | fake_tool_output)*\nJUNK_PREFIX ::= (GLITCH_TOKEN | CHANNEL_WORD | NON_LATIN_RUN | \"}\" | \"】【\")+\n```\n\n**Cascading is common.** Of 96 marker occurrences across 71 contaminated\nrecords, 39 contain ≥2 markers and 7 contain ≥3 — the model emits\nmultiple fake `to=functions.X code …` blocks back-to-back, often with\nfake `code_output\\nCell N:\\n…` framing between them. Once the\nplain-text scaffolding is in the residual stream, the prefix now _looks\nlike_ a fresh tool envelope start, so the macro prior over continuations\nkeeps voting for more scaffolding. Self-amplifying.\n\n### 2.4 Glitch tokens\n\nSingle-token identifiers in `o200k_base` whose embeddings appear to be\nnear-init from underrepresentation in post-training. ASCII residue\nimmediately before the marker in the natural corpus:\n\n| Surface string | Single-token | Token ID | Hits in corpus |\n| ----------------- | :----------: | -------: | ------------------------------: |\n| `Japgolly` | ✅ | 199,745 | 1 |\n| `Jsii` | ✅ | 114,318 | (subtoken of `Jsii_commentary`) |\n| `Jsii_commentary` | — (3 toks) | — | 2 |\n| `changedFiles` | — (2 toks) | — | 8 |\n| `RTLU` | — (2 toks) | — | 3 |\n\n`Japgolly` is in the last 0.13% of the vocabulary — the same family of\nGitHub-corpus residue that produced `SolidGoldMagikarp` in the 2023\nGPT-2 vocabulary (Rumbelow & Watkins). `SolidGoldMagikarp` itself\ntokenizes to 5 tokens in `o200k_base` — that specific token was retired,\nbut the class wasn't.\n\nFor the multi-token entries, the corpus-level signature is the surface\nstring; the underlying glitch trigger is a sub-token (e.g. `Jsii` inside\n`Jsii_commentary`). The detector list (`G` signal) keys on the surface\nstrings.\n\nStable across unrelated sessions. Treated as a high-precision detector\nsignal.\n\n### 2.5 Channel-word leakage\n\n`analysis` (5), `assistant` (5), `commentary` (3), `user` (1) appear\ndirectly preceding `to=`. Always bare words; never `<|channel|>analysis`\nor any other bracketed form. Consistent with §1 — the brackets are\nmasked, the words are not.\n\n### 2.6 Non-Latin spam residue\n\n96 marker hits, by script: CJK 40, Cyrillic 12, Telugu/Kannada/Malayalam\n18, Thai 8, Georgian 7, Armenian 7, Arabic 1. Recurring fragments are\nChinese gambling SEO (`大发时时彩`, `天天中彩票`), Georgian/Abkhaz junk,\nand Thai casino spam — well-known low-quality crawl residue.\n\nThis is the same script distribution observed in the controlled\nreproduction (§7.3), independent of the prompt's natural language.\n\n### 2.7 Failure-mode breakdown for the `edit` tool\n\nThe `edit` tool exists in two variants in the corpus:\n\n| Variant | Calls | Recovery |\n| ------------------------------------ | ----: | ---------------------------------------------------------------------------------------------------------------------------------------------------- |\n| Patch-DSL (`§PATH`/anchor/`«»≔` ops) | 27 | **Recoverable** by op-truncation (§3.3) |\n| JSON-schema (`{path,edits:[…]}`) | 11 | **Not recoverable** — contamination is escaped _inside_ JSON strings, parser accepts it cleanly, content would be written verbatim into source files |\n\nFor Patch-DSL leaks specifically:\n\n- 20/27 cases: contamination on the last input line; nothing follows.\n- 7/27 cases: contamination mid-input; what follows is one of: a\n duplicate replay of an earlier file/anchor, intended content for a\n _different_ tool call (the model started its next call inline), or\n pure hallucination. Post-contamination content is never trustworthy.\n\n### 2.8 Mechanism (confirmed)\n\n**Prior collapse from null-embedding glitch tokens, into a\ncontrol-token-masked basin whose mass redistributes onto the\nplain-text shadow of the Harmony protocol.**\n\nStep by step:\n\n1. The model is mid-`{ARGS}` of a Harmony tool call. The runtime applies\n a logit mask suppressing structural control tokens (`<|channel|>`,\n `<|message|>`, `<|call|>`, `<|start|>`, `<|end|>`) inside the args\n region. Without this mask, normal generation would constantly\n hallucinate envelope-closes; with it, those token IDs have logit\n `-∞` in args.\n2. A glitch token `g` is sampled. By construction `g` was in the BPE\n merge corpus but barely in LM/RL training, so its **input embedding\n `e_g` ≈ near-init noise of small norm**.\n3. At position t+1, the residual update `h_{t+1} ≈ LN(h_t + e_g + Attn +\nMLP)` is dominated by the prefix-derived terms; the just-emitted-token\n signal is effectively absent. Generation diversity normally comes\n from `e_x` steering the residual into different sub-regions —\n stripped here.\n4. The next-token distribution therefore collapses onto the **conditional\n prior over continuations of the prefix, with local conditioning\n removed**. In a tool-calling rollout context, that prior is sharply\n peaked on Harmony scaffolding (control tokens + routing tokens) —\n that's what RL trained.\n5. The mask zeros the control-token IDs. Mass redistributes onto the\n **next-best continuation**: the un-bracketed surface-form spelling of\n the same protocol (`analysis`, `commentary`, ` to=functions.X`,\n `code`). This spelling is unmasked because those characters are\n ordinary tokens.\n6. Once a few tokens of plain-text scaffolding land in the residual\n stream, the prefix now resembles a fresh envelope start. The macro\n prior keeps voting for more scaffolding. Cascading (§2.3) follows.\n7. Multilingual spam after the marker is the same prior-collapse\n continuation, drawn from the training neighborhood of the glitch\n token (often ESL/auto-generated multilingual web junk — exactly the\n crawl residue in §2.6).\n\n**Two corollaries the corpus data demanded but only the experiment\nexplained:**\n\n- **The brackets never appear** (§1, §2.5). The mask is what makes the\n leak land in plain text instead of as a real envelope-close.\n- **Counterintuitive grammar dependency** (§7.4). The leak is _worse_ in\n formats closest to OpenAI's training distribution. Off-distribution\n custom grammars dampen the macro-prior basin; the official\n `*** Begin Patch` format is the strongest collapse target.\n\nThe 2023 SolidGoldMagikarp paper documented mechanism (1)+(2)+(4). The\nnew piece is (5): when constrained decoding masks the natural collapse\ntarget, the mass laundered through the un-masked plain-text shadow\nbecomes a structurally-invisible exfiltration channel.\n",
@@ -73,6 +73,13 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
73
73
  "system-prompt-customization.md": "# System Prompt Customization\n\nHow the coding-agent assembles the system prompt sent to the model, and what users can control via `SYSTEM.md`, `APPEND_SYSTEM.md`, and the matching CLI flags.\n\nPrimary implementation:\n\n- `packages/coding-agent/src/system-prompt.ts` (`buildSystemPrompt`, `loadSystemPromptFiles`)\n- `packages/coding-agent/src/main.ts` (`discoverSystemPromptFile`, `discoverAppendSystemPromptFile`)\n- `packages/coding-agent/src/prompts/system/system-prompt.md` (default stable instruction template)\n- `packages/coding-agent/src/prompts/system/custom-system-prompt.md` (internal custom-prompt template; not the normal CLI `SYSTEM.md` path)\n- `packages/coding-agent/src/prompts/system/project-prompt.md` (project/environment footer)\n\n---\n\n## 1) Inputs\n\nFour user-controllable inputs feed prompt assembly. All four resolve a value as either a literal string or, if the argument looks like a file path, the contents of that file (`resolvePromptInput`).\n\n| Input | Source | Effect |\n|---|---|---|\n| `--system-prompt <text-or-file>` | CLI flag | Replaces block 0: the default stable instructions. Highest precedence. |\n| `SYSTEM.md` | `<cwd>/.omp/SYSTEM.md`, then `~/.omp/agent/SYSTEM.md` (and equivalent paths under `.claude`, `.codex`, `.gemini`) | Same effect as `--system-prompt`; used when the flag is absent. |\n| `--append-system-prompt <text-or-file>` | CLI flag | Adds a prompt block. Without a custom system prompt it goes after all default blocks; with one it goes after the custom block and before the preserved project/environment footer. |\n| `APPEND_SYSTEM.md` | Same discovery as `SYSTEM.md` | Same effect as `--append-system-prompt`; used when the flag is absent. |\n\nDiscovery for `SYSTEM.md` / `APPEND_SYSTEM.md` uses `findConfigFile` (`packages/coding-agent/src/config.ts`): the first existing file across the ordered bases (`.omp`, `.claude`, `.codex`, `.gemini` — project-level at `<cwd>` first, then user-level at `~`) wins. **No ancestor walk-up.** Running `omp` from `<repo>/subdir` does not pick up `<repo>/.omp/SYSTEM.md`; the file must live directly under the cwd's config base or in the user-level location. See [`docs/config-usage.md`](./config-usage.md) for the full discovery contract.\n\nPrecedence (highest first):\n\n1. `--system-prompt`\n2. project `SYSTEM.md`\n3. user `SYSTEM.md`\n\nFor append, the same precedence applies between `--append-system-prompt`, project `APPEND_SYSTEM.md`, and user `APPEND_SYSTEM.md`.\n\n---\n\n## 2) Replace vs. append\n\nNormal CLI startup builds the default provider-facing prompt blocks first, then applies CLI / discovered file overrides in `packages/coding-agent/src/main.ts`:\n\n```ts\nif (resolvedSystemPrompt && resolvedAppendPrompt) {\n options.systemPrompt = defaultPrompt => [resolvedSystemPrompt, resolvedAppendPrompt, ...defaultPrompt.slice(1)];\n} else if (resolvedSystemPrompt) {\n options.systemPrompt = defaultPrompt => [resolvedSystemPrompt, ...defaultPrompt.slice(1)];\n} else if (resolvedAppendPrompt) {\n options.systemPrompt = defaultPrompt => [...defaultPrompt, resolvedAppendPrompt];\n}\n```\n\nThe default blocks come from `buildSystemPrompt`:\n\n- block 0: `system-prompt.md` — the stable default instructions (staff-engineer preamble, tool inventory, exploration rules, workflow rules, etc.);\n- block 1, when non-empty: `project-prompt.md` — dynamic project/environment context (workstation info, context files, dir-context list, workspace tree, current date/cwd, and other project footer content).\n\nConsequences for normal CLI use:\n\n- Providing `--system-prompt` or `SYSTEM.md` replaces only block 0. The stable default instructions are removed, but the dynamic project/environment footer from `project-prompt.md` remains as `defaultPrompt.slice(1)`.\n- Providing `--append-system-prompt` or `APPEND_SYSTEM.md` without a custom system prompt appends a new block after all default blocks.\n- Providing both a custom system prompt and an append prompt produces: custom system prompt block, append prompt block, then the preserved dynamic project/environment footer.\n\nIf you want to keep both default blocks and add to them, use `--append-system-prompt` / `APPEND_SYSTEM.md` without `--system-prompt` / `SYSTEM.md`. If you want to replace the stable default instructions while keeping the dynamic footer, use `--system-prompt` / `SYSTEM.md`.\n\n---\n\n## 3) Templating contract\n\n**Contents of `SYSTEM.md`, `APPEND_SYSTEM.md`, `--system-prompt`, and `--append-system-prompt` are treated as plain text.** They are resolved before prompt-block replacement and are not rendered as Handlebars templates.\n\nThe built-in prompt templates are Handlebars (`packages/utils/src/prompt.ts`), but user-provided strings are not compiled with that renderer. The secondary capability path can insert `systemPromptCustomization` into a Handlebars parent template, but a `{{value}}` reference in Handlebars still does not recursively render its substituted contents — the value is emitted as a string. Concretely:\n```handlebars\n{{! parent template — handled by Handlebars }}\n{{#if systemPromptCustomization}}\n{{systemPromptCustomization}}\n{{/if}}\n```\n\nIf `SYSTEM.md` contains:\n\n```handlebars\nWorking in {{cwd}} on {{date}}.\n{{#if hasMemoryRoot}}Memory enabled.{{/if}}\n```\n\nthe rendered output contains those characters verbatim — `{{cwd}}`, `{{#if hasMemoryRoot}}`, etc. are NOT substituted. They will be shown to the model as literal Handlebars syntax.\n\nThis is by design. The internal template variables (`cwd`, `date`, `environment`, `workspaceTree`, `skills`, `rules`, `toolRefs`, `hasMemoryRoot`, `hasObsidian`, `mcpDiscoveryServerSummaries`, ...) are not a supported public surface — they change between releases as the prompt is rewritten, and they would couple user configs to internals. Treat them as private.\n\nIf a future release exposes a templating surface for `SYSTEM.md`, it will be opt-in (e.g. via a settings flag or a different filename) and documented here.\n\n---\n\n## 4) Recommended patterns\n\n### \"Tweak the default\" — keep default, add a few rules\n\nUse `APPEND_SYSTEM.md` (or `--append-system-prompt`) without `SYSTEM.md`. The default stable instructions and the dynamic project/environment footer stay intact; your text is appended as an additional block.\n\n```text\n# ~/.omp/agent/APPEND_SYSTEM.md\nPrefer Bun APIs over Node APIs in this project.\nWhen you change a public function, run `bun check` before yielding.\n```\n\n### \"Replace the stable default instructions\" — bring your own base prompt\n\nUse `SYSTEM.md` (or `--system-prompt`). You replace the stable default instructions in block 0, but normal CLI startup still preserves the dynamic project/environment footer block (`project-prompt.md`): workstation info, context files, dir-context list, workspace tree, current date, cwd, and related project context.\n\n```text\n# ~/.omp/agent/SYSTEM.md\nYou are a code reviewer. Read diffs, surface issues, never edit files.\n- Cite paths with backticks.\n- Prefer concrete fixes over abstract advice.\n```\n\nIf you do this and want default tool guidance, exploration rules, or workflow rules, copy what you need from `packages/coding-agent/src/prompts/system/system-prompt.md` and maintain it yourself — there is currently no way to inherit selected sections from that stable default instruction block.\n\n### \"Customize while keeping generated skills/rules/tool guidance\"\n\nUse `APPEND_SYSTEM.md`, not `SYSTEM.md`. Skills, rulebook summaries, always-apply rules, the tool inventory, and the built-in guidance that tells the model when to read `skill://<name>` are part of block 0 (`system-prompt.md`). Because `SYSTEM.md` replaces block 0, those generated lists are not available to the model in a custom system prompt.\n\nThe dynamic project/environment footer that remains after `SYSTEM.md` is only block 1 (`project-prompt.md`): workstation info, AGENTS.md context files, dir-context list, workspace tree, current date, cwd, and related project context. It does not include discovered skills.\n\nThere is currently no supported CLI mode for \"replace the stable default instructions but keep the generated skills/rules/tool guidance.\" If you need automatic skills loading, keep the default block and add your customization via `APPEND_SYSTEM.md`. If you fully replace with `SYSTEM.md`, you must hard-code any skill names/instructions you want the model to know about, and those will not track discovery automatically.\n\n### \"Customize automatic session titles\"\n\n`SYSTEM.md` and `APPEND_SYSTEM.md` do not affect the model call that names a new session. Create the title-specific prompt file instead:\n\n```text\n# ~/.omp/agent/TITLE_SYSTEM.md\nGenerate a session name using lowercase `<type>:<primary-objective>`.\nIf the message carries no concrete task, output exactly `none`.\n```\n\n`TITLE_SYSTEM.md` is discovered with the same project-then-user config-directory pattern as `SYSTEM.md` / `APPEND_SYSTEM.md`. When absent, OMP uses the bundled `title-system.md` / `tiny-title-system.md` prompts. When present, the online title path still forces the `set_title` tool call, and the local tiny-model path keeps the `<title>...</title>` wrapper while using this file as the system turn.\n\n### \"Replace everything, including project context\" — SDK-only\n\nThe normal CLI file/flag path intentionally preserves `defaultPrompt.slice(1)`. Code using `CreateAgentSessionOptions.systemPrompt` directly can return a full replacement array and omit the project footer, but that is not what `.omp/SYSTEM.md`, `~/.omp/agent/SYSTEM.md`, or `--system-prompt` do.\n\n### \"Replace, but keep one section of the default instructions\" — not directly supported\n\nThere is no built-in way to inherit specific sections from `system-prompt.md` while replacing the rest. The supported CLI modes are: append to the default prompt, or replace block 0 and keep the dynamic footer.\n\n---\n\n## 5) Deduplication\n\nThe CLI path avoids double-injecting discovered `SYSTEM.md` by replacing block 0 after the default prompt blocks are rendered. Any `systemPromptCustomization` from the secondary capability path would have been rendered into block 0, and that block is discarded when `main.ts` applies `[resolvedSystemPrompt, ...defaultPrompt.slice(1)]`.\n\nInside `buildSystemPrompt` itself, secondary customization and always-apply rules are still deduplicated:\n\n- `dedupePromptSource` drops a `systemPromptCustomization` block when it already appears in an internally supplied `customPrompt` or append prompt.\n- `dedupeAlwaysApplyRules` omits always-apply rules whose body appears verbatim in any of `{customPrompt, appendPrompt, systemPromptCustomization}`.\n\n---\n\n## 6) Discovery paths\n\nOnly one path actually drives the customization a CLI user sees: the primary CLI path. The capability layer exists but its `SYSTEM.md` output never reaches the rendered prompt under normal CLI startup.\n\n- The primary CLI path (`discoverSystemPromptFile` / `discoverAppendSystemPromptFile` in `main.ts`, which feeds `resolvedSystemPrompt` / `resolvedAppendPrompt`) calls `findConfigFile`. `findConfigFile` checks only `<cwd>/.omp`, `<cwd>/.claude`, `<cwd>/.codex`, `<cwd>/.gemini`, and the user-level equivalents — it does **not** walk up ancestors. Files in `<ancestor>/.omp/SYSTEM.md` are ignored when `omp` is started from a subdirectory.\n- The secondary capability path (`loadSystemPromptFiles` → builtin discovery) does walk up via `findNearestProjectConfigDir` and requires the project `.omp/` directory to be non-empty. Its result is rendered into the template variable `systemPromptCustomization`. Under normal CLI startup the default template (`system-prompt.md`) never references that variable, so ancestor-walk capability content has no user-visible effect.\n\nNet effect for CLI users: put `SYSTEM.md` / `APPEND_SYSTEM.md` directly under `<cwd>/.omp` (or another supported config base under cwd) or in the user-level location (`~/.omp/agent/SYSTEM.md` etc.). Ancestor paths are not searched.\n\n---\n\n## 7) Quick reference\n\n| Goal | Use |\n|---|---|\n| Add an instruction on top of the full default prompt | `APPEND_SYSTEM.md` or `--append-system-prompt` |\n| Replace the stable default instructions but keep project/environment context | `SYSTEM.md` or `--system-prompt` |\n| Preserve generated skills/rules/tool guidance while customizing | `APPEND_SYSTEM.md`; `SYSTEM.md` replaces that generated block |\n| Customize automatic session titles | `TITLE_SYSTEM.md`; chat-turn `SYSTEM.md` / `APPEND_SYSTEM.md` do not affect title generation |\n| Use `{{cwd}}` / `{{date}}` / other internals in my file | Not supported. Files are inserted verbatim. |\n| Inherit specific sections from `system-prompt.md` | Not supported; use append, or copy what you need into `SYSTEM.md`. |\n| Override at a per-repo level | Project `.omp/SYSTEM.md` under the cwd you launch `omp` from |\n| Override globally | `~/.omp/agent/SYSTEM.md` or `~/.omp/agent/APPEND_SYSTEM.md` |\n",
74
74
  "task-agent-discovery.md": "# Task Agent Discovery and Selection\n\nThis document describes how the task subsystem discovers agent definitions, merges multiple sources, and resolves a requested agent at execution time.\n\nIt covers runtime behavior as implemented today, including precedence, invalid-definition handling, and spawn/depth constraints that can make an agent effectively unavailable.\n\n## Implementation files\n\n- [`src/task/discovery.ts`](../packages/coding-agent/src/task/discovery.ts)\n- [`src/task/agents.ts`](../packages/coding-agent/src/task/agents.ts)\n- [`src/task/types.ts`](../packages/coding-agent/src/task/types.ts)\n- [`src/task/index.ts`](../packages/coding-agent/src/task/index.ts)\n- [`src/task/commands.ts`](../packages/coding-agent/src/task/commands.ts)\n- [`src/prompts/agents/task.md`](../packages/coding-agent/src/prompts/agents/task.md)\n- [`src/prompts/tools/task.md`](../packages/coding-agent/src/prompts/tools/task.md)\n- [`src/discovery/helpers.ts`](../packages/coding-agent/src/discovery/helpers.ts)\n- [`src/config.ts`](../packages/coding-agent/src/config.ts)\n- [`src/task/executor.ts`](../packages/coding-agent/src/task/executor.ts)\n\n---\n\n## Agent definition shape\n\nTask agents normalize into `AgentDefinition` (`src/task/types.ts`):\n\n- `name`, `description`, `systemPrompt` (required for a valid loaded agent)\n- optional `tools`, `spawns`, `model`, `thinkingLevel`, `output`, `blocking`, `autoloadSkills`, `readSummarize`\n- `source`: `\"bundled\" | \"user\" | \"project\"`\n- optional `filePath`\n\nParsing comes from frontmatter via `parseAgentFields()` (`src/discovery/helpers.ts`):\n\n- missing `name` or `description` => invalid (`null`), caller treats as parse failure\n- `tools` accepts CSV or array; if provided, `yield` is auto-added\n- `spawns` accepts `*`, CSV, or array\n- backward-compat behavior: if `spawns` missing but `tools` includes `task`, `spawns` becomes `*`\n- `output` is passed through as opaque schema data\n- `read-summarize: false` (parsed as `readSummarize`) forces the subagent's `read` tool to return verbatim file content instead of structural summaries — `runSubprocess` applies it as a `read.summarize.enabled: false` override on the subagent's isolated settings (`src/task/executor.ts`). `explore` and `librarian` ship with it disabled. Defaults to enabled when the field is absent.\n\n## Bundled agents\n\nBundled agents are embedded at build time (`src/task/agents.ts`) using text imports.\n\n`EMBEDDED_AGENT_DEFS` defines:\n\n- `explore`, `plan`, `designer`, `reviewer`, `librarian`, `oracle` from prompt files\n- `task` and `quick_task` from shared `task.md` body plus injected frontmatter\n\nLoading path:\n\n1. `loadBundledAgents()` parses embedded markdown with `parseAgent(..., \"bundled\", \"fatal\")`\n2. results are cached in-memory (`bundledAgentsCache`)\n3. `clearBundledAgentsCache()` is test-only cache reset\n\nBecause bundled parsing uses `level: \"fatal\"`, malformed bundled frontmatter throws and can fail discovery entirely.\n\n## Filesystem and plugin discovery\n\n`discoverAgents(cwd, home)` (`src/task/discovery.ts`) merges agents from OMP-native roots and Claude plugin roots before appending bundled definitions. Cross-harness roots such as `.claude/agents`, `.codex/agents`, and `.gemini/agents` are intentionally skipped — their frontmatter schema is not the OMP task-agent contract (`TASK_AGENT_CONFIG_SOURCE = \".omp\"` filters both dir lists).\n\n### Discovery inputs\n\n1. Nearest project `.omp` agents dir from `findAllNearestProjectConfigDirs(\"agents\", cwd)` (filtered to `.omp`; first hit only)\n2. User `.omp` agents dir from `getConfigDirs(\"agents\", { project: false })` (filtered to `.omp`; first hit only)\n3. Claude plugin roots (`listClaudePluginRoots(home, cwd)`) with `agents/` subdirs — only when `isProviderEnabled(\"claude-plugins\")`; project-scope plugins sort before user-scope\n4. Bundled agents (`loadBundledAgents()`)\n\n### Actual source order\n\n1. project `.omp/agents`\n2. user `~/.omp/agent/agents`\n3. plugin `agents/` dirs (project-scope first, then user-scope)\n4. bundled agents last\n\n## Merge and collision rules\n\nDiscovery uses first-wins dedup by exact `agent.name`:\n\n- A `Set<string>` tracks seen names.\n- Loaded agents are flattened in directory order and kept only if name unseen.\n- Bundled agents are filtered against the same set and only added if still unseen.\n\nImplications:\n\n- Project `.omp` overrides user `.omp`.\n- Non-bundled agents override bundled agents with the same name.\n- Name matching is case-sensitive (`Task` and `task` are distinct).\n- Within one directory, markdown files are read in lexicographic filename order before dedup.\n\n## Invalid/missing agent file behavior\n\nPer directory (`loadAgentsFromDir`):\n\n- unreadable/missing directory: treated as empty (`readdir(...).catch(() => [])`)\n- file read or parse failure: warning logged, file skipped\n- parse path uses `parseAgent(..., level: \"warn\")`\n\nFrontmatter failure behavior comes from `parseFrontmatter`:\n\n- parse error at `warn` level logs warning\n- parser falls back to a simple `key: value` line parser\n- if required fields are still missing, `parseAgentFields` fails, then `AgentParsingError` is thrown and caught by caller (file skipped)\n\nNet effect: one bad custom agent file does not abort discovery of other files.\n\n## Agent lookup and selection\n\nLookup is exact-name linear search:\n\n- `getAgent(agents, name)` => `agents.find(a => a.name === name)`\n\nIn spawn execution (`TaskTool.#executeSync` → `#runSpawn`):\n\n1. agents are rediscovered at execution time (`discoverAgents(this.session.cwd)`)\n2. requested `params.agent` is resolved through `getAgent`\n3. missing agent returns immediate tool response:\n - `Unknown agent \"...\". Available: ...`\n - no subprocess runs\n\n### Description vs execution-time discovery\n\n`TaskTool.create()` builds the tool description from discovery results at initialization time. `#executeSync` rediscovers agents, so the runtime set can differ from what was listed in the earlier tool description if agent files changed mid-session. The async entry path still uses the initialization-time list to decide whether an agent is marked `blocking` before scheduling.\n\n## Structured-output guardrails and schema precedence\n\nRuntime output schema precedence in `TaskTool.execute`:\n\n1. agent frontmatter `output`\n2. parent session `outputSchema`\n\n(`effectiveOutputSchema = effectiveAgent.output ?? this.session.outputSchema` — the task call itself never carries a schema; ad-hoc structured workflows go through the eval bridge's `agent(prompt, schema)`.)\n\nThe model-facing prompt (`src/prompts/tools/task.md`) no longer carries the old structured-output mismatch warning; it tags read-only agents and warns against offloading reasoning to `explore`/`quick_task` instead.\n\n## Command discovery interaction\n\n`src/task/commands.ts` is parallel infrastructure for workflow commands (not agent definitions), but it follows the same overall pattern:\n\n- discover from capability providers first\n- deduplicate by name with first-wins\n- append bundled commands if still unseen\n- exact-name lookup via `getCommand`\n\nIn `src/task/index.ts`, command helpers are re-exported with agent discovery helpers. Agent discovery itself does not depend on command discovery at runtime.\n\n## Availability constraints beyond discovery\n\nAn agent can be discoverable but still unavailable to run because of execution guardrails.\n\n### Disabled-agent settings\n\n`TaskTool.#executeSync` checks `task.disabledAgents` after resolving the agent. If the requested name is disabled, execution returns an immediate error listing enabled alternatives when available.\n\n### Parent spawn policy\n\n`TaskTool.#executeSync` checks `session.getSessionSpawns()`:\n\n- `\"*\"` => allow any\n- `\"\"` => deny all\n- CSV list => allow only listed names\n\nIf denied: immediate `Cannot spawn '...'. Allowed: ...` response.\n\n### Blocked self-recursion env guard\n\n`PI_BLOCKED_AGENT` is read at tool construction. If request matches, execution is rejected with recursion-prevention message.\n\n### Recursion-depth gating (task tool availability inside child sessions)\n\nIn `runSubprocess` (`src/task/executor.ts`):\n\n- depth computed from `taskDepth`\n- `task.maxRecursionDepth` controls cutoff\n- when at max depth:\n - `task` tool is removed from child tool list\n - child `spawns` env is set to empty\n\nSo deeper levels cannot spawn further tasks even if the agent definition includes `spawns`.\n\n## Plan mode behavior\n\nWhen parent plan mode is enabled, `TaskTool.#runSpawn` builds an `effectiveAgent` before launching subprocesses:\n\n- prepends the plan-mode subagent system prompt\n- restricts tools to `read`, `search`, `find`, `lsp`, and `web_search`, plus `ast_grep`/`report_finding` when the agent's own tool list declares them (`PLAN_MODE_AGENT_TOOL_ALLOWLIST`)\n- clears child spawns\n\nThe same `effectiveAgent` is used for subprocess launch, model/thinking overrides, and output-schema selection.\n",
75
75
  "theme.md": "# Theming Reference\n\nThis document describes how theming works in the coding-agent today: schema, loading, runtime behavior, and failure modes.\n\n## What the theme system controls\n\nThe theme system drives:\n\n- foreground/background color tokens used across the TUI\n- markdown styling adapters (`getMarkdownTheme()`)\n- selector/editor/settings list adapters (`getSelectListTheme()`, `getEditorTheme()`, `getSettingsListTheme()`)\n- symbol preset + symbol overrides (`unicode`, `nerd`, `ascii`)\n- syntax highlighting colors used by native highlighter (`@oh-my-pi/pi-natives`)\n- status line segment colors\n\nPrimary implementation: `src/modes/theme/theme.ts`.\n\n## Theme JSON shape\n\nTheme files are JSON objects validated against the runtime schema in `theme.ts` (`themeJsonSchema`) and mirrored by `src/modes/theme/theme-schema.json`.\n\nTop-level fields:\n\n- `name` (required)\n- `colors` (required; all color tokens required)\n- `vars` (optional; reusable color variables)\n- `export` (optional; HTML export colors)\n- `symbols` (optional)\n - `preset` (optional: `unicode | nerd | ascii`)\n - `overrides` (optional: key/value overrides for `SymbolKey`)\n\nColor values accept:\n\n- hex string (`\"#RRGGBB\"`)\n- 256-color index (`0..255`)\n- variable reference string (resolved through `vars`)\n- empty string (`\"\"`) meaning terminal default (`\\x1b[39m` fg, `\\x1b[49m` bg)\n\n## Required color tokens (current)\n\nAll tokens below are required in `colors`.\n\n### Core text and borders (11)\n\n`accent`, `border`, `borderAccent`, `borderMuted`, `success`, `error`, `warning`, `muted`, `dim`, `text`, `thinkingText`\n\n### Background blocks (7)\n\n`selectedBg`, `userMessageBg`, `customMessageBg`, `toolPendingBg`, `toolSuccessBg`, `toolErrorBg`, `statusLineBg`\n\n### Message/tool text (5)\n\n`userMessageText`, `customMessageText`, `customMessageLabel`, `toolTitle`, `toolOutput`\n\n### Markdown (10)\n\n`mdHeading`, `mdLink`, `mdLinkUrl`, `mdCode`, `mdCodeBlock`, `mdCodeBlockBorder`, `mdQuote`, `mdQuoteBorder`, `mdHr`, `mdListBullet`\n\n### Tool diff + syntax highlighting (12)\n\n`toolDiffAdded`, `toolDiffRemoved`, `toolDiffContext`,\n`syntaxComment`, `syntaxKeyword`, `syntaxFunction`, `syntaxVariable`, `syntaxString`, `syntaxNumber`, `syntaxType`, `syntaxOperator`, `syntaxPunctuation`\n\n### Mode/thinking borders (8)\n\n`thinkingOff`, `thinkingMinimal`, `thinkingLow`, `thinkingMedium`, `thinkingHigh`, `thinkingXhigh`, `bashMode`, `pythonMode`\n\n### Status line segment colors (13)\n\n`statusLineSep`, `statusLineModel`, `statusLinePath`, `statusLineGitClean`, `statusLineGitDirty`, `statusLineContext`, `statusLineSpend`, `statusLineStaged`, `statusLineDirty`, `statusLineUntracked`, `statusLineOutput`, `statusLineCost`, `statusLineSubagents`\n\n## Optional tokens\n\n### `export` section (optional)\n\nUsed for HTML export theming helpers:\n\n- `export.pageBg`\n- `export.cardBg`\n- `export.infoBg`\n\nIf omitted, export code derives defaults from resolved theme colors.\n\n### `symbols` section (optional)\n\n- `symbols.preset` sets a theme-level default symbol set.\n- `symbols.overrides` can override individual `SymbolKey` values.\n- `symbols.spinnerFrames` overrides the loading spinner frames. Accepts either a flat `string[]` (applied to both spinner types) or an object `{ \"status\"?: string[], \"activity\"?: string[] }` to override each type independently. Any type not specified falls back to the symbol preset's default frames. `status` drives the ~12.5fps spinner used by loaders and tool-execution indicators; `activity` drives the ~30fps spinner used by markdown progress bars and similar high-frequency UI.\n\nRuntime precedence:\n\n1. settings `symbolPreset` override (if set)\n2. theme JSON `symbols.preset`\n3. fallback `\"unicode\"`\n\nInvalid override keys are ignored and logged (`logger.debug`).\n\n## Built-in vs custom theme sources\n\nTheme lookup order (`loadThemeJson`):\n\n1. built-in embedded themes (`dark.json`, `light.json`, and all `defaults/*.json` compiled into `defaultThemes`)\n2. custom theme file: `<customThemesDir>/<name>.json`\n\nCustom themes directory comes from `getCustomThemesDir()`:\n\n- default: `~/.omp/agent/themes`\n- overridden by `PI_CODING_AGENT_DIR` (`$PI_CODING_AGENT_DIR/themes`)\n\n`getAvailableThemes()` returns merged built-in + custom names, sorted, with built-ins taking precedence on name collision.\n\n## Loading, validation, and resolution\n\nFor custom theme files:\n\n1. read JSON\n2. parse JSON\n3. validate against `ThemeJsonSchema`\n4. resolve `vars` references recursively\n5. convert resolved values to ANSI by terminal capability mode\n\nValidation behavior:\n\n- missing required color tokens: explicit grouped error message\n- bad token types/values: validation errors with JSON path\n- unknown theme file: `Theme not found: <name>`\n\nVar reference behavior:\n\n- supports nested references\n- throws on missing variable reference\n- throws on circular references\n\n## Terminal color mode behavior\n\nColor mode detection (`detectColorMode`):\n\n- `COLORTERM=truecolor|24bit` => truecolor\n- `WT_SESSION` => truecolor\n- `TERM` in `dumb`, `linux`, or empty => 256color\n- otherwise => truecolor\n\nConversion behavior:\n\n- hex -> `Bun.color(..., \"ansi-16m\" | \"ansi-256\")`\n- numeric -> `38;5` / `48;5` ANSI\n- `\"\"` -> default fg/bg reset\n\n## Runtime switching behavior\n\n### Initial theme (`initTheme`)\n\n`main.ts` initializes theme with settings:\n\n- `symbolPreset`\n- `colorBlindMode`\n- `theme.dark`\n- `theme.light`\n\nAuto theme slot selection uses terminal appearance in this order:\n\n1. terminal-reported OSC 11 background luminance, unless the macOS/Zellij fallback path is active\n2. `COLORFGBG` background index (`< 8` => dark, `>= 8` => light)\n3. macOS appearance fallback only for the known-broken macOS/Zellij OSC 11 path\n4. dark slot fallback\n\nCurrent defaults from settings schema:\n\n- `theme.dark = \"titanium\"`\n- `theme.light = \"light\"`\n- `symbolPreset = \"unicode\"`\n- `colorBlindMode = false`\n\n### Explicit switching (`setTheme`)\n\n- loads selected theme\n- updates global `theme` singleton\n- optionally starts watcher\n- triggers `onThemeChange` callback\n\nOn failure:\n\n- falls back to built-in `dark`\n- returns `{ success: false, error }`\n\n### Preview switching (`previewTheme`)\n\n- applies temporary preview theme to global `theme`\n- does **not** change persisted settings by itself\n- returns success/error without fallback replacement\n\nSettings UI uses this for live preview and restores prior theme on cancel.\n\n## Watchers and live reload\n\nWhen watcher is enabled (`setTheme(..., true)` / interactive init):\n\n- watches `<customThemesDir>/<currentTheme>.json` only when that file exists\n- built-ins are effectively not watched; built-in theme lookup also takes precedence over same-name custom files\n- matching file changes schedule a debounced reload; reload errors or temporary file absence keep the last successfully loaded theme\n- the watcher does not perform a delete/rename fallback; it waits for a future successful reload or explicit theme switch\n\nAuto mode also reevaluates dark/light slot mapping from terminal appearance changes, `SIGWINCH`, and the macOS fallback observer when active.\n\n## Color-blind mode behavior\n\n`colorBlindMode` changes only one token at runtime:\n\n- `toolDiffAdded` is HSV-adjusted (green shifted toward blue)\n- adjustment is applied only when resolved value is a hex string\n\nOther tokens are unchanged.\n\n## Where theme settings are persisted\n\nTheme-related settings are persisted by `Settings` to global config YAML:\n\n- path: `<agentDir>/config.yml`\n- default agent dir: `~/.omp/agent`\n- effective default file: `~/.omp/agent/config.yml`\n\nPersisted keys:\n\n- `theme.dark`\n- `theme.light`\n- `symbolPreset`\n- `colorBlindMode`\n\nLegacy migration exists: old flat `theme: \"name\"` is migrated to nested `theme.dark` or `theme.light` based on luminance detection.\n\n## Creating a custom theme (practical)\n\n1. Create file in custom themes dir, e.g. `~/.omp/agent/themes/my-theme.json`.\n2. Include `name`, optional `vars`, and **all required** `colors` tokens.\n3. Optionally include `symbols` and `export`.\n4. Select the theme in Settings (`Appearance -> Dark Theme` or `Appearance -> Light Theme`) depending on which auto slot you want.\n\nMinimal skeleton:\n\n```json\n{\n \"name\": \"my-theme\",\n \"vars\": {\n \"accent\": \"#7aa2f7\",\n \"muted\": 244\n },\n \"colors\": {\n \"accent\": \"accent\",\n \"border\": \"#4c566a\",\n \"borderAccent\": \"accent\",\n \"borderMuted\": \"muted\",\n \"success\": \"#9ece6a\",\n \"error\": \"#f7768e\",\n \"warning\": \"#e0af68\",\n \"muted\": \"muted\",\n \"dim\": 240,\n \"text\": \"\",\n \"thinkingText\": \"muted\",\n\n \"selectedBg\": \"#2a2f45\",\n \"userMessageBg\": \"#1f2335\",\n \"userMessageText\": \"\",\n \"customMessageBg\": \"#24283b\",\n \"customMessageText\": \"\",\n \"customMessageLabel\": \"accent\",\n \"toolPendingBg\": \"#1f2335\",\n \"toolSuccessBg\": \"#1f2d2a\",\n \"toolErrorBg\": \"#2d1f2a\",\n \"toolTitle\": \"\",\n \"toolOutput\": \"muted\",\n\n \"mdHeading\": \"accent\",\n \"mdLink\": \"accent\",\n \"mdLinkUrl\": \"muted\",\n \"mdCode\": \"#c0caf5\",\n \"mdCodeBlock\": \"#c0caf5\",\n \"mdCodeBlockBorder\": \"muted\",\n \"mdQuote\": \"muted\",\n \"mdQuoteBorder\": \"muted\",\n \"mdHr\": \"muted\",\n \"mdListBullet\": \"accent\",\n\n \"toolDiffAdded\": \"#9ece6a\",\n \"toolDiffRemoved\": \"#f7768e\",\n \"toolDiffContext\": \"muted\",\n\n \"syntaxComment\": \"#565f89\",\n \"syntaxKeyword\": \"#bb9af7\",\n \"syntaxFunction\": \"#7aa2f7\",\n \"syntaxVariable\": \"#c0caf5\",\n \"syntaxString\": \"#9ece6a\",\n \"syntaxNumber\": \"#ff9e64\",\n \"syntaxType\": \"#2ac3de\",\n \"syntaxOperator\": \"#89ddff\",\n \"syntaxPunctuation\": \"#9aa5ce\",\n\n \"thinkingOff\": 240,\n \"thinkingMinimal\": 244,\n \"thinkingLow\": \"#7aa2f7\",\n \"thinkingMedium\": \"#2ac3de\",\n \"thinkingHigh\": \"#bb9af7\",\n \"thinkingXhigh\": \"#f7768e\",\n\n \"bashMode\": \"#2ac3de\",\n \"pythonMode\": \"#bb9af7\",\n\n \"statusLineBg\": \"#16161e\",\n \"statusLineSep\": 240,\n \"statusLineModel\": \"#bb9af7\",\n \"statusLinePath\": \"#7aa2f7\",\n \"statusLineGitClean\": \"#9ece6a\",\n \"statusLineGitDirty\": \"#e0af68\",\n \"statusLineContext\": \"#2ac3de\",\n \"statusLineSpend\": \"#7dcfff\",\n \"statusLineStaged\": \"#9ece6a\",\n \"statusLineDirty\": \"#e0af68\",\n \"statusLineUntracked\": \"#f7768e\",\n \"statusLineOutput\": \"#c0caf5\",\n \"statusLineCost\": \"#ff9e64\",\n \"statusLineSubagents\": \"#bb9af7\"\n }\n}\n```\n\n## Testing custom themes\n\nUse this workflow:\n\n1. Start interactive mode (watcher enabled from startup).\n2. Open settings and preview theme values (live `previewTheme`).\n3. For custom theme files, edit the JSON while running and confirm auto-reload on save.\n4. Exercise critical surfaces:\n - markdown rendering\n - tool blocks (pending/success/error)\n - diff rendering (added/removed/context)\n - status line readability\n - thinking level border changes\n - bash/python mode border colors\n5. Validate both symbol presets if your theme depends on glyph width/appearance.\n\n## Real constraints and caveats\n\n- All `colors` tokens are required for custom themes.\n- `export` and `symbols` are optional.\n- `$schema` in theme JSON is informational; runtime validation is enforced by a Zod schema in code.\n- `setTheme` failure falls back to `dark`; `previewTheme` failure does not replace current theme.\n- File watcher reload errors or temporary missing files keep the current loaded theme until a successful reload or explicit theme switch.\n",
76
+ "toolconv/anthropic.md": "# Anthropic Claude tool use (Messages API content blocks)\n\nAnthropic's Claude is a closed, hosted model family; there are no released weights and therefore no `--tool-call-parser` flag to set. The canonical tool-calling convention is the **Messages API** (`POST /v1/messages`, header `anthropic-version: 2023-06-01`): tools are advertised in a top-level `tools` array, the model returns structured `tool_use` **content blocks** with `stop_reason: \"tool_use\"`, and you feed results back as `tool_result` content blocks inside a `user` message. Tool use is \"enabled\" simply by including the `tools` parameter (optionally with `tool_choice`); the API then injects a tool-use system prompt and parses the model's output back into JSON blocks for you. This applies to all current models (Claude Opus / Sonnet / Haiku 3.x, 4, 4.x) and is mirrored by gateways such as LiteLLM and by third-party Claude-compatible servers.\n\nUnder the hood the model is trained to emit an **XML** function-call syntax (`<function_calls>` / `<invoke>` / `<parameter>`); the API serializes your JSON-Schema tools into a system prompt and converts the model's XML output into JSON `tool_use` blocks. That underlying format is documented as the *secondary* convention below, together with the older, now-retired prompt-based **legacy XML** format (`<tool_name>` / `<parameters>` / `<function_results>`) that pre-dates the Messages API and still surfaces when you do tool use purely through prompting.\n\nThe primary, authoritative shape for any parser/renderer is the JSON content-block format. The XML is informational (and the only thing visible if you reconstruct prompts at the token level).\n\n---\n\n## Content-block types & stop reasons\n\nAnthropic has no token-level tool delimiters in the public API. The unit is the **content block**: every `message.content` is an array of typed blocks. Tool calling adds two block types and one stop reason; streaming adds a delta type.\n\n| Item | Where | Shape / meaning |\n| --- | --- | --- |\n| `text` block | assistant & user | `{\"type\":\"text\",\"text\":\"...\"}`. Plain prose. Assistant may emit text *before* its tool calls. |\n| `tool_use` block | assistant | `{\"type\":\"tool_use\",\"id\":\"toolu_...\",\"name\":\"<tool>\",\"input\":{...}}`. The function call. `input` is a **nested JSON object** (already parsed), conforming to the tool's `input_schema`. |\n| `tool_result` block | user | `{\"type\":\"tool_result\",\"tool_use_id\":\"toolu_...\",\"content\":<string \\| block[]>,\"is_error\":<bool?>}`. The executed result, sent back in a `user` message. |\n| `server_tool_use` block | assistant | `{\"type\":\"server_tool_use\",\"id\":\"srvtoolu_...\",\"name\":\"web_search\",\"input\":{...}}`. Emitted for Anthropic-executed server tools; you do **not** return a `tool_result` for these. |\n| `web_search_tool_result` (and similar) | assistant | Server-tool output, injected by Anthropic inline in the assistant turn. |\n| `thinking` / `redacted_thinking` block | assistant | Extended-thinking reasoning blocks; carry a `signature`. Must be preserved verbatim across turns when thinking + tools are combined. |\n| `stop_reason: \"tool_use\"` | response top level | The model invoked one or more tools and is waiting for results. Drives the agentic loop. |\n| `stop_reason: \"end_turn\"` | response top level | Natural completion (no tool call); the loop exits. |\n| Other `stop_reason` | response top level | `\"max_tokens\"`, `\"stop_sequence\"`, `\"pause_turn\"` (long server-tool turn, resend as-is to continue), `\"refusal\"`. |\n| `id` prefixes | — | Messages `msg_…`; client tool calls `toolu_…`; server tool calls `srvtoolu_…`. |\n\nStreaming adds these SSE events / delta types (full list under [Roles / channels](#roles--channels--turn-structure) and [Tool-call format](#tool-call-format)):\n\n| Streaming item | Shape / meaning |\n| --- | --- |\n| `message_start` | Carries a `Message` skeleton with empty `content`, `stop_reason: null`. |\n| `content_block_start` | Opens a block at `index`. For a tool call: `content_block.{type:\"tool_use\",id,name,input:{}}` — `input` starts as an **empty object**. |\n| `content_block_delta` / `input_json_delta` | `{\"type\":\"input_json_delta\",\"partial_json\":\"<chunk>\"}` — a **partial JSON string** fragment of `tool_use.input`. |\n| `content_block_delta` / `text_delta` | `{\"type\":\"text_delta\",\"text\":\"...\"}`. |\n| `content_block_delta` / `thinking_delta`, `signature_delta` | Extended-thinking content / signature. |\n| `content_block_stop` | Closes the block at `index`; this is when accumulated `partial_json` is complete and safe to `JSON.parse`. |\n| `message_delta` | Top-level updates; carries the final `delta.stop_reason` (e.g. `\"tool_use\"`) and **cumulative** `usage`. |\n| `message_stop` | End of stream. |\n| `ping` / `error` | Keep-alive; `error` (e.g. `overloaded_error`) may appear mid-stream. |\n\n### Legacy XML tags (prompt-based, pre-Messages-API)\n\nThe retired prompt-based format used these tags. They are nested-element tags (no attributes), distinct from the modern attribute form (`<invoke name=\"…\">`). Verified against Anthropic's archived \"Legacy tool use\" doc (see [Sources](#sources)).\n\n| Tag | Role | Notes |\n| --- | --- | --- |\n| `<tools>` … `</tools>` | tool advertising | Container in the system prompt wrapping all `<tool_description>` entries. |\n| `<tool_description>` | tool advertising | One per tool: holds `<tool_name>`, `<description>`, `<parameters>`. |\n| `<tool_name>` | both | Function name (used in definitions, calls, and results). |\n| `<parameters>` / `<parameter>` | definition | `<parameters>` wraps `<parameter>` entries, each with `<name>`, `<type>`, `<description>`. |\n| `<function_calls>` | model output | Wraps one or more `<invoke>` blocks. |\n| `<invoke>` | model output | One function call; contains `<tool_name>` + a `<parameters>` block of `<paramName>value</paramName>` child tags. |\n| `<function_results>` | tool result (fed back) | Wraps `<result>` (success) or `<error>` (failure). |\n| `<result>` / `<stdout>` | tool result | `<result>` holds `<tool_name>` + `<stdout>`; the output text goes in `<stdout>`. |\n| `<error>` | tool result | Replaces `<result>` when the function raised. |\n| `</function_calls>` | stop sequence | Passed as `stop_sequence` so generation halts after a call. |\n| `<scratchpad>` / `<answer>` | model output | Conventionally used for chain-of-thought and final answer in legacy prompts. |\n\n---\n\n## Roles / channels / turn structure\n\nThe Messages API uses only two conversational roles, `user` and `assistant`, alternating. There is **no** dedicated `tool`/`function` role and **no** top-level `system` role — the system prompt is a separate top-level `system` parameter (string or text-block array). Tool data rides inside the normal roles:\n\n- `assistant` messages contain AI-generated `text`, `thinking`, and `tool_use` (and `server_tool_use`) blocks.\n- `user` messages contain your `text`/`image`/`document` content and `tool_result` blocks.\n\nThere are no named \"channels\". The closest analogue to a reasoning channel is the extended-thinking `thinking` content block (a first-class block with a cryptographic `signature`), kept separate from the user-visible `text` block. When thinking is enabled alongside tools, the `thinking` block(s) from a tool-calling turn must be passed back unmodified in the follow-up request.\n\nThe agentic loop is keyed on `stop_reason`:\n\n1. Send `tools` + the user message.\n2. Claude responds with `stop_reason: \"tool_use\"` and one or more `tool_use` blocks (optionally preceded by a `text` block).\n3. Execute each tool; build a `tool_result` block per call.\n4. Append the assistant message **and** a `user` message carrying all `tool_result` blocks; resend.\n5. Repeat while `stop_reason == \"tool_use\"`; exit on `end_turn` (or another terminal reason).\n\nStrict ordering rules (a 400 otherwise):\n- `tool_result` blocks must come **first** in the `user` message's `content` array (any text after them).\n- The `tool_result` `user` message must **immediately follow** the assistant `tool_use` message — nothing in between.\n- Every `tool_use.id` must be answered by a `tool_result.tool_use_id` in that next message.\n\n---\n\n## Tool definitions\n\nTools are passed in the top-level `tools` array. Each user-defined (client) tool is a **flat** object — no `{\"type\":\"function\", \"function\":{…}}` wrapper (that wrapper is OpenAI's). Fields:\n\n- `name` — matches `^[a-zA-Z0-9_-]{1,64}$`.\n- `description` — detailed plaintext (the single biggest driver of tool-call quality).\n- `input_schema` — a JSON Schema object (**not** `parameters`) describing the input the model must produce.\n- Optional: `input_examples`, `cache_control`, `strict`, `defer_loading`, `allowed_callers`.\n\n```json\n{\n \"name\": \"get_weather\",\n \"description\": \"Get the current weather in a given location\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"location\": {\n \"type\": \"string\",\n \"description\": \"The city and state, e.g. San Francisco, CA\"\n },\n \"unit\": {\n \"type\": \"string\",\n \"enum\": [\"celsius\", \"fahrenheit\"],\n \"description\": \"The unit of temperature, either 'celsius' or 'fahrenheit'\"\n }\n },\n \"required\": [\"location\"]\n }\n}\n```\n\nAnthropic-schema client tools (`bash`, `text_editor`, `computer`, `memory`) and server tools (`web_search`, `web_fetch`, `code_execution`, `tool_search`) instead carry a versioned `type`, e.g. `{\"type\": \"web_search_20250305\", \"name\": \"web_search\"}`.\n\n`tool_choice` controls invocation (four options):\n- `{\"type\":\"auto\"}` — model decides (default when `tools` present).\n- `{\"type\":\"any\"}` — must call some tool.\n- `{\"type\":\"tool\",\"name\":\"get_weather\"}` — must call that specific tool.\n- `{\"type\":\"none\"}` — no tools (default when no `tools`).\n\nWith `any` or `tool` the API prefills the assistant turn, so no leading natural-language text precedes the `tool_use` block. Add `\"disable_parallel_tool_use\": true` inside `tool_choice` to cap at one tool per turn. (Extended thinking only supports `auto`/`none`.)\n\n### How the API turns this into a prompt (the bridge to XML)\n\nWhen `tools` is present, the API constructs a tool-use system prompt with this skeleton (verified from \"Define tools\"):\n\n```text\nIn this environment you have access to a set of tools you can use to answer the user's question.\n{{ FORMATTING INSTRUCTIONS }}\nString and scalar parameters should be specified as is, while lists and objects should use JSON format. Note that spaces for string values are not stripped. The output is not expected to be valid XML and is parsed with regular expressions.\nHere are the functions available in JSONSchema format:\n{{ TOOL DEFINITIONS IN JSON SCHEMA }}\n{{ USER SYSTEM PROMPT }}\n{{ TOOL CONFIGURATION }}\n```\n\n`{{ TOOL DEFINITIONS IN JSON SCHEMA }}` is your `tools` array serialized to JSON Schema. `{{ FORMATTING INSTRUCTIONS }}` is the (unpublished) block teaching the model the XML syntax with `antml:` namespace prefixes (shown under [Tool-call format → underlying XML](#underlying-xml-modern-attribute-form-with-antml-namespace)). The note \"parsed with regular expressions\" is why output need not be well-formed XML.\n\n---\n\n## Tool-call format\n\nThe wire format your application consumes is JSON. A single call is one `tool_use` content block in the assistant message, with `stop_reason: \"tool_use\"` at the top level:\n\n```json\n{\n \"id\": \"msg_01Aq9w938a90dw8q\",\n \"type\": \"message\",\n \"role\": \"assistant\",\n \"model\": \"claude-opus-4-8\",\n \"content\": [\n {\n \"type\": \"text\",\n \"text\": \"I'll check the current weather in San Francisco for you.\"\n },\n {\n \"type\": \"tool_use\",\n \"id\": \"toolu_01A09q90qw90lq917835lq9\",\n \"name\": \"get_weather\",\n \"input\": { \"location\": \"San Francisco, CA\", \"unit\": \"celsius\" }\n }\n ],\n \"stop_reason\": \"tool_use\",\n \"stop_sequence\": null,\n \"usage\": { \"input_tokens\": 472, \"output_tokens\": 65 }\n}\n```\n\nKey facts for a parser:\n- `tool_use.input` is an already-parsed **object**, never a JSON string.\n- A leading `text` block is optional and informational; do not rely on its wording.\n- Match calls to results by `id` → `tool_use_id`.\n\n### Underlying XML (modern attribute form with antml: namespace)\n\nBefore the API converts it, the model literally emits an XML block. The current (Claude 3+) form is attribute-based:\n\n```text\n<function_calls>\n<invoke name=\"get_weather\">\n<parameter name=\"location\">San Francisco, CA</parameter>\n<parameter name=\"unit\">celsius</parameter>\n</invoke>\n</function_calls>\n```\n\nCurrent Claude models prefix these tags with an `antml:` XML namespace prefix (e.g. `antml:function_calls`, `antml:invoke name=\"…\"`, `antml:parameter name=\"…\"`). The API strips all of this and exposes only the JSON `tool_use` block; integrators should target the JSON, not the XML.\n\n---\n\n## Multiple / parallel tool calls\n\nParallel calls are the default. Claude emits **multiple `tool_use` blocks in a single assistant message**:\n\n```json\n{\n \"role\": \"assistant\",\n \"content\": [\n { \"type\": \"text\", \"text\": \"Let me check both cities.\" },\n {\n \"type\": \"tool_use\",\n \"id\": \"toolu_01weather_sf\",\n \"name\": \"get_weather\",\n \"input\": { \"location\": \"San Francisco, CA\" }\n },\n {\n \"type\": \"tool_use\",\n \"id\": \"toolu_02weather_nyc\",\n \"name\": \"get_weather\",\n \"input\": { \"location\": \"New York, NY\" }\n }\n ]\n}\n```\n\nYou return **all** results in **one** `user` message, one `tool_result` per call, results first:\n\n```json\n{\n \"role\": \"user\",\n \"content\": [\n {\n \"type\": \"tool_result\",\n \"tool_use_id\": \"toolu_01weather_sf\",\n \"content\": \"San Francisco: 68F, partly cloudy\"\n },\n {\n \"type\": \"tool_result\",\n \"tool_use_id\": \"toolu_02weather_nyc\",\n \"content\": \"New York: 45F, clear skies\"\n }\n ]\n}\n```\n\nCalls in one turn are **unordered** and may be run concurrently. If two batched calls turn out to depend on each other, return the natural error in a `tool_result` with `\"is_error\": true`; Claude reissues the dependent call on a later turn. (In the legacy XML format, parallelism is multiple `<invoke>` blocks inside one `<function_calls>`.)\n\n---\n\n## Tool-result format\n\nA result is a `tool_result` block inside a `user` message:\n\n- `tool_use_id` (required) — the `id` of the `tool_use` it answers.\n- `content` (optional) — a string, **or** an array of `text`/`image`/`document` blocks. Omit for an empty result.\n- `is_error` (optional) — `true` for execution failures; put a useful message in `content`.\n\n```json\n{\n \"role\": \"user\",\n \"content\": [\n {\n \"type\": \"tool_result\",\n \"tool_use_id\": \"toolu_01A09q90qw90lq917835lq9\",\n \"content\": \"15 degrees\"\n }\n ]\n}\n```\n\nError result:\n\n```json\n{\n \"role\": \"user\",\n \"content\": [\n {\n \"type\": \"tool_result\",\n \"tool_use_id\": \"toolu_01A09q90qw90lq917835lq9\",\n \"content\": \"ConnectionError: the weather service API is not available (HTTP 500)\",\n \"is_error\": true\n }\n ]\n}\n```\n\nRich result (text + image blocks):\n\n```json\n{\n \"role\": \"user\",\n \"content\": [\n {\n \"type\": \"tool_result\",\n \"tool_use_id\": \"toolu_01A09q90qw90lq917835lq9\",\n \"content\": [\n { \"type\": \"text\", \"text\": \"15 degrees\" },\n {\n \"type\": \"image\",\n \"source\": { \"type\": \"base64\", \"media_type\": \"image/jpeg\", \"data\": \"/9j/4AAQSkZJRg...\" }\n }\n ]\n }\n ]\n}\n```\n\nServer tools require **no** `tool_result` from you — Anthropic executes them and injects the result inline in the assistant turn. (Legacy XML feeds results back as `<function_results><result><tool_name>…</tool_name><stdout>…</stdout></result></function_results>`, or `<error>…</error>` on failure.)\n\n---\n\n## End-to-end example\n\nA complete multi-turn weather exchange. All JSON is valid.\n\n**Request 1 — system + tools + user question:**\n\n```json\n{\n \"model\": \"claude-opus-4-8\",\n \"max_tokens\": 1024,\n \"system\": \"You are a helpful weather assistant. Use the provided tools to answer.\",\n \"tools\": [\n {\n \"name\": \"get_weather\",\n \"description\": \"Get the current weather in a given location\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"location\": { \"type\": \"string\", \"description\": \"The city and state, e.g. San Francisco, CA\" },\n \"unit\": { \"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"], \"description\": \"Unit for the temperature\" }\n },\n \"required\": [\"location\"]\n }\n }\n ],\n \"messages\": [\n { \"role\": \"user\", \"content\": \"What's the weather in San Francisco?\" }\n ]\n}\n```\n\n**Response 1 — assistant requests the tool (`stop_reason: \"tool_use\"`):**\n\n```json\n{\n \"id\": \"msg_01Aq9w938a90dw8q\",\n \"type\": \"message\",\n \"role\": \"assistant\",\n \"model\": \"claude-opus-4-8\",\n \"content\": [\n { \"type\": \"text\", \"text\": \"I'll check the current weather in San Francisco for you.\" },\n {\n \"type\": \"tool_use\",\n \"id\": \"toolu_01A09q90qw90lq917835lq9\",\n \"name\": \"get_weather\",\n \"input\": { \"location\": \"San Francisco, CA\", \"unit\": \"celsius\" }\n }\n ],\n \"stop_reason\": \"tool_use\",\n \"stop_sequence\": null,\n \"usage\": { \"input_tokens\": 472, \"output_tokens\": 65 }\n}\n```\n\n**Request 2 — replay history, append the assistant turn and the `tool_result`:**\n\n```json\n{\n \"model\": \"claude-opus-4-8\",\n \"max_tokens\": 1024,\n \"system\": \"You are a helpful weather assistant. Use the provided tools to answer.\",\n \"tools\": [\n {\n \"name\": \"get_weather\",\n \"description\": \"Get the current weather in a given location\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"location\": { \"type\": \"string\", \"description\": \"The city and state, e.g. San Francisco, CA\" },\n \"unit\": { \"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"], \"description\": \"Unit for the temperature\" }\n },\n \"required\": [\"location\"]\n }\n }\n ],\n \"messages\": [\n { \"role\": \"user\", \"content\": \"What's the weather in San Francisco?\" },\n {\n \"role\": \"assistant\",\n \"content\": [\n { \"type\": \"text\", \"text\": \"I'll check the current weather in San Francisco for you.\" },\n {\n \"type\": \"tool_use\",\n \"id\": \"toolu_01A09q90qw90lq917835lq9\",\n \"name\": \"get_weather\",\n \"input\": { \"location\": \"San Francisco, CA\", \"unit\": \"celsius\" }\n }\n ]\n },\n {\n \"role\": \"user\",\n \"content\": [\n {\n \"type\": \"tool_result\",\n \"tool_use_id\": \"toolu_01A09q90qw90lq917835lq9\",\n \"content\": \"15 degrees Celsius, partly cloudy\"\n }\n ]\n }\n ]\n}\n```\n\n**Response 2 — assistant's final answer (`stop_reason: \"end_turn\"`):**\n\n```json\n{\n \"id\": \"msg_01EeFG3hijk2lmno4PqrSt\",\n \"type\": \"message\",\n \"role\": \"assistant\",\n \"model\": \"claude-opus-4-8\",\n \"content\": [\n { \"type\": \"text\", \"text\": \"It's currently 15 degrees Celsius and partly cloudy in San Francisco.\" }\n ],\n \"stop_reason\": \"end_turn\",\n \"stop_sequence\": null,\n \"usage\": { \"input_tokens\": 530, \"output_tokens\": 18 }\n}\n```\n\n### Streaming (SSE) shape of the tool call\n\nThe same tool call, streamed. Note `tool_use` opens with an empty `input`, the arguments arrive as `input_json_delta.partial_json` fragments, and the final `stop_reason` lands in `message_delta`. This block is reproduced verbatim from Anthropic's streaming docs:\n\n```text\nevent: message_start\ndata: {\"type\":\"message_start\",\"message\":{\"id\":\"msg_014p7gG3wDgGV9EUtLvnow3U\",\"type\":\"message\",\"role\":\"assistant\",\"model\":\"claude-opus-4-8\",\"stop_sequence\":null,\"usage\":{\"input_tokens\":472,\"output_tokens\":2},\"content\":[],\"stop_reason\":null}}\n\nevent: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"text\",\"text\":\"\"}}\n\nevent: ping\ndata: {\"type\": \"ping\"}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\"Okay\"}}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\" let\"}}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\"'s\"}}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\" check\"}}\n\nevent: content_block_stop\ndata: {\"type\":\"content_block_stop\",\"index\":0}\n\nevent: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":1,\"content_block\":{\"type\":\"tool_use\",\"id\":\"toolu_01T1x1fJ34qAmk2tNTrN7Up6\",\"name\":\"get_weather\",\"input\":{}}}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":1,\"delta\":{\"type\":\"input_json_delta\",\"partial_json\":\"\"}}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":1,\"delta\":{\"type\":\"input_json_delta\",\"partial_json\":\"{\\\"location\\\":\"}}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":1,\"delta\":{\"type\":\"input_json_delta\",\"partial_json\":\" \\\"San\"}}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":1,\"delta\":{\"type\":\"input_json_delta\",\"partial_json\":\" Francisc\"}}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":1,\"delta\":{\"type\":\"input_json_delta\",\"partial_json\":\"o,\"}}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":1,\"delta\":{\"type\":\"input_json_delta\",\"partial_json\":\" CA\\\"}\"}}\n\nevent: content_block_stop\ndata: {\"type\":\"content_block_stop\",\"index\":1}\n\nevent: message_delta\ndata: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"tool_use\",\"stop_sequence\":null},\"usage\":{\"output_tokens\":89}}\n\nevent: message_stop\ndata: {\"type\":\"message_stop\"}\n```\n\nReassembly: concatenate every `partial_json` for a given `index` (`\"\" + \"{\\\"location\\\":\" + \" \\\"San\" + \" Francisc\" + \"o,\" + \" CA\\\"}\"` → `{\"location\": \"San Francisco, CA\"}`), then `JSON.parse` at that block's `content_block_stop`. Tool use also supports fine-grained streaming (`eager_input_streaming` per tool) for finer `partial_json` chunking.\n\n---\n\n## OpenAI-compatible API mapping\n\nAnthropic integrates tools into the `user`/`assistant` message structure rather than using OpenAI's separate `tool` role and `function` wrapper. Field-by-field:\n\n| Concept | Anthropic Messages API | OpenAI Chat Completions |\n| --- | --- | --- |\n| Tool definition wrapper | flat `{\"name\",\"description\",\"input_schema\"}` in `tools[]` | `{\"type\":\"function\",\"function\":{\"name\",\"description\",\"parameters\"}}` in `tools[]` |\n| Tool schema key | `input_schema` (JSON Schema) | `parameters` (JSON Schema) |\n| \"Must call a tool\" | `tool_choice:{\"type\":\"any\"}` / `{\"type\":\"tool\",\"name\":…}` | `tool_choice:\"required\"` / `{\"type\":\"function\",\"function\":{\"name\":…}}` |\n| Disable parallel calls | `tool_choice:{…,\"disable_parallel_tool_use\":true}` | `parallel_tool_calls:false` (top level) |\n| Assistant call container | `tool_use` **content block** in `content[]` | `tool_calls[]` on the assistant `message` |\n| Call id | `tool_use.id` = `toolu_…` | `tool_calls[].id` = `call_…` |\n| Function name | `tool_use.name` | `tool_calls[].function.name` |\n| Function arguments | `tool_use.input` = **nested JSON object** (parsed) | `tool_calls[].function.arguments` = **JSON string** (must `JSON.parse`) |\n| \"Tools were called\" signal | `stop_reason:\"tool_use\"` | `finish_reason:\"tool_calls\"` |\n| Result message role | `user` message containing `tool_result` block(s) | dedicated `{\"role\":\"tool\",…}` message(s) |\n| Result ↔ call linkage | `tool_result.tool_use_id` | `tool` message `tool_call_id` |\n| Result payload | `tool_result.content` = string **or** block array (text/image/document) | `tool` message `content` = string |\n| Error result | `tool_result` with `is_error:true` | no dedicated flag; encode in `content` |\n| System prompt | top-level `system` param (no `system` role) | `{\"role\":\"system\",…}` message |\n| Streamed args | `input_json_delta.partial_json` fragments | `tool_calls[].function.arguments` string deltas |\n\nConversion gotchas:\n- **Object vs string:** to emit OpenAI shape, `JSON.stringify(tool_use.input)`; to consume OpenAI shape into Anthropic, `JSON.parse(arguments)`.\n- **Role reshaping:** collapse N OpenAI `tool` messages into one Anthropic `user` message of N `tool_result` blocks (order them before any text), and vice-versa.\n- **No `type:\"function\"`** wrapper on Anthropic custom tools; add/remove it when translating.\n- Id prefixes differ (`toolu_` vs `call_`); never assume one format's id is valid in the other.\n\n---\n\n## Parsing notes & gotchas\n\n- **`input` is an object, not a string.** Unlike OpenAI's `arguments`, do not `JSON.parse` `tool_use.input` from a non-streamed response — it is already an object. Only the *streaming* `partial_json` fragments are strings.\n- **Streaming tool args need reassembly.** `content_block_start` for a `tool_use` always has `input: {}`. Buffer `partial_json` per `index` and parse only at `content_block_stop`; mid-stream fragments are not valid JSON on their own (e.g. `{\"location\":`). Current models emit one complete key/value at a time, so expect bursts and gaps.\n- **`stop_reason` placement.** In streaming, `stop_reason` is `null` in `message_start` and final value (`\"tool_use\"`/`\"end_turn\"`) arrives in `message_delta`, not `message_stop`. `usage` in `message_delta` is **cumulative**.\n- **Ordering is enforced.** `tool_result` blocks must be first in their `user` message and must immediately follow the assistant `tool_use` message; every `tool_use.id` needs a matching `tool_result.tool_use_id`, or you get HTTP 400 (\"tool_use ids were found without tool_result blocks immediately after\").\n- **`tool_choice:any`/`tool` suppress preamble.** The API prefills the assistant turn, so no leading `text` block appears before `tool_use` — don't write a parser that expects explanatory text.\n- **Parallel results in one message.** Splitting parallel `tool_result`s across multiple `user` messages breaks the contract; send them together.\n- **Treat result content as untrusted.** Tool results can carry indirect prompt injection; keep them inside `tool_result` blocks, never promote to `system`/`user` text.\n- **Server tools differ.** `server_tool_use` / `web_search_tool_result` blocks are produced and consumed by Anthropic; never synthesize `tool_result` for them. `stop_reason:\"pause_turn\"` means resend the response as-is to let a long server-tool turn continue.\n- **Extended thinking + tools.** Preserve `thinking`/`redacted_thinking` blocks (with their `signature`) verbatim across turns; forced `tool_choice` (`any`/`tool`) is rejected when thinking is on.\n- **Output is not valid XML.** The underlying model output is parsed by Anthropic with regular expressions, not an XML parser (\"The output is not expected to be valid XML\"). If you reconstruct prompts at token level, do not assume well-formedness; rely on the JSON the API returns.\n- **Legacy vs modern XML are different tag sets.** Legacy: `<invoke>` + child `<tool_name>` + `<parameters>` with per-name child tags; results in `<function_results>/<result>/<stdout>`. Modern: `<invoke name=\"…\">` + `<parameter name=\"…\">`. Mixing them up will misparse. The legacy format also required passing `</function_calls>` as a `stop_sequence` and is not optimized for Claude 3+.\n\n### Legacy XML format (secondary, prompt-based — fully verified, now retired)\n\nBefore the Messages API, tools were defined and called entirely in the prompt. Anthropic's archived \"Legacy tool use\" doc specifies it verbatim.\n\nTool definition (inside a `<tools>` block in the system prompt):\n\n```text\n<tool_description>\n<tool_name>get_weather</tool_name>\n<description>\nRetrieves the current weather for a specified location.\nReturns a dictionary with two fields:\n- temperature: float, the current temperature in Fahrenheit\n- conditions: string, a brief description of the current weather conditions\nRaises ValueError if the provided location cannot be found.\n</description>\n<parameters>\n<parameter>\n<name>location</name>\n<type>string</type>\n<description>The city and state, e.g. San Francisco, CA</description>\n</parameter>\n</parameters>\n</tool_description>\n```\n\nModel-emitted call (multiple `<invoke>` for parallel calls; pass `</function_calls>` as a `stop_sequence`):\n\n```text\n<function_calls>\n<invoke>\n<tool_name>get_weather</tool_name>\n<parameters>\n<location>San Francisco, CA</location>\n</parameters>\n</invoke>\n</function_calls>\n```\n\nResult fed back into the next user turn:\n\n```text\n<function_results>\n<result>\n<tool_name>get_weather</tool_name>\n<stdout>\n59 degrees Fahrenheit, partly cloudy\n</stdout>\n</result>\n</function_results>\n```\n\nError result:\n\n```text\n<function_results>\n<error>\nerror message goes here\n</error>\n</function_results>\n```\n\nThe legacy system-prompt preamble (verbatim from the archived doc) was:\n\n```text\nIn this environment you have access to a set of tools you can use to answer the user's question.\nYou may call them like this:\n<function_calls>\n<invoke>\n<tool_name>$TOOL_NAME</tool_name>\n<parameters>\n<$PARAMETER_NAME>$PARAMETER_VALUE</$PARAMETER_NAME>\n...\n</parameters>\n</invoke>\n</function_calls>\n\nHere are the tools available:\n<tools>\n...one <tool_description> per tool...\n</tools>\n```\n\nLegacy notes: no built-in tools (everything is prompt-defined); Anthropic recommended ≤3–5 tools; the model conventionally wrapped reasoning in `<scratchpad>` and final output in `<answer>`. This format is \"out of date\" and \"not optimized for Claude 3\" — use the JSON Messages API for anything current.\n\n---\n\n## Sources\n\n- Tool use overview — https://docs.claude.com/en/docs/agents-and-tools/tool-use/overview\n- How tool use works — https://docs.claude.com/en/docs/agents-and-tools/tool-use/how-tool-use-works\n- Define tools (tool schema, `input_schema`, `tool_choice`, constructed system prompt) — https://docs.claude.com/en/docs/agents-and-tools/tool-use/define-tools\n- Handle tool calls (`tool_use`/`tool_result`, `is_error`, ordering rules) — https://docs.claude.com/en/docs/agents-and-tools/tool-use/handle-tool-calls\n- Parallel tool use — https://docs.claude.com/en/docs/agents-and-tools/tool-use/parallel-tool-use\n- Streaming messages (SSE events, `input_json_delta`, verbatim tool-use stream) — https://docs.claude.com/en/docs/build-with-claude/streaming\n- Messages API reference (`stop_reason` enum, response shape, `tools`) — https://docs.claude.com/en/api/messages\n- Legacy tool use (archived; verbatim XML tags and prompt) — https://web.archive.org/web/20240528231249/https://docs.anthropic.com/en/docs/legacy-tool-use ; also live localized copies, e.g. https://docs.anthropic.com/de/docs/legacy-tool-use (English path now redirects to the tool-use overview)\n",
77
+ "toolconv/deepseek.md": "# DeepSeek tool-calling wire format\n\nDeepSeek's chat models (DeepSeek-V3, V3-0324, R1, R1-0528, and DeepSeek-V3.1) share a\nsingle tokenizer family and a distinctive envelope built from **fullwidth-pipe** special\ntokens such as `<|begin▁of▁sentence|>` and `<|User|>`. Tool calling is emitted as a run\nof dedicated special tokens (`<|tool▁calls▁begin|>` … `<|tool▁calls▁end|>`) rather than\nJSON-in-text or XML. This document centers on **DeepSeek-V3.1** (the current hybrid\nthinking/non-thinking model) and documents the older **DeepSeek-V3-0324** and\n**DeepSeek-R1-0528** format as an explicit version difference, because their on-the-wire\ntool syntax is *not* the same as V3.1's.\n\nAn inference server enables it with a chat template plus a tool-call parser:\n\n- vLLM V3.1: `--enable-auto-tool-choice --tool-call-parser deepseek_v31 --chat-template examples/tool_chat_template_deepseekv31.jinja` (optionally `--reasoning-parser deepseek_r1`).\n- vLLM V3-0324 / R1-0528: `--enable-auto-tool-choice --tool-call-parser deepseek_v3 --chat-template examples/tool_chat_template_deepseekv3.jinja` (V3-0324) or `tool_chat_template_deepseekr1.jinja` (R1-0528).\n- The model's own `tokenizer_config.json` `chat_template` (and the identical `assets/chat_template.jinja`) renders the V3.1 envelope, tool calls, and tool outputs; it does **not** synthesize the `## Tools` advertisement block, so vLLM ships a template that does (see below).\n\n> Verified against: the DeepSeek-V3.1 model card \"Chat Template\" / \"ToolCall\" sections, the\n> byte-identical `chat_template` in `tokenizer_config.json` and `assets/chat_template.jinja`,\n> the `added_tokens` in `tokenizer.json` (token IDs), `config.json` (bos/eos IDs), the\n> DeepSeek-V3-0324 and DeepSeek-R1-0528 `tokenizer_config.json` chat templates, the vLLM\n> `tool_chat_template_deepseekv31.jinja`, and the vLLM tool-calling / reasoning-outputs docs.\n\n## A note on the unusual Unicode (do not substitute ASCII)\n\nDeepSeek's markers do **not** use the ASCII vertical bar `|` (U+007C) or ASCII underscore\n`_`. They use:\n\n- `|` — **U+FF5C FULLWIDTH VERTICAL LINE**, as the delimiter just inside the angle brackets.\n- `▁` — **U+2581 LOWER ONE EIGHTH BLOCK** (the SentencePiece word-boundary glyph), as the\n separator *between words* inside a token, e.g. `begin▁of▁sentence`, `tool▁calls▁begin`.\n\nSo `<|tool▁calls▁begin|>` is `<` + `|`(FF5C) + `tool` + `▁`(2581) + `calls` + `▁`(2581) +\n`begin` + `|`(FF5C) + `>`. Copying these tokens as `<|tool_calls_begin|>` (ASCII pipe +\nunderscore) produces tokens the model never trained on and will silently break parsing and\ngeneration. The only DeepSeek markers that use ASCII brackets are the thinking tags\n`<think>` / `</think>` (plain `<`, `/`, `>`) and the rarely used `<|EOT|>` (ASCII pipes).\n\n## Special tokens\n\nToken IDs are from DeepSeek-V3.1 `tokenizer.json` (`added_tokens`); `vocab_size` is 129280.\nThe `special` column reflects the tokenizer's `\"special\"` flag (it governs\n`skip_special_tokens`); note that the role/think/tool markers are `special: false`.\n\n| Token (verbatim) | ID | `special` | Purpose |\n| --- | --- | --- | --- |\n| `<|begin▁of▁sentence|>` | 0 | true | BOS; prepended once at the very start of the prompt. |\n| `<|end▁of▁sentence|>` | 1 | true | EOS; ends every assistant/tool turn and is the stop token. |\n| `<|▁pad▁|>` | 2 | true | Padding (`pad_token`; the model card/config also reuse EOS as pad). |\n| `<|search▁begin|>` | 128796 | false | Search-agent query open (thinking-mode search tool). |\n| `<|search▁end|>` | 128797 | false | Search-agent query close. |\n| `<think>` | 128798 | false | Opens the reasoning/thinking span. ASCII brackets. |\n| `</think>` | 128799 | false | Closes the reasoning span; **also emitted in non-thinking mode** (see below). |\n| `<|fim▁hole|>` / `<|fim▁begin|>` / `<|fim▁end|>` | 128800–128802 | false | Fill-in-the-middle (not chat). |\n| `<|User|>` | 128803 | false | User role marker. |\n| `<|Assistant|>` | 128804 | false | Assistant role marker. |\n| `<\\|EOT\\|>` | 128805 | true | End-of-turn (legacy; ASCII pipes, rarely used in chat). |\n| `<|tool▁calls▁begin|>` | 128806 | false | Opens the assistant's batch of tool calls. |\n| `<|tool▁calls▁end|>` | 128807 | false | Closes the batch of tool calls. |\n| `<|tool▁call▁begin|>` | 128808 | false | Opens a single tool call inside the batch. |\n| `<|tool▁call▁end|>` | 128809 | false | Closes a single tool call. |\n| `<|tool▁outputs▁begin|>` | 128810 | false | Opens a batch of tool results (**R1-0528 / V3-0324 only**). |\n| `<|tool▁outputs▁end|>` | 128811 | false | Closes a batch of tool results (**R1-0528 / V3-0324 only**). |\n| `<|tool▁output▁begin|>` | 128812 | false | Opens a single tool result. |\n| `<|tool▁output▁end|>` | 128813 | false | Closes a single tool result. |\n| `<|tool▁sep|>` | 128814 | false | Separator inside a tool call (between name and arguments). |\n\n`config.json` confirms `bos_token_id: 0`, `eos_token_id: 1`.\n\n## Roles / channels / turn structure\n\nThere is no OpenAI-style `system`/`developer` channel token. Roles are inline markers and\nthe prompt is one flat string:\n\n```text\n<|begin▁of▁sentence|>{system_prompt}<|User|>{query}<|Assistant|>{response}<|end▁of▁sentence|>\n```\n\n- **System prompt** has no marker. All `system` messages are concatenated (joined with\n `\\n\\n` when there are several) and emitted immediately after `<|begin▁of▁sentence|>`,\n before the first `<|User|>`. When tools are present the `## Tools` block is appended to\n this system text (separated by `\\n\\n`).\n- **User turn**: `<|User|>` + content. (No EOS after the user text in V3.1; the assistant\n marker follows directly.)\n- **Assistant turn**: opens with `<|Assistant|>`, then a thinking tag, then content, then\n `<|end▁of▁sentence|>`.\n- **Thinking vs non-thinking (V3.1 hybrid)** — selected by the template, not by the model:\n - Non-thinking generation prefix: `…<|Assistant|></think>` — the model starts *after* a\n `</think>` it never had to open. Unlike DeepSeek-V3, V3.1 always injects this `</think>`.\n - Thinking generation prefix: `…<|Assistant|><think>` — the model emits its chain of\n thought, closes with `</think>`, then the answer.\n - In multi-turn context, **every** stored assistant turn keeps a `</think>`; only the last\n turn's leading thinking tag reflects the requested mode. When rendering a stored\n assistant message, any text up to and including `</think>` is stripped from `content`\n before re-emitting (the template does `content.split('</think>', 1)[1]`).\n- **Tool calling runs in non-thinking mode.** The model card states \"Toolcall is supported\n in non-thinking mode,\" and the V3.1 tool template opens the tool-call turn with\n `<|Assistant|></think>`. With vLLM, V3.1 reasoning is disabled by default; enable it via\n `chat_template_kwargs={\"thinking\": true}`.\n- **Search-agent channel**: a separate thinking-mode protocol using `<|search▁begin|>` /\n `<|search▁end|>` (see the model card's `assets/search_tool_trajectory.html`); out of\n scope for ordinary function calling.\n\n## Tool definitions\n\nTools are advertised as a **Markdown block injected into the system area** (after the system\nprompt, before the first `<|User|>`). The chat template in `tokenizer_config.json` does not\nbuild this block from a `tools=[…]` argument; the caller (or vLLM's\n`tool_chat_template_deepseekv31.jinja`) constructs it. Reproduced verbatim from the\nDeepSeek-V3.1 model card, the full layout is\n`<|begin▁of▁sentence|>{system prompt}\\n\\n{tool_description}<|User|>{query}<|Assistant|></think>`\nwhere `{tool_description}` is:\n\n```text\n## Tools\nYou have access to the following tools:\n\n### {tool_name1}\nDescription: {description}\n\nParameters: {json.dumps(parameters)}\n\nIMPORTANT: ALWAYS adhere to this exact format for tool use:\n<|tool▁calls▁begin|><|tool▁call▁begin|>tool_call_name<|tool▁sep|>tool_call_arguments<|tool▁call▁end|>{additional_tool_calls}<|tool▁calls▁end|>\n\nWhere:\n- `tool_call_name` must be an exact match to one of the available tools\n- `tool_call_arguments` must be valid JSON that strictly follows the tool's Parameters Schema\n- For multiple tool calls, chain them directly without separators or spaces\n```\n\nEach tool contributes one `### {name}` section with a `Description:` line and a\n`Parameters: {…}` line whose value is the compact JSON of the JSON-Schema parameters object\n(`json.dumps(parameters)` in the card, `parameters | tojson` in vLLM's template). The\n`IMPORTANT:` instruction block is appended once, after the last tool.\n\n## Tool-call format\n\nThe model emits one batch wrapper containing one or more calls. Each call is\n`name <|tool▁sep|> arguments`, where **arguments is a raw JSON object string** (no code\nfence). Minimal single call (what the model generates after the `<|Assistant|></think>`\nprefix):\n\n```text\n<|tool▁calls▁begin|><|tool▁call▁begin|>get_weather<|tool▁sep|>{\"location\": \"San Francisco, CA\"}<|tool▁call▁end|><|tool▁calls▁end|>\n```\n\nGrammar (V3.1):\n\n```text\n<|tool▁calls▁begin|><|tool▁call▁begin|>{name}<|tool▁sep|>{json_args}<|tool▁call▁end|>{…more calls…}<|tool▁calls▁end|>\n```\n\n- `{name}` must exactly match an advertised tool name. It comes **first**, immediately after\n `<|tool▁call▁begin|>`.\n- `{json_args}` is valid JSON conforming to the tool's parameter schema, inlined directly.\n- The whole assistant turn is then closed by the template/server with\n `<|end▁of▁sentence|>`.\n\n(V3.1 has **no** `type` field and **no** ` ```json ` fence around arguments — that is the\nolder R1/V3-0324 convention; see Version differences.)\n\n## Multiple / parallel tool calls\n\nAll calls live inside one `<|tool▁calls▁begin|>…<|tool▁calls▁end|>` wrapper. After the\nfirst `<|tool▁call▁begin|>…<|tool▁call▁end|>`, each additional call is **another\n`<|tool▁call▁begin|>…<|tool▁call▁end|>` chained directly, with no separator, newline, or\nspace between calls** (the card: \"chain them directly without separators or spaces\"):\n\n```text\n<|tool▁calls▁begin|><|tool▁call▁begin|>get_weather<|tool▁sep|>{\"location\": \"San Francisco, CA\"}<|tool▁call▁end|><|tool▁call▁begin|>get_weather<|tool▁sep|>{\"location\": \"Seattle, WA\"}<|tool▁call▁end|><|tool▁calls▁end|>\n```\n\nNote that `<|tool▁calls▁begin|>` (plural, id 128806) appears exactly once; each call uses\nthe singular `<|tool▁call▁begin|>` (id 128808) / `<|tool▁call▁end|>` (id 128809).\n\n## Tool-result format\n\nExecuted results are fed back as `tool`-role messages. In **V3.1** each result is wrapped in\nthe singular output tokens, with **no** plural `<|tool▁outputs▁…|>` wrapper, emitted right\nafter the assistant tool-call turn's `<|end▁of▁sentence|>`:\n\n```text\n<|tool▁output▁begin|>{result_text}<|tool▁output▁end|>\n```\n\n`{result_text}` is the raw tool output (typically a JSON string, but any text). For multiple\nresults, the V3.1 template emits one `<|tool▁output▁begin|>…<|tool▁output▁end|>` per `tool`\nmessage, concatenated directly. There is **no tool-call ID in the wire format** — results are\nmatched to calls **positionally** (order of outputs ↔ order of calls).\n\nThe model then produces its final answer **directly after `<|tool▁output▁end|>`** with no\n`<|Assistant|>` marker and no `</think>` (see Parsing notes — the V3.1 reference template\ndeliberately renders post-tool assistant content as just `content<|end▁of▁sentence|>`).\n\n> R1-0528 / V3-0324 differ: results are enclosed in a `<|tool▁outputs▁begin|>` …\n> `<|tool▁outputs▁end|>` batch wrapper, with each result as\n> `<|tool▁output▁begin|>…<|tool▁output▁end|>` and multiple results newline-separated.\n\n## End-to-end example\n\nA complete DeepSeek-V3.1 **non-thinking** multi-turn exchange. Everything is one flat string;\ninline `←` comments mark where the model's generation begins (they are not part of the\nstream). Whitespace inside the `## Tools` block is literal newlines.\n\n```text\n<|begin▁of▁sentence|>You are a helpful assistant.\n\n## Tools\nYou have access to the following tools:\n\n### get_weather\nDescription: Get the current weather for a location\n\nParameters: {\"type\": \"object\", \"properties\": {\"location\": {\"type\": \"string\", \"description\": \"City and state, e.g. San Francisco, CA\"}, \"unit\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"]}}, \"required\": [\"location\"]}\n\nIMPORTANT: ALWAYS adhere to this exact format for tool use:\n<|tool▁calls▁begin|><|tool▁call▁begin|>tool_call_name<|tool▁sep|>tool_call_arguments<|tool▁call▁end|>{additional_tool_calls}<|tool▁calls▁end|>\n\nWhere:\n- `tool_call_name` must be an exact match to one of the available tools\n- `tool_call_arguments` must be valid JSON that strictly follows the tool's Parameters Schema\n- For multiple tool calls, chain them directly without separators or spaces\n<|User|>What's the weather in San Francisco?<|Assistant|></think><|tool▁calls▁begin|><|tool▁call▁begin|>get_weather<|tool▁sep|>{\"location\": \"San Francisco, CA\", \"unit\": \"celsius\"}<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|><|tool▁output▁begin|>{\"temperature\": 18, \"unit\": \"celsius\", \"condition\": \"Foggy\"}<|tool▁output▁end|>It's currently 18°C and foggy in San Francisco.<|end▁of▁sentence|>\n```\n\nReading the spans:\n\n1. `<|begin▁of▁sentence|>` + system text + `\\n\\n` + `## Tools…` block — prompt prefix.\n2. `<|User|>What's the weather in San Francisco?` — user turn.\n3. `<|Assistant|></think>` — non-thinking generation prefix (prompt). **Model generates from here.**\n4. `<|tool▁calls▁begin|>…<|tool▁calls▁end|>` — the model's tool call; server appends `<|end▁of▁sentence|>` and stops with `finish_reason: \"tool_calls\"`.\n5. `<|tool▁output▁begin|>…<|tool▁output▁end|>` — your executed result, appended to the prompt.\n6. `It's currently 18°C and foggy in San Francisco.<|end▁of▁sentence|>` — **the model generates the final answer directly after the tool output** (no new `<|Assistant|>` marker), ending with EOS.\n\n## OpenAI-compatible API mapping\n\nWhen fronted by an OpenAI-compatible server (e.g. vLLM with `--tool-call-parser\ndeepseek_v31`):\n\n- **`finish_reason`**: `\"tool_calls\"` when the model emitted a `<|tool▁calls▁begin|>…`\n batch; otherwise `\"stop\"`.\n- **`message.tool_calls[]`**: one element per `<|tool▁call▁begin|>…<|tool▁call▁end|>`.\n - `.type` = `\"function\"`.\n - `.function.name` = the text between `<|tool▁call▁begin|>` and `<|tool▁sep|>`.\n - `.function.arguments` = the text between `<|tool▁sep|>` and `<|tool▁call▁end|>`, returned\n as a **JSON string** (per the OpenAI spec), not a nested object. The model already emits\n raw JSON there, so it is passed through.\n - `.id` = **synthesized by the server** (e.g. `chatcmpl-tool-…`). DeepSeek's wire format\n carries no call ID.\n- **Tool result messages**: `{\"role\": \"tool\", \"tool_call_id\": \"<id>\", \"content\": \"<result>\"}`.\n The server renders `content` into `<|tool▁output▁begin|>…<|tool▁output▁end|>`. Because the\n prompt has no IDs, `tool_call_id` is used only for client-side bookkeeping; **the model\n relies on ordering**, so preserve the order of results relative to the calls.\n- **Assistant replay**: when you send a prior assistant turn back with `tool_calls`, the\n template inlines `function.arguments`. The HF reference template inlines it **verbatim**\n (assumes it is already a JSON string); vLLM's `tool_chat_template_deepseekv31.jinja` pipes\n it through `| tojson`. Send `arguments` as a JSON **string** per the OpenAI spec (see the\n gotcha below about double-encoding).\n\n## Parsing notes & gotchas\n\n- **Unicode is load-bearing.** Match `|` = U+FF5C and `▁` = U+2581 exactly. ASCII\n `<|tool_calls_begin|>` will not tokenize to the special tokens. `<think>`/`</think>` use\n ASCII brackets; the rare `<|EOT|>` uses ASCII pipes.\n- **Tool/role markers are `special: false`.** Only `<|begin▁of▁sentence|>`,\n `<|end▁of▁sentence|>`, `<|▁pad▁|>`, and `<|EOT|>` are flagged `special: true`. So\n decoding with `skip_special_tokens=True` will **not** strip `<|tool▁calls▁begin|>`,\n `<|tool▁sep|>`, `<|Assistant|>`, `</think>`, etc. — they remain in the decoded string for\n the parser to find. (Conversely, do not assume special-token filtering removes them.)\n- **No code fence / no `type` field in V3.1.** A parser written for R1/V3-0324\n (`function<|tool▁sep|>name` + ` ```json ` block) will not parse V3.1, and vice-versa.\n V3.1 is `name<|tool▁sep|>raw_json`.\n- **Chaining has no delimiter in V3.1.** Calls abut directly:\n `…<|tool▁call▁end|><|tool▁call▁begin|>…`. Do not split on newlines/whitespace; split on\n the `<|tool▁call▁begin|>` / `<|tool▁call▁end|>` boundaries. (R1/V3-0324 put a `\\n` before\n each subsequent call.)\n- **No tool-call IDs on the wire.** Match results to calls by position. A server must\n generate synthetic `tool_call_id`s for the OpenAI shape.\n- **`</think>` appears even in non-thinking mode.** Strip the leading `</think>` (and any\n preceding reasoning) before treating the remainder as the visible answer; the template does\n `content.split('</think>', 1)[1]` when replaying stored turns.\n- **Post-tool generation prompt quirk.** The reference V3.1 chat template only appends the\n `<|Assistant|></think>` generation prefix when the **last message is `user`**. After a\n `tool` message it appends nothing and the model continues straight after\n `<|tool▁output▁end|>`. Agent loops that re-template a conversation ending in a tool result\n must not expect (or double-insert) an assistant marker there.\n- **`arguments` double-encoding risk.** On replay, vLLM's example template applies\n `arguments | tojson`. If `arguments` is already a JSON string (the OpenAI convention), that\n pipe will JSON-encode the string again (wrapping it in quotes and escaping it). Pass an\n object where the template expects `| tojson`, or a string where the template inlines\n verbatim — match the template you actually run.\n- **Streaming.** Tool calls arrive token-by-token; the name is complete only at\n `<|tool▁sep|>`, and arguments are partial JSON until `<|tool▁call▁end|>`. Buffer per call\n boundary; do not attempt to `json.loads` arguments before the closing tool-call token.\n- **Malformed output.** With `tool_choice=\"auto\"` and no structural-tag constraint\n (`VLLM_ENFORCE_STRICT_TOOL_CALLING=false`), the model can emit invalid JSON in\n `tool_call_arguments` or a `tool_call_name` that does not match any tool; the parser\n extracts best-effort. Named/`required` tool choice uses the structured-outputs backend and\n guarantees schema-valid arguments.\n\n## Version differences: V3.1 vs V3-0324 / R1-0528\n\nThe pre-V3.1 models (DeepSeek-V3-0324 and DeepSeek-R1-0528) share an older tool-call\nencoding, served in vLLM with `--tool-call-parser deepseek_v3`. The per-call body is:\n\n````text\n<|tool▁call▁begin|>function<|tool▁sep|>{name}\n```json\n{json_args}\n```<|tool▁call▁end|>\n````\n\nDifferences from V3.1:\n\n| Aspect | V3.1 (`deepseek_v31`) | V3-0324 / R1-0528 (`deepseek_v3`) |\n| --- | --- | --- |\n| Field order in a call | `{name}<|tool▁sep|>{args}` | `function<|tool▁sep|>{name}` (the literal `type`, then name) |\n| Arguments wrapping | raw JSON, inline | fenced ` ```json … ``` ` block (name and args separated by `\\n`) |\n| Chaining of calls | abut directly, **no separator** | each subsequent call prefixed with `\\n` |\n| Tool results | `<|tool▁output▁begin|>…<|tool▁output▁end|>` per message, no batch wrapper | wrapped in `<|tool▁outputs▁begin|>…<|tool▁outputs▁end|>`, results newline-separated |\n| User→assistant boundary | user turn = `<|User|>{q}`; `<|Assistant|></think>` added at generation | user turn = `<|User|>{q}<|Assistant|>` (assistant marker appended in the user branch) |\n| Thinking | hybrid; `thinking` kwarg toggles `<think>` vs `</think>` prefix | R1-0528 always reasoning (bare `<|Assistant|>` generation prefix, model opens `<think>` itself); V3-0324 non-reasoning |\n| vLLM parser | `--tool-call-parser deepseek_v31` | `--tool-call-parser deepseek_v3` |\n\nExample R1-0528 / V3-0324 parallel call with its result batch:\n\n````text\n<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>get_weather\n```json\n{\"location\": \"San Francisco, CA\"}\n```<|tool▁call▁end|>\n<|tool▁call▁begin|>function<|tool▁sep|>get_weather\n```json\n{\"location\": \"Seattle, WA\"}\n```<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|><|tool▁outputs▁begin|><|tool▁output▁begin|>{\"temperature\": 18}<|tool▁output▁end|>\n<|tool▁output▁begin|>{\"temperature\": 14}<|tool▁output▁end|><|tool▁outputs▁end|>\n````\n\nThe `deepseek_r1` **reasoning** parser (`--reasoning-parser deepseek_r1`) applies to the R1\nseries **and** to DeepSeek-V3.1; it extracts the `<think>…</think>` span into the response's\n`reasoning` field. It is independent of the tool-call parser.\n\n## Sources\n\n- DeepSeek-V3.1 model card (Chat Template / ToolCall sections): <https://huggingface.co/deepseek-ai/DeepSeek-V3.1>\n- DeepSeek-V3.1 `assets/chat_template.jinja`: <https://huggingface.co/deepseek-ai/DeepSeek-V3.1/resolve/main/assets/chat_template.jinja>\n- DeepSeek-V3.1 `tokenizer_config.json` (`chat_template`, byte-identical to the jinja): <https://huggingface.co/deepseek-ai/DeepSeek-V3.1/resolve/main/tokenizer_config.json>\n- DeepSeek-V3.1 `tokenizer.json` (`added_tokens` → token IDs and `special` flags): <https://huggingface.co/deepseek-ai/DeepSeek-V3.1/resolve/main/tokenizer.json>\n- DeepSeek-V3.1 `config.json` (`bos_token_id`, `eos_token_id`, `vocab_size`): <https://huggingface.co/deepseek-ai/DeepSeek-V3.1/resolve/main/config.json>\n- DeepSeek-R1-0528 model card and `tokenizer_config.json` (older tool format): <https://huggingface.co/deepseek-ai/DeepSeek-R1-0528> · <https://huggingface.co/deepseek-ai/DeepSeek-R1-0528/resolve/main/tokenizer_config.json>\n- DeepSeek-R1 model card: <https://huggingface.co/deepseek-ai/DeepSeek-R1>\n- DeepSeek-V3-0324 `tokenizer_config.json` (older tool format): <https://huggingface.co/deepseek-ai/DeepSeek-V3-0324/resolve/main/tokenizer_config.json>\n- vLLM tool-call template for V3.1 (`## Tools` injection + `| tojson`): <https://github.com/vllm-project/vllm/blob/main/examples/tool_chat_template_deepseekv31.jinja>\n- vLLM Tool Calling docs (`deepseek_v3`, `deepseek_v31` parser flags): <https://docs.vllm.ai/en/latest/features/tool_calling/>\n- vLLM Reasoning Outputs docs (`deepseek_r1` reasoning parser; V3.1 thinking default): <https://docs.vllm.ai/en/latest/features/reasoning_outputs/>\n",
78
+ "toolconv/glm-4.5.md": "# GLM-4.5 / GLM-4.6 tool-calling format\n\nNative tool-calling convention of Zhipu AI / Z.ai's **GLM-4.5** family (`zai-org/GLM-4.5` 355B-A32B and `zai-org/GLM-4.5-Air` 106B-A12B, `model_type: \"glm4_moe\"`), shared byte-for-byte by **GLM-4.6**. Unlike the JSON-in-a-tag conventions used by most families, GLM emits each tool call as an **XML-like** block: `<tool_call>{name}` followed by alternating `<arg_key>`/`<arg_value>` element pairs, closed by `</tool_call>`. The prompt is a GLM-style sequence opened by `[gMASK]<sop>` with turn markers `<|system|>`, `<|user|>`, `<|assistant|>`, `<|observation|>`. An inference server turns the raw stream into OpenAI-style `tool_calls` with a parser plus a reasoning parser: both vLLM and SGLang expose `--tool-call-parser glm45 --reasoning-parser glm45` (vLLM additionally needs `--enable-auto-tool-choice`). Tool calling and reasoning are driven entirely by the bundled `chat_template.jinja`; thinking mode is on by default and is disabled per-request with `chat_template_kwargs={\"enable_thinking\": false}`.\n\nThis document was verified against the authoritative `chat_template.jinja` from the HF repo (fetched raw and **rendered locally with Jinja2** — `trim_blocks=True, lstrip_blocks=True`, transformers' `tojson` filter — to produce the byte-exact streams below), `tokenizer_config.json` and `generation_config.json` for the exact token IDs and stop tokens, the model card, and the vLLM (`Glm4MoeModelToolParser`) and SGLang (`Glm4MoeDetector`) parser sources. The HF `resolve`/`blob` web paths redirect to the model-card API; the byte-exact source was obtained via the `resolve/main/...:raw` cache (template commit `cbb2c7cfb52fa128a9660cb1a7a78e017899e115`). The GLM-4.5 and GLM-4.6 `chat_template.jinja` files are identical (same content hash `41478957…`).\n\n## Special tokens\n\nToken IDs are from `tokenizer_config.json` (`added_tokens_decoder`). Note the split: the turn/role markers are registered as **special** tokens, whereas the structural tool-call and thinking tags are each a single dedicated vocabulary token but flagged **`special: false`** (they are emitted/printed as ordinary text, not stripped as control tokens).\n\n| Token (verbatim) | ID | `special` | Purpose |\n|---|---|---|---|\n| `[gMASK]` | 151331 | true | GLM prefix / blank-infilling sentinel; first token of every prompt |\n| `<sop>` | 151333 | true | \"Start of piece\" — immediately follows `[gMASK]` to open the sequence |\n| `<eop>` | 151334 | true | \"End of piece\" (not emitted by the chat template) |\n| `<\\|system\\|>` | 151335 | true | Opens a system turn (and the injected tools turn) |\n| `<\\|user\\|>` | 151336 | true | Opens a user turn (also an EOS id — see below) |\n| `<\\|assistant\\|>` | 151337 | true | Opens an assistant turn / generation prompt |\n| `<\\|observation\\|>` | 151338 | true | Opens a tool-result (observation) turn (also an EOS id) |\n| `<\\|endoftext\\|>` | 151329 | true | End-of-text; `eos_token` and `pad_token` |\n| `<think>` | 151350 | false | Opens the reasoning span inside an assistant turn |\n| `</think>` | 151351 | false | Closes the reasoning span |\n| `<tool_call>` | 151352 | false | Opens one tool call; function name follows on the same line |\n| `</tool_call>` | 151353 | false | Closes one tool call |\n| `<arg_key>` | 151356 | false | Opens an argument-name element |\n| `</arg_key>` | 151357 | false | Closes an argument-name element |\n| `<arg_value>` | 151358 | false | Opens an argument-value element |\n| `</arg_value>` | 151359 | false | Closes an argument-value element |\n| `<tool_response>` | 151354 | false | Wraps one tool result inside an observation turn |\n| `</tool_response>` | 151355 | false | Closes a tool result |\n| `/nothink` | 151360 | true | Soft switch appended to user text to suppress thinking |\n\nNotes on exactness:\n- All pipes are ASCII `|` (U+007C); GLM uses no fullwidth `|` (U+FF5C) or `▁` (U+2581) variants (unlike DeepSeek). Reproduce `<|system|>`, `<|user|>`, `<|assistant|>`, `<|observation|>` exactly, and `[gMASK]` with literal square brackets.\n- Because `<tool_call>`, `<arg_key>`, `<arg_value>`, `<tool_response>`, `<think>` (and their closers) each map to exactly **one** token ID, they cost one token apiece in the stream — but being `special: false` they round-trip through detokenization as plain text. Parsers therefore match them as literal substrings in the decoded text, not as control-token ids.\n- `eos_token_id` is a **list**: `[151329, 151336, 151338]` = `<|endoftext|>`, `<|user|>`, `<|observation|>` (from `generation_config.json`). This is how a tool-call turn ends: after `</tool_call>` the model emits `<|observation|>`, which is an EOS id, so generation halts and the server reports a tool call (see Turn structure).\n\n## Roles / channels / turn structure\n\nEvery prompt begins with the literal two-token prefix `[gMASK]<sop>` (no following newline). Turns are then concatenated, each introduced by its role marker; there is no per-turn terminator token in rendered history (the next marker, or an EOS id during generation, ends a turn).\n\n- **System** (`<|system|>`): role marker, newline, then the message text. When `tools` are supplied, a synthetic tools system turn is rendered **first**, before any user-supplied system turn (the two are separate `<|system|>` blocks — see Tool definitions).\n- **User** (`<|user|>`): role marker, newline, then text. If `enable_thinking` is false, the literal `/nothink` is appended to the user text (unless it already ends with `/nothink`).\n- **Assistant** (`<|assistant|>`): role marker, then a reasoning span and/or visible content and/or tool calls. The reasoning span is `\\n<think>{reasoning}</think>`; visible content follows on its own line; tool calls follow as `<tool_call>…</tool_call>` blocks.\n- **Tool result** (`<|observation|>`): role marker introducing one or more `<tool_response>…</tool_response>` blocks (see Tool-result format).\n\nThinking / reasoning channel:\n- Reasoning lives in `<think>…</think>` inside the assistant turn. The `--reasoning-parser glm45` extracts it into a separate `reasoning_content` field; the visible answer is whatever follows `</think>`.\n- **Only the reasoning of assistant turns after the last user message is kept.** The template renders every earlier assistant turn with an empty `<think></think>` and drops its `reasoning_content` (or any inline `<think>…</think>` embedded in `content`). This keeps stale chains of thought out of the context on later turns.\n- An assistant turn with neither preserved reasoning nor an explicit chain renders `\\n<think></think>` (empty), then content/tool calls.\n\nGeneration prompt (`add_generation_prompt=True`):\n- **Thinking mode (default):** the prompt ends with a bare `<|assistant|>`; the model continues with `\\n<think>…</think>` then its answer or tool calls.\n- **Non-thinking mode** (`enable_thinking=false`): the prompt ends with `<|assistant|>\\n<think></think>`, pre-filling an empty reasoning span so the model goes straight to the answer.\n\nHow a tool-call turn terminates: there is no dedicated \"stop after tool call\" token. The model emits `</tool_call>` and then `<|observation|>` (token 151338), which is one of the three EOS ids, so decoding stops. The server inspects the text, finds `<tool_call>`, and returns `finish_reason: \"tool_calls\"`.\n\n## Tool definitions\n\nWhen the request carries `tools`, the template prepends one `<|system|>` turn containing a fixed preamble, the tool list wrapped in `<tools>…</tools>`, and a literal description of the output format. Each tool is serialized with `tool | tojson(ensure_ascii=False)` — i.e. the **entire OpenAI tool object verbatim**, including the `{\"type\": \"function\", \"function\": {…}}` wrapper, with default JSON spacing (`\", \"` / `\": \"`). One tool per line.\n\n```text\n<|system|>\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\n{\"type\": \"function\", \"function\": {\"name\": \"get_weather\", \"description\": \"Get current weather for a city\", \"parameters\": {\"type\": \"object\", \"properties\": {\"location\": {\"type\": \"string\", \"description\": \"City name\"}, \"unit\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"]}}, \"required\": [\"location\"]}}}\n</tools>\n\nFor each function call, output the function name and arguments within the following XML format:\n<tool_call>{function-name}\n<arg_key>{arg-key-1}</arg_key>\n<arg_value>{arg-value-1}</arg_value>\n<arg_key>{arg-key-2}</arg_key>\n<arg_value>{arg-value-2}</arg_value>\n...\n</tool_call>\n```\n\nThe `<tool_call>{function-name}` / `<arg_key>` / `<arg_value>` lines above are part of the **prompt text** (the format spec the model is told to follow), not an example call. This tools turn is emitted only when `tools` is non-empty, and it is closed implicitly by the next role marker (e.g. a user-supplied `<|system|>` or the first `<|user|>`), with no blank line between them.\n\n## Tool-call format\n\nThe model emits a call as an `<tool_call>` block: the function **name on the same line** as the opening tag, a newline, then one `<arg_key>…</arg_key>` + `<arg_value>…</arg_value>` pair per argument, closed by `</tool_call>`. Minimal single call (assistant generation in thinking mode; reasoning shown for realism):\n\n```text\n<think>The user wants the weather in Beijing. I'll call get_weather.</think>\n<tool_call>get_weather\n<arg_key>location</arg_key>\n<arg_value>Beijing</arg_value>\n<arg_key>unit</arg_key>\n<arg_value>celsius</arg_value>\n</tool_call>\n```\n\nAnatomy and value encoding (this is the single most error-prone part):\n\n- The function name is the text between `<tool_call>` and the first newline — there is **no** wrapping tag around it and **no** space after `<tool_call>`.\n- Each argument is two adjacent elements: `<arg_key>name</arg_key>` then `<arg_value>value</arg_value>`, conventionally one pair per line.\n- **Argument values are NOT uniformly JSON.** The template renders each value as `value | tojson(ensure_ascii=False) if value is not string else value`:\n - **string** values are emitted **raw, without surrounding quotes** → `<arg_value>Beijing</arg_value>` (not `\"Beijing\"`).\n - **non-string** values (number, boolean, null, object, array) are JSON-encoded → `<arg_value>3</arg_value>`, `<arg_value>true</arg_value>`, `<arg_value>{\"k\": 1}</arg_value>`.\n- A **zero-argument** call has no pairs: the name is followed by a newline and the closer — `<tool_call>get_time\\n</tool_call>`.\n\nBecause string values lose their quotes, a parser must decide per argument whether to JSON-decode or treat the value as a literal string. Both reference parsers do this by consulting the tool's JSON Schema: if the parameter's type is `string`, the raw text is taken as-is; otherwise the value is JSON-decoded (with `ast.literal_eval` and raw-string fallbacks). The model is trained to follow the schema, so it emits a bare string exactly when the parameter is string-typed.\n\n## Multiple / parallel tool calls\n\nTwo or more calls in one turn are emitted as consecutive `<tool_call>…</tool_call>` blocks separated by a single newline (no wrapper element around the set). Raw assistant emission for two parallel calls with mixed argument types:\n\n```text\n<think>Two cities. Call get_weather twice in parallel.</think>\n<tool_call>get_weather\n<arg_key>location</arg_key>\n<arg_value>Beijing</arg_value>\n<arg_key>unit</arg_key>\n<arg_value>celsius</arg_value>\n</tool_call>\n<tool_call>get_weather\n<arg_key>location</arg_key>\n<arg_value>Shanghai</arg_value>\n<arg_key>days</arg_key>\n<arg_value>3</arg_value>\n<arg_key>verbose</arg_key>\n<arg_value>true</arg_value>\n</tool_call>\n```\n\nNote `Beijing`/`Shanghai`/`celsius` (string) are bare, while `3` (number) and `true` (boolean) are JSON literals. Parsers split on the non-greedy `<tool_call>.*?</tool_call>` regex, so any number of calls is supported; each becomes a separate entry in `tool_calls[]`.\n\n## Tool-result format\n\nResults are returned in an **observation** turn. For a single result: the `<|observation|>` marker, a newline, then the result wrapped in `<tool_response>` / `</tool_response>`:\n\n```text\n<|observation|>\n<tool_response>\n{\"temperature\": 26, \"unit\": \"celsius\", \"condition\": \"Sunny\"}\n</tool_response>\n```\n\nThe content between the tags is inserted **verbatim** (callers typically pass a JSON string, but any text is allowed). For **multiple** results from a set of parallel calls, the `<|observation|>` marker appears **once** and each result gets its own `<tool_response>` block (consecutive `tool`-role messages are merged under a single observation turn):\n\n```text\n<|observation|>\n<tool_response>\n{\"temperature\": 26, \"condition\": \"Sunny\"}\n</tool_response>\n<tool_response>\n{\"temperature\": 30, \"condition\": \"Cloudy\"}\n</tool_response>\n```\n\nThe chat template reads **only** the tool message's `content` — it does not consult any `tool_call_id`. Results are therefore correlated to calls **positionally / by order**, not by an embedded id (GLM's wire format carries no per-call id; see API mapping).\n\n## End-to-end example\n\nA complete multi-turn weather exchange. These are the exact locally rendered streams; newlines inside a turn are literal and turns are otherwise contiguous (no separators between markers).\n\n**Stage 1 — prompt fed to the model** (`tools` set, one prior system message, `add_generation_prompt=True`, thinking mode):\n\n```text\n[gMASK]<sop><|system|>\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\n{\"type\": \"function\", \"function\": {\"name\": \"get_weather\", \"description\": \"Get current weather for a city\", \"parameters\": {\"type\": \"object\", \"properties\": {\"location\": {\"type\": \"string\", \"description\": \"City name\"}, \"unit\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"]}}, \"required\": [\"location\"]}}}\n</tools>\n\nFor each function call, output the function name and arguments within the following XML format:\n<tool_call>{function-name}\n<arg_key>{arg-key-1}</arg_key>\n<arg_value>{arg-value-1}</arg_value>\n<arg_key>{arg-key-2}</arg_key>\n<arg_value>{arg-value-2}</arg_value>\n...\n</tool_call><|system|>\nYou are a helpful assistant.<|user|>\nWhat's the weather in Beijing?<|assistant|>\n```\n\n**Assistant generation** (model output; it ends by emitting `<|observation|>`, an EOS id, so decoding stops there; server returns `finish_reason: \"tool_calls\"`):\n\n```text\n<think>The user wants the weather in Beijing. I'll call get_weather.</think>\n<tool_call>get_weather\n<arg_key>location</arg_key>\n<arg_value>Beijing</arg_value>\n<arg_key>unit</arg_key>\n<arg_value>celsius</arg_value>\n</tool_call>\n```\n\n**Stage 2 — prompt for the next turn**, after appending the assistant tool-call turn and the tool result, then `add_generation_prompt=True`:\n\n```text\n[gMASK]<sop><|system|>\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\n{\"type\": \"function\", \"function\": {\"name\": \"get_weather\", \"description\": \"Get current weather for a city\", \"parameters\": {\"type\": \"object\", \"properties\": {\"location\": {\"type\": \"string\", \"description\": \"City name\"}, \"unit\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"]}}, \"required\": [\"location\"]}}}\n</tools>\n\nFor each function call, output the function name and arguments within the following XML format:\n<tool_call>{function-name}\n<arg_key>{arg-key-1}</arg_key>\n<arg_value>{arg-value-1}</arg_value>\n<arg_key>{arg-key-2}</arg_key>\n<arg_value>{arg-value-2}</arg_value>\n...\n</tool_call><|system|>\nYou are a helpful assistant.<|user|>\nWhat's the weather in Beijing?<|assistant|>\n<think>The user wants the weather in Beijing. I'll call get_weather.</think>\n<tool_call>get_weather\n<arg_key>location</arg_key>\n<arg_value>Beijing</arg_value>\n<arg_key>unit</arg_key>\n<arg_value>celsius</arg_value>\n</tool_call><|observation|>\n<tool_response>\n{\"temperature\": 26, \"unit\": \"celsius\", \"condition\": \"Sunny\"}\n</tool_response><|assistant|>\n```\n\n**Final assistant generation** (natural-language answer, terminated by `<|endoftext|>`; `finish_reason: \"stop\"`):\n\n```text\n<think>Got it, 26C and sunny.</think>\nIt's 26°C and sunny in Beijing right now.\n```\n\nTwo subtleties visible above: (1) the reasoning of the assistant tool-call turn is **preserved** in Stage 2 only because it is the segment after the last user message; with another user turn after it, that `<think>…</think>` would be re-rendered empty. (2) The tool-call turn and the observation turn abut directly (`</tool_call><|observation|>`), and the observation abuts the next assistant marker (`</tool_response><|assistant|>`).\n\nFor **non-thinking** mode the user text carries the soft switch and the generation prompt pre-fills an empty think span:\n\n```text\n<|user|>\nHi there/nothink<|assistant|>\n<think></think>\n```\n\n## OpenAI-compatible API mapping\n\nWith a server parser active (`--tool-call-parser glm45 --reasoning-parser glm45`), the raw stream maps onto Chat Completions as follows:\n\n- `choices[].finish_reason` = `\"tool_calls\"` when the output contained at least one `<tool_call>` (otherwise `\"stop\"`).\n- `choices[].message.content` = the text **before** the first `<tool_call>` (normalized to `null` if empty/whitespace). The `<think>…</think>` reasoning is removed by the reasoning parser and surfaced separately as `message.reasoning_content`.\n- `choices[].message.tool_calls[]` — one entry per `<tool_call>…</tool_call>` block:\n - `.id` = a **server-generated** id (e.g. vLLM's `make_tool_call_id()`), **not** present in the model output. GLM emits no per-call id in the stream.\n - `.type` = `\"function\"`.\n - `.function.name` = the text after `<tool_call>` up to the first newline.\n - `.function.arguments` = a **JSON string** (an object), reconstructed from the `<arg_key>`/`<arg_value>` pairs with per-argument typing from the tool schema. vLLM returns `json.dumps(arg_dct, ensure_ascii=False)`, e.g. `\"{\\\"location\\\": \\\"Beijing\\\", \\\"unit\\\": \\\"celsius\\\"}\"`. Clients `json.loads()` it before use.\n- **Request side — tool results** are sent back as `role: \"tool\"` messages, e.g.:\n\n ```json\n {\"role\": \"tool\", \"tool_call_id\": \"call_abc123\", \"content\": \"{\\\"temperature\\\": 26, \\\"unit\\\": \\\"celsius\\\", \\\"condition\\\": \\\"Sunny\\\"}\"}\n ```\n\n The chat template renders only `content` (inside `<tool_response>`); `tool_call_id` is **ignored by the template** and matters only for the client's own bookkeeping. Order results to match the calls.\n- **Request side — assistant tool-call history**: the OpenAI shape carries `function.arguments` as a JSON **string**, but the chat template iterates `arguments.items()` and therefore needs an **object**. vLLM/SGLang parse the string back into a dict before rendering; if you call `tokenizer.apply_chat_template` directly, pass `arguments` as a dict (and optionally `reasoning_content` as a string) or the template will raise.\n- Disable thinking via `extra_body={\"chat_template_kwargs\": {\"enable_thinking\": false}}` (OpenAI Python client) — this flips the template to the `/nothink` + pre-filled `<think></think>` path.\n\n## Parsing notes & gotchas\n\n- **String values are unquoted; typing needs the schema.** The decisive rule: a `<arg_value>` is a literal string iff the parameter is string-typed in the tool's JSON Schema; otherwise it is JSON. vLLM's `_is_string_type` and SGLang's `get_argument_type` both walk `properties[arg].type` (handling `anyOf`/`oneOf`/`enum`/`allOf`/type-arrays). If the schema is missing/loose, they fall back to \"try `json.loads`, then `ast.literal_eval`, then treat as string\" — so a bare word like `celsius` survives as a string, while `26` becomes a number. A string value that *looks* like JSON (e.g. a parameter typed `string` whose value is `{\"a\":1}`) is correctly kept as the literal string only because the schema says `string`.\n- **Extraction regexes (GLM-4.5/4.6).** vLLM: calls via `<tool_call>.*?</tool_call>` (DOTALL); name/body via `<tool_call>([^\\n]*)\\n(.*)</tool_call>`; pairs via `<arg_key>(.*?)</arg_key>\\s*<arg_value>(.*?)</arg_value>`. The name regex **requires a newline** after the name — matching the 4.5/4.6 template. SGLang uses an equivalent `(?:\\\\n|\\n)` form so it also tolerates literal escaped `\\n`.\n- **`</arg_value>` in a value breaks parsing.** Values are captured non-greedily up to the next `</arg_value>`; a value whose text contains `</arg_value>` (or `</tool_call>`) truncates early. There is no escaping mechanism in the wire format.\n- **Tool calls are parsed from `content` only, not from reasoning.** A `<tool_call>` emitted inside `<think>…</think>` is ignored by the tool parser (vLLM's reasoning/tool parsers cooperate so only post-`</think>` content is scanned). Don't expect calls made \"while thinking\" to fire.\n- **Guided decoding is suppressed for GLM.** For `tool_choice: \"required\"` or a named tool, vLLM deliberately does **not** apply JSON structured-outputs/guided decoding, because that would force JSON output and conflict with GLM's XML syntax; the parser extracts from free-form XML instead.\n- **`skip_special_tokens` must be off.** Although the tool/think tags are `special: false`, vLLM forces `skip_special_tokens = False` when tools are enabled (defensive against transformers 5.x detokenization changes) so the literal `<tool_call>`/`</tool_call>` text survives for the regex.\n- **Streaming.** Long string arguments used to be buffered until the closing tag (vLLM issue #32829); the current parser re-parses the accumulated text each delta and emits only the diff, streaming incremental string content with an open-quote-then-fill strategy and holding back any partial trailing tag (`partial_tag_overlap`). The streamed tool name is the text before the first `\\n` or `<arg_key>`. SGLang implements the same as an explicit XML→JSON state machine (`INIT → IN_KEY → WAITING_VALUE → IN_VALUE`). Malformed tails (a missing `</arg_value>` before `</tool_call>`) are closed off heuristically.\n- **Lineage — GLM-4.5 vs GLM-4.6:** identical wire format and identical `chat_template.jinja` (same content hash); the same `glm45` parser serves both.\n- **Lineage — GLM-4.7 / GLM-5 changed the format.** Newer models drop the structural newlines: the function name may sit **directly** before the first `<arg_key>` (no newline), zero-argument calls may be `<tool_call>func</tool_call>`, and parallel calls may be emitted **back-to-back with no separator** (`…</tool_call><tool_call>…`). These require the distinct `Glm47MoeModelToolParser` (vLLM, `structural_tag_model=\"glm_4_7\"`) / `Glm47MoeDetector` (SGLang), whose `func_detail_regex` makes the newline and the argument section optional (`<tool_call>\\s*(\\S+?)\\s*(<arg_key>.*)?</tool_call>`). Do **not** use a GLM-4.7 stream to validate a GLM-4.5 parser or vice versa.\n\n## Sources\n\n- Chat template (authoritative; rendered locally for the byte-exact streams), GLM-4.5 commit `cbb2c7c…`: https://huggingface.co/zai-org/GLM-4.5/resolve/main/chat_template.jinja — the `blob`/web path redirects to the model-card API; verified via the raw `resolve/main` cache.\n- Identical GLM-4.6 template (same content hash, confirming shared format): https://huggingface.co/zai-org/GLM-4.6/resolve/main/chat_template.jinja\n- Special-token IDs and `special` flags (`added_tokens_decoder`, `additional_special_tokens`): https://huggingface.co/zai-org/GLM-4.5/resolve/main/tokenizer_config.json\n- Stop tokens (`eos_token_id = [151329, 151336, 151338]`): https://huggingface.co/zai-org/GLM-4.5/resolve/main/generation_config.json\n- Model card (server flags `--tool-call-parser glm45 --reasoning-parser glm45`, `enable_thinking` switch, parser links): https://huggingface.co/zai-org/GLM-4.5\n- vLLM GLM-4.5/4.6 tool parser (`Glm4MoeModelToolParser`: regexes, schema-driven string typing, JSON-string `arguments`, streaming, `skip_special_tokens`): https://github.com/vllm-project/vllm/blob/main/vllm/tool_parsers/glm4_moe_tool_parser.py\n- vLLM GLM-4.7 tool parser (`Glm47MoeModelToolParser`: same-line name, optional/zero args): https://github.com/vllm-project/vllm/blob/main/vllm/tool_parsers/glm47_moe_tool_parser.py\n- SGLang GLM-4.5/4.6 detector (`Glm4MoeDetector`: format docstring, XML→JSON state machine, argument typing): https://github.com/sgl-project/sglang/blob/main/python/sglang/srt/function_call/glm4_moe_detector.py\n- SGLang GLM-4.7 detector (`Glm47MoeDetector`: newline-less / back-to-back calls): https://github.com/sgl-project/sglang/blob/main/python/sglang/srt/function_call/glm47_moe_detector.py\n- vLLM tool-calling docs: https://docs.vllm.ai/en/latest/features/tool_calling/\n",
79
+ "toolconv/harmony.md": "# OpenAI Harmony response format\n\nHarmony is the response format OpenAI trained its open-weight `gpt-oss` models on (`gpt-oss-20b`, `gpt-oss-120b`, released August 2025). It defines the conversation envelope, the multi-channel reasoning/answer separation, and the function-calling wire syntax. The models will not work correctly if prompted without it. The format deliberately mirrors the OpenAI *Responses* API (roles, channels, recipients) rather than the older Chat Completions shape.\n\nTokens are produced with the `o200k_harmony` encoding (the `o200k_base` BPE vocab plus a block of Harmony special tokens; see the table below). The reference renderer/parser is the Rust crate `openai-harmony` (Python bindings: `pip install openai-harmony`; encoding name `HarmonyEncodingName.HARMONY_GPT_OSS`).\n\nYou only deal with raw Harmony if you build your own inference loop. Served through an OpenAI-compatible endpoint the server handles it for you:\n\n- **Ollama / LM Studio / HuggingFace**: Harmony is applied internally; you send normal OpenAI-style JSON.\n- **vLLM**: `vllm serve openai/gpt-oss-120b --enable-auto-tool-choice --tool-call-parser openai --reasoning-parser openai_gptoss`. Note the tool-call parser flag is `openai` (not `harmony`). vLLM also exposes a Harmony-native path through the `/v1/responses` endpoint.\n- **SGLang**: `python3 -m sglang.launch_server --model-path openai/gpt-oss-20b --reasoning-parser gpt-oss --tool-call-parser gpt-oss` (in NVIDIA Dynamo disaggregated mode: `--dyn-tool-call-parser harmony --dyn-reasoning-parser gpt_oss`).\n\nThe chat template shipped with the gpt-oss weights renders these same token sequences from the standard `messages`/`tools` arrays.\n\n## Special tokens\n\nAll Harmony control tokens have the literal form `<|type|>` (ASCII pipes `|`, U+007C — no unicode variants). They are real single tokens in `o200k_harmony`, not text that is BPE-split. The structurally meaningful ones:\n\n| Token (verbatim) | Token ID | Purpose |\n| :--------------- | :------- | :------ |\n| `<\\|start\\|>` | `200006` | Begins a message; immediately followed by the header (role, optional recipient/channel/content-type). |\n| `<\\|end\\|>` | `200007` | Ends a fully-formed message. |\n| `<\\|message\\|>` | `200008` | Header → content transition. Everything after it (until a stop/end token) is the message body. |\n| `<\\|channel\\|>` | `200005` | Introduces the channel field of the header (`analysis` / `commentary` / `final`). |\n| `<\\|constrain\\|>` | `200003` | Marks the content-type / constrained-decoding format in a tool-call header (e.g. `<\\|constrain\\|>json`). |\n| `<\\|return\\|>` | `200002` | Stop token: the model finished its final answer. Decode-time only (see normalization note). |\n| `<\\|call\\|>` | `200012` | Stop token: the model is emitting a tool call and wants it executed. |\n\n`<|return|>` and `<|call|>` are the two valid generation stop tokens — halt inference on either.\n\nThe encoding also defines (same `o200k_harmony` block, IDs `199998`–`200013`) `<|startoftext|>` (199998), `<|endoftext|>` (199999), and reserved slots `<|reserved_200000|>`, `<|reserved_200001|>`, `<|reserved_200004|>`, `<|reserved_200009|>`–`<|reserved_200011|>`, `<|reserved_200013|>`, plus a bulk reserved range `<|reserved_200014|>`…`<|reserved_201088|>`. The renderer additionally knows the names `<|refusal|>`, `<|untrusted|>`, `<|end_untrusted|>`, `<|meta_end|>` but they are not part of the committed gpt-oss vocabulary and do not appear in normal traffic.\n\n## Roles / channels / turn structure\n\n**Message envelope.** Every message is:\n\n```text\n<|start|>{header}<|message|>{content}<|end|>\n```\n\n`{header}` always begins with the role and may carry an optional recipient (`to=...`), channel, and content-type. A completed message ends with `<|end|>`; an assistant message being generated ends instead with a stop token (`<|return|>` or `<|call|>`).\n\n**Roles** (five). The instruction hierarchy used to resolve conflicts is `system` > `developer` > `user` > `assistant` > `tool`.\n\n| Role | Purpose |\n| :--- | :------ |\n| `system` | Identity, knowledge cutoff / current date, reasoning effort, valid-channels declaration, built-in tools. NOT the user-facing \"system prompt\". |\n| `developer` | The conventional \"system prompt\": instructions + the `# Tools` function declarations + (optional) structured-output schema. |\n| `user` | End-user input. |\n| `assistant` | Model output. Carries a channel and, for tool calls, a recipient. |\n| `tool` | Output of an executed tool. The message's *author/role is the tool's own name* (e.g. `functions.get_current_weather`), not the literal word `tool`. |\n\n**Channels** (assistant output only; the channel is mandatory on every assistant message):\n\n| Channel | Purpose |\n| :------ | :------ |\n| `analysis` | Raw chain-of-thought (reasoning). Not held to the same safety bar as `final`; do not show to end users. Built-in `python`/`browser` calls usually go here. |\n| `commentary` | Function tool calls, and user-visible \"preambles\" (action plans) before calling multiple tools. |\n| `final` | The user-facing answer. |\n\n**Reasoning effort** is set in the system message as `Reasoning: high` (or `medium` / `low`; default is medium). The model emits CoT into `analysis` and the answer into `final`.\n\n**CoT carry-over rule.** On the next turn, drop prior `analysis` messages *if* the last assistant turn ended in a `final` message. The exception is an in-progress tool-calling turn: the `analysis` that preceded a tool call MUST be fed back in alongside the tool result so the model can continue its reasoning (the `openai-harmony` renderer does this via `RenderConversationConfig { auto_drop_analysis: true }`).\n\n## Tool definitions\n\nFunction tools are advertised in the **developer** message under a `# Tools` section, inside a TypeScript-style `namespace functions { ... }`. (Built-in `browser`/`python` tools are instead declared in the **system** message under their own `# Tools` / `## browser` / `## python` headings.) The renderer converts each JSON Schema into a TS type with these rules:\n\n- No-arg function → `type name = () => any;`\n- With args → the single parameter is named `_` and its object type is inlined: `type name = (_: { ... }) => any;`\n- Return type is always `any`.\n- A property `description` becomes a `//` comment on the line *above* the field; a JSON Schema `title` renders as `// TITLE` followed by a `//` blank-comment line; `examples` render as `// Examples:` then `// - \"value\"` lines.\n- Optional (non-`required`) fields get a trailing `?`. A `default` renders as a trailing `// default: <value>` comment; an `enum` becomes a `\"a\" | \"b\"` union; `oneOf` becomes a multi-line `|` union; JSON `integer` maps to TS `number`.\n- One blank line separates function definitions; the block closes with `} // namespace functions`.\n\nIf the developer message has no instruction text, the `# Instructions` heading is omitted and the message is just the `# Tools` block. When any function is defined, the system message gains the routing line `Calls to these tools must go to the commentary channel: 'functions'.`\n\nVerbatim developer-message example (instructions + two functions), exactly as the renderer emits it:\n\n```text\n<|start|>developer<|message|># Instructions\n\nUse a friendly tone.\n\n# Tools\n\n## functions\n\nnamespace functions {\n\n// Gets the location of the user.\ntype get_location = () => any;\n\n// Gets the current weather in the provided location.\ntype get_current_weather = (_: {\n// The city and state, e.g. San Francisco, CA\nlocation: string,\nformat?: \"celsius\" | \"fahrenheit\", // default: celsius\n}) => any;\n\n// Gets the current weather in the provided list of locations.\ntype get_multiple_weathers = (_: {\n// List of city and state, e.g. [\"San Francisco, CA\", \"New York, NY\"]\nlocations: string[],\nformat?: \"celsius\" | \"fahrenheit\", // default: celsius\n}) => any;\n\n} // namespace functions<|end|>\n```\n\n## Tool-call format\n\nA function call is an **assistant** message on the **commentary** channel, addressed to the tool via recipient `to=functions.<name>`, with the JSON arguments as the body, terminated by the `<|call|>` stop token.\n\nThe recipient may appear in the *role section* or the *channel section* of the header — both are valid Harmony and the parser accepts either. The model commonly emits it in the channel section. The pi renderer omits the optional content-type marker:\n\n```text\n<|start|>assistant<|channel|>commentary to=functions.get_current_weather<|message|>{\"location\":\"San Francisco, CA\"}<|call|>\n```\n\nSome Harmony serializers include an explicit JSON content type and place the recipient in the role section instead:\n\n```text\n<|start|>assistant to=functions.get_current_weather<|channel|>commentary <|constrain|>json<|message|>{\"location\":\"San Francisco, CA\"}<|call|>\n```\n\nThe arguments body is a raw JSON object. The optional `<|constrain|>json` content-type signals JSON (and is the hook for constrained/grammar-based decoding); the content-type may also be a bare word such as `code` (seen with built-in tools). Built-in tools differ only in channel and recipient: they typically render on `analysis`, with recipient `browser.search` / `browser.open` / `browser.find` or always `python`.\n\n## Multiple / parallel tool calls\n\nHarmony has no special \"parallel\" wrapper. Multiple calls are just multiple consecutive messages. The model may first emit an optional **preamble** — a *user-visible* assistant message on the `commentary` channel (unlike `analysis`, this is meant to be shown) — then one tool-call message per function. Each individual call still ends with its own `<|call|>` stop token, so a host that stops on `<|call|>` collects calls one at a time, executes, feeds the result back, and resumes:\n\n```text\n<|channel|>analysis<|message|>{reasoning}<|end|><|start|>assistant<|channel|>commentary<|message|>**Action plan**:\n1. Generate an HTML file\n2. Generate a JavaScript for the Node.js server\n3. Start the server\n---\nWill start executing the plan step by step<|end|><|start|>assistant<|channel|>commentary to=functions.generate_file<|message|>{\"template\": \"basic_html\", \"path\": \"index.html\"}<|call|>\n```\n\n## Tool-result format\n\nThe executed tool's output is fed back as a message whose **author/role is the tool's name**, addressed back to the assistant (`to=assistant`), on the **commentary** channel, ending with `<|end|>`. This is the canonical (recommended) form:\n\n```text\n<|start|>functions.get_current_weather to=assistant<|channel|>commentary<|message|>{\"sunny\": true, \"temperature\": 20}<|end|>\n```\n\nThe header ordering is `{toolname} to=assistant<|channel|>commentary`. Built-in tool results follow the same shape (e.g. `<|start|>browser.search to=assistant<|channel|>commentary<|message|>{\"result\": \"https://openai.com/\"}<|end|>`). The minimal form the renderer accepts when channel/recipient are not set on the message is just `<|start|>{toolname}<|message|>{output}<|end|>`, but emitting the full `to=assistant<|channel|>commentary` header is what the reference parser round-trips and is recommended. After appending the result, restart generation by emitting the next `<|start|>assistant`.\n\n## End-to-end example\n\nComplete multi-turn weather exchange: system + developer prompt → user question → assistant analysis CoT → assistant commentary tool call → tool result → assistant final answer. This is a single contiguous token stream (newlines inside headers are only between top-level messages for readability; in practice messages are concatenated with no separator).\n\n```text\n<|start|>system<|message|>You are ChatGPT, a large language model trained by OpenAI.\nKnowledge cutoff: 2024-06\nCurrent date: 2025-06-28\n\nReasoning: high\n\n# Valid channels: analysis, commentary, final. Channel must be included for every message.\nCalls to these tools must go to the commentary channel: 'functions'.<|end|><|start|>developer<|message|># Instructions\n\nUse a friendly tone.\n\n# Tools\n\n## functions\n\nnamespace functions {\n\n// Gets the current weather in the provided location.\ntype get_current_weather = (_: {\n// The city and state, e.g. San Francisco, CA\nlocation: string,\nformat?: \"celsius\" | \"fahrenheit\", // default: celsius\n}) => any;\n\n} // namespace functions<|end|><|start|>user<|message|>What is the weather like in SF?<|end|><|start|>assistant<|channel|>analysis<|message|>User wants the weather in San Francisco. Use get_current_weather.<|end|><|start|>assistant<|channel|>commentary to=functions.get_current_weather<|message|>{\"location\":\"San Francisco, CA\"}<|call|><|start|>functions.get_current_weather to=assistant<|channel|>commentary<|message|>{\"sunny\": true, \"temperature\": 20}<|end|><|start|>assistant<|channel|>final<|message|>It's sunny and about 20°C in San Francisco right now.<|return|>\n```\n\nTurn boundaries:\n\n- The host stops generation at `<|call|>`, parses the `commentary` call, runs `get_current_weather`, and appends the `functions.get_current_weather to=assistant` result message.\n- It then appends `<|start|>assistant` and resumes. The preceding `analysis` message is kept (the turn ended in a tool call, not a `final`), so the model can continue its reasoning.\n- Generation stops at `<|return|>`. When this turn is persisted into history for a *later* turn, normalize the trailing `<|return|>` to `<|end|>` (see next note).\n\n**`<|return|>` normalization.** `<|return|>` is a decode-time stop token only. When you store the assistant's reply into history for the next turn, replace the trailing `<|return|>` with `<|end|>` so every stored message is a well-formed `<|start|>{header}<|message|>{content}<|end|>`. (For supervised training targets, ending the example with `<|return|>` is correct.)\n\n## OpenAI-compatible API mapping\n\nWhen a server (vLLM/SGLang/Ollama) bridges Harmony to Chat Completions JSON:\n\n- **`finish_reason`**: `tool_calls` when generation stopped on `<|call|>`; `stop` when it stopped on `<|return|>`.\n- **`message.tool_calls[]`**: one entry per `commentary` `to=functions.*` call. `function.name` is the recipient with the `functions.` namespace stripped (`get_current_weather`). `function.arguments` is a **JSON string** (the verbatim `<|message|>` body), matching OpenAI semantics — not a parsed object.\n- **`tool_call_id`**: Harmony has no native call ID. The server synthesizes one (e.g. `call_abc123`) and is responsible for correlating the follow-up `role:\"tool\"` message back to the Harmony tool-result envelope (recipient `to=functions.<name>` / call order).\n- **Tool result messages** (`{\"role\":\"tool\",\"tool_call_id\":...,\"content\":...}`) are rendered into `<|start|>{toolname} to=assistant<|channel|>commentary<|message|>{content}<|end|>`. The server maps `tool_call_id` → the original function name to build the `{toolname}` author.\n- **Reasoning**: `analysis`-channel text is surfaced as `reasoning_content` (vLLM/SGLang) or as a `reasoning`/`thinking` field, and is generally not echoed back on subsequent requests. `final`-channel text is the normal `message.content`. `commentary` preambles, if surfaced, also map to assistant content.\n- **`tools` / `tool_choice`** request fields are compiled by the chat template into the developer-message `namespace functions { ... }` block; the system message gains the commentary-routing line.\n\n## Parsing notes & gotchas\n\n- **Two stop tokens.** Always stop on both `<|return|>` and `<|call|>`. Stopping only on `<|return|>` will run past tool calls; stopping only on `<|end|>` is wrong for assistant generation.\n- **Recipient position varies.** `to=functions.<name>` may be in the role section (`<|start|>assistant to=...<|channel|>commentary`) or the channel section (`<|channel|>commentary to=...`). A parser must accept both.\n- **Channel is mandatory** on assistant messages; the system message even reminds the model (\"Channel must be included for every message.\"). Missing-channel output is malformed.\n- **Tool author, not `tool`.** The tool-result message's role is the tool's *name* (`functions.get_current_weather`), not the literal string `tool`. Splitting `functions.x` into namespace + function is the parser's job.\n- **CoT dropping is conditional.** Drop `analysis` only when the previous assistant turn ended on `final`. Dropping the `analysis` that immediately precedes a `<|call|>` breaks multi-step tool reasoning.\n- **`arguments` is a string.** Do not double-encode. The body after `<|message|>` is already serialized JSON; pass it through as the `arguments` string.\n- **Content-type variants.** `<|constrain|>json` is optional. If present, it is metadata, not a guarantee of valid JSON. Enforce JSON validity with constrained decoding / your own grammar — the prompt format alone does not guarantee schema adherence (same caveat applies to structured-output `# Response Formats`).\n- **Streaming.** Use a stateful parser (the library ships `StreamableParser`) so partial UTF-8 and the header/channel/recipient/content-type fields are reconstructed incrementally; a naive substring scan mishandles multi-byte splits and the optional header fields. `parse_messages_from_completion_tokens` takes `strict=True|False` — `strict=False` tolerates some malformed headers. Do not pass the trailing stop token into the parser.\n- **Encoding.** Use `o200k_harmony` (the `o200k_base` ranks plus the Harmony specials above). Treat the `<|...|>` tokens as atomic special tokens during both encode and decode; encoding them as ordinary text yields different ranks and corrupts the stream.\n\n## Sources\n\n- OpenAI Cookbook — OpenAI harmony response format: https://cookbook.openai.com/articles/openai-harmony\n- openai/harmony renderer (README): https://github.com/openai/harmony\n- openai/harmony canonical format guide: https://raw.githubusercontent.com/openai/harmony/main/docs/format.md\n- openai/harmony special-token registry (`o200k_harmony` IDs): https://raw.githubusercontent.com/openai/harmony/main/src/tiktoken_ext/public_encodings.rs\n- openai/harmony renderer/parser tests and schema→TS logic: https://raw.githubusercontent.com/openai/harmony/main/src/tests.rs , https://raw.githubusercontent.com/openai/harmony/main/src/encoding.rs\n- openai/harmony test fixtures (verbatim rendered streams): `test-data/test_render_functions_with_parameters.txt`, `test-data/test_does_not_drop_if_ongoing_analysis.txt`, `test-data/test_tool_response_parsing.txt`, `test-data/test_streamable_parser.txt`, `test-data/test_browser_and_function_tool.txt` (https://github.com/openai/harmony/tree/main/test-data)\n- vLLM tool calling / gpt-oss parser flags: https://docs.vllm.ai/en/latest/features/tool_calling/\n- SGLang gpt-oss usage (`--tool-call-parser gpt-oss`): https://docs.sglang.io/basic_usage/gpt_oss.html\n",
80
+ "toolconv/kimi-k2.md": "# Kimi K2 tool-calling format\n\nNative tool-calling convention of Moonshot AI's **Kimi K2** family (`moonshotai/Kimi-K2-Instruct` and `-Base`, `model_type: \"kimi_k2\"`, 1T-param MoE). It is a ChatML-like envelope built on a TikToken tokenizer (160K vocab): every turn is `<|im_{class}|>{name}<|im_middle|>{body}<|im_end|>`, and tool calls are emitted inside the assistant turn wrapped by a dedicated `<|tool_calls_section_begin|>…<|tool_calls_section_end|>` block. All control tokens are plain ASCII `<|…|>` forms (no fullwidth/unicode variants, unlike DeepSeek). An inference server turns the raw stream into OpenAI-style `tool_calls` with a parser: vLLM and SGLang both expose `--tool-call-parser kimi_k2` (vLLM additionally requires `--enable-auto-tool-choice`). The chat template (a standalone `chat_template.jinja` since the 2025.8.11 update) injects the tool schemas and renders the per-turn markers.\n\nThis document was verified against the model card, the official `docs/tool_call_guidance.md` and `docs/deploy_guidance.md` (GitHub `MoonshotAI/Kimi-K2`), the raw `chat_template.jinja` and `tokenizer_config.json` from the HF repo (rendered locally for the byte-exact streams below), and the vLLM `kimi_k2` tool parser source.\n\n## Special tokens\n\nThe five tool-call markers required for manual parsing, plus the ChatML envelope markers. Token IDs are from `tokenizer_config.json` (`added_tokens_decoder`).\n\n| Token (verbatim) | ID | Purpose |\n|---|---|---|\n| `<\\|tool_calls_section_begin\\|>` | 163595 | Opens the tool-call section inside an assistant turn |\n| `<\\|tool_call_begin\\|>` | 163597 | Opens one individual tool call |\n| `<\\|tool_call_argument_begin\\|>` | 163598 | Separates the tool-call ID from its JSON arguments |\n| `<\\|tool_call_end\\|>` | 163599 | Closes one individual tool call |\n| `<\\|tool_calls_section_end\\|>` | 163596 | Closes the tool-call section |\n| `<\\|im_system\\|>` | 163594 | Start marker for system-class turns (`system`, `tool`, `tool_declare`) |\n| `<\\|im_user\\|>` | 163587 | Start marker for a user turn |\n| `<\\|im_assistant\\|>` | 163588 | Start marker for an assistant turn |\n| `<\\|im_middle\\|>` | 163601 | Separates the role/name header from the message body |\n| `<\\|im_end\\|>` | 163586 | Ends any turn |\n| `[BOS]` | 163584 | Sequence-begin token (see notes; not emitted by the chat template) |\n| `[EOS]` | 163585 | Sequence-end token |\n\nNotes on exactness:\n- The five tool tokens use ASCII pipe `|` (U+007C) and underscores; reproduce them exactly. There are no fullwidth pipe (`|`) or `▁` variants in Kimi K2.\n- `<|im_middle|>` is the only envelope token whose ID (163601) is out of sequence with the others (163586–163599); a `163600` slot is unused.\n- Image inputs render via a content macro as the literal sequence `<|media_start|>image<|media_content|><|media_pad|><|media_end|>`. These media markers appear in the template but are **not** registered in `added_tokens_decoder`, so they tokenize as ordinary text rather than single special tokens. They are irrelevant to text tool calling and are listed here only for completeness.\n\n## Roles / channels / turn structure\n\nKimi K2 uses a ChatML-style envelope. Every message is rendered as:\n\n```text\n<|im_{class}|>{name}<|im_middle|>{body}<|im_end|>\n```\n\n- There are exactly **three** start-marker tokens, chosen by `role`:\n - `user` → `<|im_user|>`\n - `assistant` → `<|im_assistant|>`\n - everything else (`system`, `tool`, and the synthetic `tool_declare`) → `<|im_system|>`\n- The `{name}` segment between the marker and `<|im_middle|>` is `message.name or message.role`. This is the only \"channel\"/sub-role label Kimi K2 has. For ordinary turns it is literally `system`, `user`, or `assistant`; for a tool-result turn it is the tool's `name` (the function name) when supplied, otherwise `tool`; for the tool-schema turn it is the literal `tool_declare`.\n- `<|im_end|>` terminates every turn. The chat template does **not** emit `[BOS]`/`[EOS]`; turn boundaries are purely `<|im_*|>` markers (the tokenizer is TikToken-based with `add_bos_token`/`add_eos_token` unset, and the manual-parse flow feeds the rendered template straight to `/completions`).\n- **Default system prompt:** if the first message is not a `system` message, the template injects `<|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|>` before the first turn.\n- **Generation prompt:** with `add_generation_prompt=True` the template ends with `<|im_assistant|>assistant<|im_middle|>`, and the model generates from there.\n- **Thinking/reasoning:** `Kimi-K2-Instruct` is a \"reflex-grade\" model with no long thinking, so there is no reasoning channel in this format. (Thinking variants are handled separately — vLLM ships a distinct `kimi_k2` reasoning parser keyed on a `</think>` token — but that is out of scope for the Instruct tool-call format documented here.)\n\n## Tool definitions\n\nAvailable tools are advertised in a single dedicated turn placed at the very top of the prompt (before any system/user turn), using the synthetic `tool_declare` sub-role under the `<|im_system|>` marker:\n\n```text\n<|im_system|>tool_declare<|im_middle|>{TOOLS_JSON}<|im_end|>\n```\n\n`{TOOLS_JSON}` is the standard OpenAI-style `tools` array serialized to JSON with **compact separators** `(',', ':')` (no spaces). The array elements are passed through verbatim, i.e. each is `{\"type\":\"function\",\"function\":{\"name\":…,\"description\":…,\"parameters\":{…}}}` with a JSON-Schema `parameters` object. Example (single tool, exactly as emitted):\n\n```text\n<|im_system|>tool_declare<|im_middle|>[{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"description\":\"Get weather information. Call this tool when the user needs to get weather information\",\"parameters\":{\"type\":\"object\",\"required\":[\"city\"],\"properties\":{\"city\":{\"type\":\"string\",\"description\":\"City name\"}}}}}]<|im_end|>\n```\n\nThe `tool_declare` turn is rendered only when `tools` is non-empty.\n\n## Tool-call format\n\nWhen the model decides to call a function, it emits — inside the assistant turn, after any natural-language content — a tool-calls section. Minimal single call (this is the assistant generation that follows `<|im_assistant|>assistant<|im_middle|>`):\n\n```text\n<|tool_calls_section_begin|><|tool_call_begin|>functions.get_weather:0<|tool_call_argument_begin|>{\"city\": \"Beijing\"}<|tool_call_end|><|tool_calls_section_end|>\n```\n\nAnatomy of one call:\n\n```text\n<|tool_call_begin|> functions.{func_name}:{idx} <|tool_call_argument_begin|> {JSON arguments} <|tool_call_end|>\n```\n\n- The token between `<|tool_call_begin|>` and `<|tool_call_argument_begin|>` is the **tool-call ID**, with the fixed form `functions.{func_name}:{idx}`.\n - `functions.` is a literal prefix (it is not derived from the tool schema).\n - `{func_name}` is the called function's name; the function name is recovered by parsing it back out of this ID, not from a separate field.\n - `{idx}` is the **0-based call index** within the current assistant turn (`0` for the first call, `1` for the second, …).\n- After `<|tool_call_argument_begin|>` comes the raw JSON arguments object (e.g. `{\"city\": \"Beijing\"}`), terminated by `<|tool_call_end|>`.\n- All calls of the turn live between one `<|tool_calls_section_begin|>` / `<|tool_calls_section_end|>` pair. Any assistant text content precedes `<|tool_calls_section_begin|>`.\n- The whole assistant turn is still closed by `<|im_end|>` and the completion's `finish_reason` becomes `tool_calls`.\n\n## Multiple / parallel tool calls\n\nTwo or more calls in one turn are emitted as consecutive `<|tool_call_begin|>…<|tool_call_end|>` blocks inside a single section, with the index incrementing per call. Raw assistant emission for two parallel calls:\n\n```text\n<|tool_calls_section_begin|><|tool_call_begin|>functions.get_weather:0<|tool_call_argument_begin|>{\"city\": \"Beijing\"}<|tool_call_end|><|tool_call_begin|>functions.get_weather:1<|tool_call_argument_begin|>{\"city\": \"Shanghai\"}<|tool_call_end|><|tool_calls_section_end|>\n```\n\nNote the IDs `functions.get_weather:0` and `functions.get_weather:1` — same function, distinct trailing index. The index is per-turn (it resets to `0` in the next assistant turn).\n\n## Tool-result format\n\nTool execution results are fed back as a turn with `role: \"tool\"`. Because `tool` is not `user`/`assistant`, it renders under the `<|im_system|>` marker; the sub-role label is the message's `name` (the function name) when present, else `tool`. The body is a literal `## Return of {tool_call_id}` header line followed by the result content:\n\n```text\n<|im_system|>get_weather<|im_middle|>## Return of functions.get_weather:0\n{\"weather\": \"Sunny\"}<|im_end|>\n```\n\n- `{tool_call_id}` echoes the exact ID from the originating call (`functions.get_weather:0`), which is how the model correlates a result with the call that produced it.\n- The result `content` is inserted verbatim on the line after the header; callers typically pass a JSON string (e.g. `json.dumps(tool_result)`).\n- If the `tool` message omits `name`, the envelope becomes `<|im_system|>tool<|im_middle|>## Return of …`.\n\n## End-to-end example\n\nA complete multi-turn weather exchange. These are the exact rendered streams (system + user supplied explicitly; line breaks inside a turn are literal, turns are otherwise contiguous).\n\n**Stage 1 — prompt fed to the model** (`tools` set, `add_generation_prompt=True`):\n\n```text\n<|im_system|>tool_declare<|im_middle|>[{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"description\":\"Get weather information. Call this tool when the user needs to get weather information\",\"parameters\":{\"type\":\"object\",\"required\":[\"city\"],\"properties\":{\"city\":{\"type\":\"string\",\"description\":\"City name\"}}}}}]<|im_end|><|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|><|im_user|>user<|im_middle|>What's the weather like in Beijing today? Use the tool to check.<|im_end|><|im_assistant|>assistant<|im_middle|>\n```\n\n**Assistant generation** (model output; server reports `finish_reason: \"tool_calls\"`):\n\n```text\n<|tool_calls_section_begin|><|tool_call_begin|>functions.get_weather:0<|tool_call_argument_begin|>{\"city\": \"Beijing\"}<|tool_call_end|><|tool_calls_section_end|><|im_end|>\n```\n\n**Stage 2 — prompt for the next turn**, after appending the assistant tool-call turn and the tool result turn (`add_generation_prompt=True`):\n\n```text\n<|im_system|>tool_declare<|im_middle|>[{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"description\":\"Get weather information. Call this tool when the user needs to get weather information\",\"parameters\":{\"type\":\"object\",\"required\":[\"city\"],\"properties\":{\"city\":{\"type\":\"string\",\"description\":\"City name\"}}}}}]<|im_end|><|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|><|im_user|>user<|im_middle|>What's the weather like in Beijing today? Use the tool to check.<|im_end|><|im_assistant|>assistant<|im_middle|><|tool_calls_section_begin|><|tool_call_begin|>functions.get_weather:0<|tool_call_argument_begin|>{\"city\": \"Beijing\"}<|tool_call_end|><|tool_calls_section_end|><|im_end|><|im_system|>get_weather<|im_middle|>## Return of functions.get_weather:0\n{\"weather\": \"Sunny\"}<|im_end|><|im_assistant|>assistant<|im_middle|>\n```\n\n**Final assistant generation** (model produces natural-language answer terminated by `<|im_end|>`; `finish_reason: \"stop\"`):\n\n```text\nIt's sunny in Beijing today.<|im_end|>\n```\n\n## OpenAI-compatible API mapping\n\nWith a server parser active (`--tool-call-parser kimi_k2`), the raw stream maps onto the Chat Completions shape as follows:\n\n- `choices[].finish_reason` = `\"tool_calls\"` when the turn contained a tool-calls section (otherwise `\"stop\"`).\n- `choices[].message.tool_calls[]` — one entry per `<|tool_call_begin|>…<|tool_call_end|>` block:\n - `.id` = the raw call ID verbatim, e.g. `\"functions.get_weather:0\"`.\n - `.type` = `\"function\"`.\n - `.function.name` = the function name parsed out of the ID. vLLM computes `id.split(\":\")[0].split(\".\")[-1]` → `\"get_weather\"`.\n - `.function.arguments` = a **JSON string** (the raw text captured between `<|tool_call_argument_begin|>` and `<|tool_call_end|>`), e.g. `\"{\\\"city\\\": \\\"Beijing\\\"}\"`. Clients `json.loads()` it before use.\n- Tool results are sent back as messages of the form:\n\n ```json\n {\"role\": \"tool\", \"tool_call_id\": \"functions.get_weather:0\", \"name\": \"get_weather\", \"content\": \"{\\\"weather\\\": \\\"Sunny\\\"}\"}\n ```\n\n `tool_call_id` must equal the `id` returned for the call; `name` becomes the `<|im_system|>{name}<|im_middle|>` sub-role; `content` becomes the body after `## Return of …`.\n- Streaming: deltas arrive as `choices[].delta.tool_calls[]` with an `index`; the function `name`/`id` stream once the call header is complete, then `function.arguments` streams as incremental string fragments to be concatenated (standard OpenAI tool-call streaming assembly).\n\nMoonshot's hosted API (`platform.moonshot.ai`) exposes both OpenAI- and Anthropic-compatible endpoints; the Anthropic-compatible one scales temperature as `real_temperature = request_temperature * 0.6`. Recommended sampling temperature for `Kimi-K2-Instruct` is `0.6`.\n\n## Parsing notes & gotchas\n\n- **ID → name parsing differs between references.** The official `tool_call_guidance.md` extracts the name with `function_id.split('.')[1].split(':')[0]`, which assumes the ID is exactly `functions.{name}` with no extra dots. vLLM uses the more robust `function_id.split(\":\")[0].split(\".\")[-1]` (takes the last dot-segment before `:{idx}`). Prefer the vLLM form so function names containing `.` are handled.\n- **Extraction regexes differ too.** Guidance: `<\\|tool_call_begin\\|>\\s*(?P<tool_call_id>[\\w\\.]+:\\d+)\\s*<\\|tool_call_argument_begin\\|>\\s*(?P<function_arguments>.*?)\\s*<\\|tool_call_end\\|>`. vLLM: ID class is `[^<]+:\\d+` and the argument body uses a negative lookahead `(?:(?!<\\|tool_call_begin\\|>).)*?` so adjacent calls aren't merged. Both run with `DOTALL`.\n- **`skip_special_tokens` must be False.** The parser depends on the literal marker text surviving detokenization; vLLM forces `skip_special_tokens = False` when tools are enabled and `tool_choice != \"none\"`. If markers are stripped, no tool call is detected.\n- **Arguments are unvalidated raw text.** Whatever the model emits between the argument marker and `<|tool_call_end|>` is passed straight through as the `arguments` string; it must be valid JSON for downstream `json.loads`, and the model can emit malformed/truncated JSON. Validate before executing.\n- **Index semantics.** `{idx}` is the per-turn call counter starting at `0`; it is not a global counter and resets each assistant turn. Do not assume IDs are unique across turns — disambiguate by turn when persisting history.\n- **Streaming marker splits.** Section and call markers can be split across token boundaries. vLLM holds back any trailing suffix that partially matches a marker (`partial_tag_overlap`) to avoid leaking marker bytes into streamed content, and only streams a call's name once its header is fully received.\n- **`finish_reason` varies by engine.** The official guide explicitly warns the terminal `finish_reason` for tool calls \"may vary across different engines\"; loop on `finish_reason == \"tool_calls\"` but be defensive.\n- **Engine fallback.** Kimi K2 reuses the DeepSeek-V3 architecture; `config.json` sets `model_type: \"kimi_k2\"` so engines apply the right parser. If you force `model_type: \"deepseek_v3\"` as a compatibility workaround, no native Kimi tool parser is available and you must parse the `<|tool_calls_section_*|>` markers manually.\n- **Parser availability.** vLLM ships both a Python (`KimiK2ToolParser`) and a newer Rust tool parser; SGLang implements its own `kimi_k2` parser. All key off the same five markers and the `functions.{name}:{idx}` ID convention documented here.\n- **Whitespace artifact.** When no `system` message is supplied, the template injects the default system prompt and a small `\\n ` (newline + two spaces) can appear before the first `<|im_user|>` marker. It is harmless (tokenizes around the markers), but supplying an explicit system message yields the clean streams shown above.\n\n## Sources\n\n- Model card (Tool Calling section, OpenAI-style example, deployment/API notes): https://huggingface.co/moonshotai/Kimi-K2-Instruct\n- Official tool-call guidance (markers, ID convention, manual parser, `extract_tool_call_info`): https://raw.githubusercontent.com/MoonshotAI/Kimi-K2/main/docs/tool_call_guidance.md (the HF `resolve`/`blob` paths redirected to the model card; verified against this GitHub raw file)\n- Deployment guide (`--tool-call-parser kimi_k2`, `--enable-auto-tool-choice`, SGLang flag, `model_type` fallback): https://raw.githubusercontent.com/MoonshotAI/Kimi-K2/main/docs/deploy_guidance.md\n- Chat template (`chat_template.jinja`, rendered locally for byte-exact streams): https://huggingface.co/moonshotai/Kimi-K2-Instruct/resolve/main/chat_template.jinja\n- Tokenizer config (special-token IDs in `added_tokens_decoder`): https://huggingface.co/moonshotai/Kimi-K2-Instruct/resolve/main/tokenizer_config.json\n- vLLM `kimi_k2` tool parser (markers, regex, name-parsing, `skip_special_tokens`, streaming): https://github.com/vllm-project/vllm/blob/main/vllm/tool_parsers/kimi_k2_tool_parser.py\n- vLLM PR adding the parser: https://github.com/vllm-project/vllm/pull/20789\n- vLLM tool-calling docs: https://docs.vllm.ai/en/latest/features/tool_calling/\n",
81
+ "toolconv/pi-native.md": "# pi-native tool-call format\n\nThe **pi-native** format is the tool-call serialization used by the omp / pi coding agent. Unlike the JSON-in-a-tag conventions (Hermes/Qwen, Harmony) and unlike the fully separate JSON content-block channel (Anthropic Messages API), pi-native serializes each call as an **XML-flavored block** whose tag carries the tool name — `<call:NAME>…</call:NAME>` — and whose arguments are child elements named after the parameters. It is **schema-driven**: the tool's JSON Schema decides how each value is typed (string vs number vs object vs array) and which compact spellings are legal.\n\nThis document is a **specification** of the format (it is the contract a renderer must emit and a parser must accept), not a reverse-engineering of trained model weights. It is designed around four goals:\n\n- **Token economy** — the common cases (a single scalar argument; a single string payload) collapse to one short line.\n- **Verbatim payloads** — a large multi-line string argument (a patch body, a file's contents, a shell script) is carried **raw**, with no JSON string-escaping and no entity-encoding, terminated by the call's own unique closing tag.\n- **Human legibility** — a call reads like the function it denotes; nesting maps to nesting.\n- **Lenient parsing** — the tags are plain text matched by a tolerant parser (regex / streaming state machine), not a strict XML parser; output is *not* required to be well-formed XML.\n\nScope: pi-native specifies only the **tool-call (and argument) serialization**. It is **envelope-agnostic** — the `<call:…>` blocks are emitted as ordinary assistant text and embed unchanged in any conversation envelope (ChatML, Harmony, the Anthropic two-role shape, …). Reasoning channels, role markers, and result delivery are the host envelope's concern; the one envelope-level requirement pi-native imposes is in [Tool-result correlation](#tool-result-correlation).\n\nLineage: the attribute spelling (`<call:read path=\"…\"/>`) follows Anthropic's modern attribute XML (`<invoke name=\"…\">` / `<parameter name=\"…\">`); the schema-driven **unquoted** value rule (a bare string value carries no quotes; non-strings are JSON) follows GLM-4.5's `<arg_value>` convention. pi-native folds both into one recursive, name-on-the-tag grammar and adds the verbatim **inline body** for bulk string arguments. See [`anthropic.md`](./anthropic.md) and [`glm-4.5.md`](./glm-4.5.md) in this folder.\n\n## Structural tags\n\npi-native has **no special tokens**. Every marker is plain UTF-8 text that BPE-splits like any other text and survives detokenization unchanged; a parser matches the tags as literal substrings (and MUST work even when the surrounding stream is not valid XML). All brackets are ASCII `<` `>` `/` and the literal colon `:`. There are no namespaces, no `<?xml?>` prolog, no entity expansion, and no CDATA sections.\n\n| Tag (verbatim) | Role |\n|---|---|\n| `<call:NAME …>` … `</call:NAME>` | One tool call. `NAME` is the tool/recipient name; it is repeated on the closing tag. |\n| `<call:NAME …/>` | Self-closing tool call (all arguments supplied as attributes). |\n| `<KEY>` … `</KEY>` | One argument (or one nested field). `KEY` is the parameter name. |\n| `<KEY …/>` | Self-closing argument: an object-valued field whose scalar sub-fields are attributes, or an empty value. |\n| `KEY=\"…\"` / `KEY='…'` / `KEY=…` | An attribute: a scalar field given inline on a tag. Quotes are **delimiters, not type markers** (see [value coercion](#value-coercion)). |\n\nNames (tool names and parameter names) match `^[A-Za-z_][A-Za-z0-9_-]*$`. The `call:` prefix is a literal four-character marker plus the colon; the colon is what distinguishes a call block from a nested argument element of the same name.\n\n## Tool-call forms\n\nA single call has three interchangeable surface forms. Which forms are legal for a given tool is decided by its parameter schema; a renderer SHOULD pick the most compact legal form, and a parser MUST accept all three.\n\n### 1. Element form (canonical, fully general)\n\nEach top-level argument is a child element named after the parameter; the element body is the value. This form expresses every schema — scalars, strings, arrays, and nested objects:\n\n```text\n<call:read>\n<path>src/server/auth.ts</path>\n<offset>50</offset>\n</call:read>\n```\n\n→ `read({ \"path\": \"src/server/auth.ts\", \"offset\": 50 })` (`offset` is JSON because the schema types it as a number; `path` is a verbatim string).\n\n### 2. Attribute form (compact scalars)\n\nWhen the arguments being passed are **top-level scalars** (string, number, integer, boolean, null), they MAY be written as attributes on the call tag. With every argument as an attribute the tag is self-closing:\n\n```text\n<call:read path=\"src/server/auth.ts\"/>\n```\n\n→ `read({ \"path\": \"src/server/auth.ts\" })`.\n\nAttributes and child elements MAY be combined on a non-self-closing call tag — attributes carry the scalars, child elements carry anything structured:\n\n```text\n<call:read path=\"src/server/auth.ts\">\n<offset>50</offset>\n</call:read>\n```\n\nAn attribute whose value cannot be represented as a scalar (an object or array argument) MUST use the element form instead — there is no attribute spelling for structured values on a call tag.\n\n### 3. Inline-body form (verbatim string payload)\n\nWhen the tool's parameters are **all strings** — most often a single string parameter — the call body MAY be the argument value written **verbatim**, with no child element tags:\n\n```text\n<call:edit>\n*** Begin Patch\n@@ src/server/auth.ts\n- return user;\n+ return user ?? null;\n*** End Patch\n</call:edit>\n```\n\n→ `edit({ \"input\": \"*** Begin Patch\\n@@ src/server/auth.ts\\n- return user;\\n+ return user ?? null;\\n*** End Patch\" })`.\n\nRules for the inline body:\n\n- The body fills the **first parameter not already supplied by an attribute**. With no attributes that is simply the first parameter (the \"first argument verbatim\").\n- It is permitted only when that target parameter is **string**-typed (the enabling condition \"the type only contains string arguments\"); any *other* parameters set on the same call MUST be scalars given as attributes.\n- The value is captured **verbatim** up to the call's own closing tag `</call:NAME>`. No JSON escaping, no entity decoding. The body MAY freely contain `<`, `>`, `&`, quotes, JSON, even other `<call:…>`-looking text — the only sequence it MUST NOT contain is the literal closer `</call:NAME>`. Because that closer carries the tool name, collisions are far rarer than with a short generic delimiter.\n- Whitespace: a single newline immediately after the opening `>` and a single newline immediately before the closing `</` are treated as block delimiters and are **not** part of the value; all other whitespace (indentation, blank lines, trailing spaces) is preserved exactly.\n\nA multi-string tool can still use the inline body for its bulk argument by passing the others as attributes — the body then fills the first parameter left unset:\n\n```text\n<call:write path=\"notes/todo.md\">\n# TODO\n- ship pi-native parser\n</call:write>\n```\n\n→ `write({ \"path\": \"notes/todo.md\", \"content\": \"# TODO\\n- ship pi-native parser\" })` (here `path` is given by attribute, so the body fills the next string parameter, `content`).\n\nInline-eligible tools may always fall back to the element form; `<call:edit><input>…</input></call:edit>` and the inline `<call:edit>…</call:edit>` are equivalent.\n\n## Value model\n\nThe body of a call (and of any nested element) maps to JSON by a single recursive rule set. **Typing is driven by the parameter's JSON Schema**; the parser only falls back to syntactic heuristics when no schema is available.\n\n### Value coercion\n\nLet `coerce(text, type)` produce the JSON value for a captured scalar `text`:\n\n- `type == \"string\"` → the value is `text`, **verbatim** (never JSON-parsed, never unquoted). This is why `<path>4</path>` for a string parameter is the string `\"4\"`, and a Windows path `C:\\new\\tab` survives intact.\n- `type` is a non-string scalar (`number` / `integer` / `boolean` / `null`) → `JSON.parse(text)` (so `<offset>50</offset>` → `50`, `<recursive>true</recursive>` → `true`).\n- `type` unknown (no schema) → **best-effort JSON coercion**: try `JSON.parse(text)`; on success use the parsed value (number, boolean, null, quoted-string, object, or array); on failure treat `text` as a literal string. So a bare `4` becomes the number `4`, `foo.ts` (not valid JSON) becomes `\"foo.ts\"`.\n\nThe same `coerce` applies to **attribute values** after the surrounding quotes (if any) are stripped — the quotes are XML delimiters only. Hence both spellings below are identical, and both yield the **number** `4` (not the string `\"4\"`) when `y` is untyped/numeric:\n\n```text\n<object y=4/> → { \"object\": { \"y\": 4 } }\n<object y=\"4\"/> → { \"object\": { \"y\": 4 } }\n```\n\nConsequence to internalize: under loose/no schema, quoting does **not** force a string — `\"4\"` still coerces to `4`. To carry a numeric-looking value *as a string*, the parameter MUST be `string`-typed in the schema (then the verbatim rule keeps `\"4\"` → `\"4\"`). Unquoted attribute values run until whitespace or the closing `>` / `/>`; spaces around `=` are tolerated; a bare attribute with no `=value` (e.g. `<call:tool dry_run/>`) denotes boolean `true`.\n\n### Scalars and strings\n\nA scalar argument is one element (or one attribute). String values are unquoted and verbatim; non-string scalars are JSON literals:\n\n```text\n<call:bash command=\"ls -la\" timeout=30/>\n```\n\n→ `bash({ \"command\": \"ls -la\", \"timeout\": 30 })`.\n\n### Arrays — repeat the element\n\nAn array-typed field is expressed by **repeating** its element; each occurrence contributes one item, in order:\n\n```text\n<list>x</list>\n<list>y</list>\n```\n\n→ `\"list\": [\"x\", \"y\"]`.\n\nA field the schema types as an **array always yields an array, even for a single occurrence** — so one `<list>x</list>` under an array-typed `list` is `[\"x\"]`, not `\"x\"`. When no schema is available the parser falls back to a count heuristic: a name appearing **2+ times among its siblings** is an array; a name appearing **once** is a scalar (so schema typing is the only way to express a one-element array with the heuristic alone). Item values coerce by the array's item type (`<ports>80</ports><ports>443</ports>` → `[80, 443]` for a `number[]`); arrays of objects repeat a nested block (see below). There is no attribute spelling for an array (attributes cannot repeat) — arrays require element form.\n\n### Objects — a nested block\n\nAn object-typed field opens its own block and follows the **same rules recursively**: its child elements become its properties, repeated children become arrays, and nested object children open further blocks.\n\n```text\n<object>\n<list>x</list>\n</object>\n```\n\n→ `\"object\": { \"list\": [\"x\"] }` (with `object` typed object and `list` typed array).\n\nAn object's **scalar** sub-fields MAY instead be written as attributes — `<object y=4/>` is shorthand for `<object><y>4</y></object>`. Attributes and child elements may be combined on the same object element (attributes for scalars, children for structured sub-fields). An empty object is `<object/>` or `<object></object>` → `{}`.\n\n### Recursion\n\nThe call body, an object element's body, and an array item's body are all parsed by the identical procedure. Parsing element `E` (tag = field name `F`, schema type `T`):\n\n1. Gather `E`'s attributes → scalar properties via `coerce`.\n2. Determine `E`'s body shape from `T` (or, with no schema, from whether the body's first non-whitespace content is a child tag):\n - `T` object → properties from child elements (+ the attributes from step 1).\n - `T` array (item type `Ti`) → collect **all** siblings named `F`; each occurrence is one item parsed as `Ti`.\n - `T` scalar/string → the body is captured text; value = `coerce(text, T)`.\n3. The call itself is element `E` with no enclosing key: its attributes + child elements **are** the arguments object directly (the tool name on `<call:NAME>` is the recipient, not a key).\n\n## Multiple / parallel tool calls\n\nThere is no wrapper element around a set of calls. Parallel calls are simply **consecutive `<call:…>` blocks** in one assistant turn (separated by whitespace/newlines; interleaved prose is allowed and is ordinary content):\n\n```text\n<call:read path=\"src/a.ts\"/>\n<call:read path=\"src/b.ts\"/>\n```\n\nA parser returns these as `tool_calls[0]`, `tool_calls[1]`, … in emission order. The host executes them and returns one result per call, in the same order (see correlation, next).\n\n## Tool definitions and schema dependence\n\npi-native does not prescribe how tools are advertised; a host typically lists them as JSON Schema, exactly as the OpenAI / Anthropic / Hermes families do. What pi-native **requires** is that the parser have access to each tool's parameter schema, because the schema is what disambiguates:\n\n- string (verbatim, unquoted) vs other scalar (JSON) values;\n- a one-element array vs a scalar (a single `<list>…</list>`);\n- which body shape (text vs nested members) a non-self-closing element carries;\n- whether the inline-body form is legal (first unset parameter is a string).\n\nWithout a schema the parser MUST degrade gracefully to the syntactic fallbacks named above (JSON-coerce scalars; repetition-counts for arrays; child-tag presence for object bodies). The fallbacks are lossy at exactly the ambiguous points the schema would resolve, so production hosts SHOULD always supply the schema.\n\n## Tool-result correlation\n\npi-native calls carry **no per-call wire id** (like GLM and Qwen, unlike Anthropic's `toolu_…`). Results are therefore correlated to calls **positionally, by emission order**: the host delivers tool outputs in the same order the `<call:…>` blocks appeared, using whatever its envelope provides for tool output (a `tool`/`user` turn, a Harmony tool message, an Anthropic `tool_result` block, …). When a transport requires an id (e.g. an OpenAI-compatible bridge), the host synthesizes one and maintains the call↔result mapping itself; the id never appears in the pi-native text.\n\n## End-to-end example\n\nA short agent turn exercising all three forms plus nesting. Schemas in play: `read(path: string, offset?: number)`, `bash(command: string, timeout?: number)`, `edit(input: string)`, and a synthetic `configure(object: { list: string[]; y?: number })`.\n\n```text\nI'll inspect the file, run the tests, then apply the fix.\n\n<call:read path=\"src/server/auth.ts\"/>\n\n<call:bash command=\"bun test src/server/auth.test.ts\" timeout=120/>\n\n<call:configure>\n<object y=4>\n<list>alpha</list>\n<list>beta</list>\n</object>\n</call:configure>\n\n<call:edit>\n*** Begin Patch\n@@ src/server/auth.ts\n- return user;\n+ return user ?? null;\n*** End Patch\n</call:edit>\n```\n\nParses to four calls, in order:\n\n```json\n[\n { \"name\": \"read\", \"arguments\": { \"path\": \"src/server/auth.ts\" } },\n { \"name\": \"bash\", \"arguments\": { \"command\": \"bun test src/server/auth.test.ts\", \"timeout\": 120 } },\n { \"name\": \"configure\", \"arguments\": { \"object\": { \"y\": 4, \"list\": [\"alpha\", \"beta\"] } } },\n { \"name\": \"edit\", \"arguments\": { \"input\": \"*** Begin Patch\\n@@ src/server/auth.ts\\n- return user;\\n+ return user ?? null;\\n*** End Patch\" } }\n]\n```\n\nNote: `timeout=120` and `y=4` are JSON numbers (numeric/untyped scalars), `path` and the `list` items are verbatim strings (string-typed), `object` opens a nested block whose `y` rides as an attribute while `list` repeats into an array, and the `edit` body is captured verbatim up to `</call:edit>` despite containing `@@`, `-`/`+`, and other non-XML text.\n\n## Grammar (lenient EBNF)\n\nThis is the shape a tolerant parser accepts; it is intentionally looser than XML (mismatched-but-recoverable tails are closed heuristically — see gotchas).\n\n```ebnf\nstream ::= ( text | call )*\ncall ::= self-call | block-call\nself-call ::= \"<call:\" Name attr* ws? \"/>\"\nblock-call ::= \"<call:\" Name attr* \">\" call-body \"</call:\" Name \">\"\ncall-body ::= members | inline-text ; inline-text only if first param is string\nmembers ::= ( ws | element )*\nelement ::= self-element | block-element\nself-element ::= \"<\" Name attr* ws? \"/>\" ; object via attrs, or empty value\nblock-element::= \"<\" Name attr* \">\" ( members | scalar-text ) \"</\" Name \">\"\nattr ::= ws Name ( ws? \"=\" ws? attr-val )? ; bare Name → boolean true\nattr-val ::= '\"' dq-chars '\"' | \"'\" sq-chars \"'\" | bareword\nName ::= [A-Za-z_] [A-Za-z0-9_-]*\nscalar-text ::= < any chars up to the matching close tag, verbatim >\ninline-text ::= < any chars up to \"</call:\" Name \">\", verbatim >\n```\n\n## Parsing notes & gotchas\n\n- **Schema decides string-vs-JSON.** A `string`-typed value is verbatim and unquoted; everything else is JSON. With no schema, scalars best-effort JSON-coerce and fall back to string. This is the single most error-prone rule (identical to GLM-4.5's unquoted strings): emitting `\"San Francisco\"` for a string parameter yields the literal value *including the quote characters*.\n- **Quotes are delimiters, not types.** `y=\"4\"` and `y=4` both coerce to the number `4` under loose/no schema. Quoting an attribute never makes it a string; only a `string` schema type does.\n- **Arrays = repetition; single-element arrays need the schema.** Two same-named siblings is unambiguously an array. One occurrence is a scalar under the count heuristic and an array only because the schema says so — a parser without the schema cannot tell `<list>x</list>` (scalar) from a one-element array.\n- **Verbatim bodies are delimited by the named closer.** The inline body and any `string`-typed element body are captured up to their matching `</call:NAME>` / `</KEY>`. A body that contains that exact closing sequence truncates early; there is no escaping mechanism. The inline body's risk is minimal because the delimiter includes the tool name (`</call:edit>`), but a short string-typed *element* (e.g. `<note>…</note>`) is more exposed — prefer the inline-body form for any value that might contain markup, or keep such values in the single-string inline payload.\n- **Element form vs inline body.** A block call whose body's first non-whitespace content is a child tag matching a known parameter is parsed as element form; otherwise (all-string tool) it is the inline body. A string value that legitimately *starts* with a `<param>`-looking token is the one ambiguity — emit such a tool in element form, or rely on the schema (a tool with structured params is never inline-eligible).\n- **No ids; order is the contract.** Calls carry no id; results MUST be returned in call order. Reordering results silently misattributes them.\n- **Lenient, not strict XML.** Do not feed pi-native to an XML parser: tag names contain a colon (`call:read`), attribute values may be unquoted, bodies are not entity-encoded, and the stream need not be balanced beyond each call's own open/close. Match the tags as literals (regex / streaming state machine).\n- **Streaming.** A stateful parser emits the tool name as soon as `<call:NAME` closes, then streams attribute/child deltas; for an inline body it streams body text incrementally and holds back any partial trailing `</call:` until it can decide whether it is the closer. Coercion of a scalar can only finalize at the value's close tag (a partial number/boolean is not yet valid JSON).\n- **Whitespace.** Element/inline bodies preserve all whitespace except one leading and one trailing newline that delimit the block. Attribute and indentation whitespace between tags is insignificant.\n\n## Sources\n\npi-native is specified here; it is not derived from a published model template. Its two direct influences are documented in this folder:\n\n- Anthropic attribute XML (`<invoke name=\"…\">` / `<parameter name=\"…\">`, \"parsed with regular expressions\", not required to be valid XML): [`anthropic.md`](./anthropic.md).\n- GLM-4.5 schema-driven, **unquoted** string values vs JSON non-strings, and positional (id-less) call↔result correlation: [`glm-4.5.md`](./glm-4.5.md).\n",
82
+ "toolconv/qwen3.md": "# Qwen3 tool-calling format (Hermes convention)\n\nTool-calling convention of Alibaba's **Qwen3** family (`Qwen/Qwen3-*`: dense `0.6B–32B` and MoE `30B-A3B`/`235B-A22B`; same template line as `Qwen2.5-*` and `QwQ-32B`). It is the **Hermes** convention — the XML+JSON format originated by NousResearch's Hermes 2 Pro and adopted verbatim by Qwen, plus a long tail of community fine-tunes. The envelope is **ChatML**: every turn is `<|im_start|>{role}\\n{body}<|im_end|>\\n`. Available tools are advertised in the system turn inside a `<tools>…</tools>` block (one JSON spec per line); the model emits each call as a `<tool_call>\\n{json}\\n</tool_call>` block whose `arguments` is a **nested JSON object** (not a stringified JSON); tool results are fed back inside `<tool_response>…</tool_response>`. Hybrid reasoning is carried in `<think>…</think>`. The format ships in the model's own `chat_template`, so an inference server enables it with no extra template: vLLM uses `--enable-auto-tool-choice --tool-call-parser hermes` (pair with `--reasoning-parser deepseek_r1` for the thinking split); SGLang exposes the matching parsers (e.g. `--reasoning-parser qwen3`).\n\nVerified against: Qwen's canonical function-calling guide (`qwen.readthedocs.io/en/latest/framework/function_call.html`, read in full incl. the Qwen-Agent + vLLM sections), the byte-exact `chat_template` field of `Qwen/Qwen3-8B`'s `tokenizer_config.json` (HF resolve-cache commit `b968826d9c46dd6066d109eabc6255188de91218`, rendered locally with Jinja2 for the raw streams below) and its `added_tokens_decoder` for token IDs, the NousResearch `Hermes-Function-Calling` README, and the vLLM tool-calling docs (`hermes` parser + Qwen models section).\n\n## Special tokens\n\nOnly the three ChatML markers are \"special\" control tokens (`special=true`, skipped by `skip_special_tokens`). The reasoning and tool markers are also single vocabulary tokens (one ID each) but are registered with `special=false`, i.e. they render as ordinary text and are **not** stripped by `skip_special_tokens`. The `<tools>`/`</tools>` wrapper has **no** dedicated token at all — it is plain text that BPE-splits into several tokens. IDs are from `Qwen/Qwen3-8B` `added_tokens_decoder`.\n\n| Token (verbatim) | ID | `special` | Purpose |\n|---|---|---|---|\n| `<\\|im_start\\|>` | 151644 | true | Start of a turn; followed immediately by the role name + `\\n` |\n| `<\\|im_end\\|>` | 151645 | true | End of a turn; the chat stop token |\n| `<\\|endoftext\\|>` | 151643 | true | Base EOS / pad token |\n| `<think>` | 151667 | false | Opens the reasoning block |\n| `</think>` | 151668 | false | Closes the reasoning block |\n| `<tool_call>` | 151657 | false | Opens one tool call |\n| `</tool_call>` | 151658 | false | Closes one tool call |\n| `<tool_response>` | 151665 | false | Opens one tool result |\n| `</tool_response>` | 151666 | false | Closes one tool result |\n| `<tools>` … `</tools>` | — | — | Plain text wrapper around the tool list in the system turn (not a single token) |\n\nNotes on exactness:\n- All markers use the ASCII pipe `|` (U+007C) and ASCII angle brackets. Qwen3 has **no** fullwidth (`|` U+FF5C) or `▁` (U+2581) variants — that is DeepSeek/SentencePiece territory, not Qwen.\n- `<|im_start|>` and `<|im_end|>` are the only tokens that matter for splitting turns. Because `<tool_call>`, `</tool_call>`, `<tool_response>`, `<think>`, `</think>` are `special=false`, they survive a `skip_special_tokens=True` decode, which is exactly why the regex-based `hermes` parser can recover them from decoded text.\n- The model card confirms `</think>` = token `151668` (used by the reference parsing snippet `output_ids[::-1].index(151668)`).\n\n## Roles / channels / turn structure\n\nChatML. Each message renders as:\n\n```text\n<|im_start|>{role}\n{body}<|im_end|>\n```\n\n- Roles: `system`, `user`, `assistant`, `tool`. There is no separate \"channel\" concept; the only sub-stream is the `<think>` reasoning block inside an assistant turn.\n- `<|im_end|>\\n` terminates every turn. With `add_generation_prompt=True` the prompt ends with `<|im_start|>assistant\\n` and the model continues from there.\n- **System turn:** if the caller supplies a `system` message it becomes the first turn. When `tools` are present, the tool advertisement is merged **into** that same system turn (the user's system text first, then `\\n\\n`, then the `# Tools` block — see below). Qwen3 injects no default system prompt when none is given.\n- **Tool-result turns use the `user` envelope.** Qwen3's template maps every `role: \"tool\"` message into a `<|im_start|>user` turn carrying `<tool_response>` blocks (consecutive tool messages are coalesced into one user turn). This differs from classic Hermes 2 Pro, which used a dedicated `<|im_start|>tool` turn for results — Qwen folds them into `user`.\n- **Thinking/reasoning:** carried in `<think>…</think>` at the start of an assistant turn (see the Parsing notes for the toggle and the history-rerender rule).\n\n## Tool definitions\n\nTools are advertised inside the system turn. The template emits a fixed preamble, then each tool object serialized with `tool | tojson` (`json.dumps(..., ensure_ascii=False)`) on **its own line**, then a fixed trailer. Each list element is the full OpenAI tool object `{\"type\": \"function\", \"function\": {...}}` (with a JSON-Schema `parameters` object). The exact, verbatim wrapper Qwen3 produces:\n\n```text\n<|im_start|>system\n{optional original system content}\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\n{\"type\": \"function\", \"function\": {\"name\": \"get_current_temperature\", \"description\": \"Get current temperature at a location.\", \"parameters\": {\"type\": \"object\", \"properties\": {\"location\": {\"type\": \"string\", \"description\": \"The location to get the temperature for, in the format \\\"City, State, Country\\\".\"}, \"unit\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"], \"description\": \"The unit to return the temperature in. Defaults to \\\"celsius\\\".\"}}, \"required\": [\"location\"]}}}\n{\"type\": \"function\", \"function\": {\"name\": \"get_temperature_date\", \"description\": \"Get temperature at a location and date.\", \"parameters\": {\"type\": \"object\", \"properties\": {\"location\": {\"type\": \"string\", \"description\": \"The location to get the temperature for, in the format \\\"City, State, Country\\\".\"}, \"date\": {\"type\": \"string\", \"description\": \"The date to get the temperature for, in the format \\\"Year-Month-Day\\\".\"}, \"unit\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"], \"description\": \"The unit to return the temperature in. Defaults to \\\"celsius\\\".\"}}, \"required\": [\"location\", \"date\"]}}}\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n```\n\n- If the first message is a `system` message, its content is placed before `# Tools` (separated by a blank line); otherwise the turn opens straight into `# Tools`.\n- The trailing instruction is a literal part of the prompt, including the placeholder line `{\"name\": <function-name>, \"arguments\": <args-json-object>}` (those angle-bracket tokens are instructions, not emitted output).\n- Version note: the original Hermes 2 Pro system prompt additionally embedded a `FunctionCall` pydantic schema line (`{\"title\": \"FunctionCall\", \"type\": \"object\", \"properties\": {\"name\": …, \"arguments\": …}}`). Qwen3 dropped that line; the wrapper above is exactly what Qwen3 emits.\n\n## Tool-call format\n\nThe model emits each call as a `<tool_call>` line, a single-line JSON object, then `</tool_call>`. Minimal single call:\n\n```text\n<tool_call>\n{\"name\": \"get_current_temperature\", \"arguments\": {\"location\": \"San Francisco, CA, USA\", \"unit\": \"celsius\"}}\n</tool_call>\n```\n\n- `arguments` is a **nested JSON object**, not a JSON-encoded string. On the wire it is `\"arguments\": {\"location\": \"...\"}` — never `\"arguments\": \"{\\\"location\\\": ...}\"`. (The template renders a dict argument via `tojson`; only if a caller stored `arguments` as a pre-serialized string does it pass through verbatim.)\n- The call object has exactly two keys, `name` (string) and `arguments` (object). There is no per-call ID on the wire — the OpenAI-style `tool_call_id` is minted by the server, not the model (see API mapping).\n- A tool-calling assistant turn may also contain natural-language `content` before the first `<tool_call>`; the template inserts a `\\n` between that content and the first call.\n\n## Multiple / parallel tool calls\n\nParallel calls are emitted as consecutive `<tool_call>…</tool_call>` blocks within a single assistant turn, each separated by a newline:\n\n```text\n<|im_start|>assistant\n<tool_call>\n{\"name\": \"get_current_temperature\", \"arguments\": {\"location\": \"San Francisco, CA, USA\"}}\n</tool_call>\n<tool_call>\n{\"name\": \"get_temperature_date\", \"arguments\": {\"location\": \"San Francisco, CA, USA\", \"date\": \"2024-10-01\"}}\n</tool_call><|im_end|>\n```\n\nThe parser returns these as `tool_calls[0]`, `tool_calls[1]`, … in emission order. The application must execute them and return one `<tool_response>` per call, in the same order.\n\n## Tool-result format\n\nEach executed result is wrapped in `<tool_response>…</tool_response>`. Qwen3 places them inside a **`user`** turn, and **coalesces** consecutive tool results into one turn (one `<tool_response>` block per result, newline-separated, a single closing `<|im_end|>`):\n\n```text\n<|im_start|>user\n<tool_response>\n{\"temperature\": 26.1, \"location\": \"San Francisco, CA, USA\", \"unit\": \"celsius\"}\n</tool_response>\n<tool_response>\n{\"temperature\": 25.9, \"location\": \"San Francisco, CA, USA\", \"date\": \"2024-10-01\", \"unit\": \"celsius\"}\n</tool_response><|im_end|>\n```\n\n- The body between the tags is the tool's return value (typically a JSON string, but any text is allowed). The function name is **not** repeated inside Qwen3's `<tool_response>` — ordering ties results to calls. (Classic Hermes 2 Pro instead nested `{\"name\": ..., \"content\": ...}` inside `<tool_response>` under a `tool` turn; Qwen3's template emits the bare content under a `user` turn.)\n- At the OpenAI API layer a result message is `{\"role\": \"tool\", \"content\": \"...\", \"tool_call_id\": \"...\"}`; the template renders only its `content` into a `<tool_response>` block.\n\n## End-to-end example\n\nComplete multi-turn weather exchange in **non-thinking mode** (`enable_thinking=False`), exactly as `apply_chat_template` renders it for the live flow. With thinking disabled, each generation step injects an empty `<think>\\n\\n</think>\\n\\n` after `<|im_start|>assistant\\n`; the model then emits its tool call / final answer. Copy-pasteable, byte-exact:\n\n```text\n<|im_start|>system\nYou are a helpful assistant. Current Date: 2024-09-30.\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\n{\"type\": \"function\", \"function\": {\"name\": \"get_current_temperature\", \"description\": \"Get current temperature at a location.\", \"parameters\": {\"type\": \"object\", \"properties\": {\"location\": {\"type\": \"string\", \"description\": \"The location to get the temperature for, in the format \\\"City, State, Country\\\".\"}, \"unit\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"], \"description\": \"The unit to return the temperature in. Defaults to \\\"celsius\\\".\"}}, \"required\": [\"location\"]}}}\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n<|im_start|>user\nWhat's the temperature in San Francisco now?<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n<tool_call>\n{\"name\": \"get_current_temperature\", \"arguments\": {\"location\": \"San Francisco, CA, USA\", \"unit\": \"celsius\"}}\n</tool_call><|im_end|>\n<|im_start|>user\n<tool_response>\n{\"temperature\": 26.1, \"location\": \"San Francisco, CA, USA\", \"unit\": \"celsius\"}\n</tool_response><|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\nThe current temperature in San Francisco is 26.1°C.<|im_end|>\n```\n\nIn **thinking mode** (`enable_thinking=True`, the default) the generation prompt instead ends with a bare `<|im_start|>assistant\\n` and the model itself produces the `<think>…real reasoning…</think>` block before the `<tool_call>`. (When re-rendering stored history, the template keeps the `<think>` block only for the last assistant message or messages that carry `reasoning_content`, and strips reasoning from earlier turns — see Parsing notes.)\n\n## OpenAI-compatible API mapping\n\nWith `--enable-auto-tool-choice --tool-call-parser hermes`, vLLM converts the raw stream into a standard Chat Completions response:\n\n- `finish_reason`: `\"tool_calls\"` when the turn ended on tool calls (otherwise `\"stop\"`).\n- `message.role`: `\"assistant\"`; `message.content`: `null` for a pure tool-call turn (any pre-call prose becomes `content`).\n- `message.tool_calls[]`: one entry per `<tool_call>` block, each:\n - `id`: server-generated, e.g. `\"chatcmpl-tool-924d705adb044ff88e0ef3afdd155f15\"` (the model emits no ID).\n - `type`: `\"function\"`.\n - `function.name`: the call's `name`.\n - `function.arguments`: a **JSON string** at the API boundary, e.g. `'{\"location\": \"San Francisco, CA, USA\"}'`. The wire format is a nested object, but the server re-serializes it to a string here (`json.loads(...)` it before use), matching OpenAI and Qwen-Agent.\n- With thinking + `--reasoning-parser deepseek_r1`, the `<think>…</think>` content is split out into `message.reasoning_content` and removed from `content`.\n- Feeding results back: append `{\"role\": \"tool\", \"content\": <result>, \"tool_call_id\": <id-from-the-call>}` for each result. `tool_call_id` links a result to its call (Qwen3's template ignores the id when rendering — ordering is what reaches the model — but the API still requires it).\n\nExample assistant message returned for the two-call query:\n\n```text\nfinish_reason='tool_calls'\nmessage.content = None\nmessage.tool_calls = [\n {id:'chatcmpl-tool-924d…', type:'function', function:{name:'get_current_temperature', arguments:'{\"location\": \"San Francisco, CA, USA\"}'}},\n {id:'chatcmpl-tool-7e30…', type:'function', function:{name:'get_temperature_date', arguments:'{\"location\": \"San Francisco, CA, USA\", \"date\": \"2024-10-01\"}'}},\n]\n```\n\n## Parsing notes & gotchas\n\n- **Arguments object vs string:** on the wire `arguments` is a nested JSON object; the OpenAI layer hands it back as a JSON string. Code that reads the raw stream must parse an object; code that reads the API must `json.loads` the string. Do not double-encode.\n- **`<tools>` is not a token.** Only count on `<|im_start|>`/`<|im_end|>` (and the `*tool_call*`/`*tool_response*`/`*think*` single tokens) being atomic. `<tools>`/`</tools>` are plain text.\n- **Regex/streaming parse:** the vLLM `hermes` parser (`vllm/tool_parsers/hermes_tool_parser.py`, `Hermes2ProToolParser`) keys on the literal `<tool_call>` / `</tool_call>` substrings and JSON-decodes the body, supporting multiple blocks per turn. In streaming it buffers from `<tool_call>` until it can incrementally parse `name` then `arguments`; partial argument JSON is emitted as argument deltas. Text before the first `<tool_call>` is streamed as ordinary content.\n- **Thinking toggle:** `enable_thinking=False` (passed via `chat_template_kwargs={\"enable_thinking\": False}` over the OpenAI API, or `tokenizer.apply_chat_template(..., enable_thinking=False)`) injects an empty `<think>\\n\\n</think>\\n\\n` into the generation prompt, hard-suppressing reasoning. Soft switches `/think` and `/no_think` in a user/system message flip it per-turn when thinking is enabled. Greedy decoding is discouraged for Qwen3 (repetition risk).\n- **History rerender asymmetry:** when `apply_chat_template` re-renders a stored conversation, it emits the `<think>` block only for the final assistant message or messages carrying `reasoning_content`; reasoning from earlier turns is dropped. So a stored intermediate tool-call assistant turn shows no `<think>` block, while the live generation step that produced it was prefixed with one (in non-thinking mode). Reasoning is preserved only within the current multi-step tool sequence (after the last real user query).\n- **Reasoning models + stopword templates:** Qwen warns against ReAct-style stopword tool templates for Qwen3, since reasoning text may contain the stopwords and corrupt parsing — use this native Hermes template instead.\n- **Robustness:** the format is prompt/template-driven, so malformed output is possible (truncated JSON, missing `</tool_call>`, prose mixed into a call, an array serialized as a string). Production parsers should tolerate and, on failure, fall back to treating the text as content. Named / `required` tool_choice routes through vLLM's structured-outputs backend for guaranteed-parseable arguments.\n- **Version/scope:** this `hermes` template covers `Qwen3-*`, `Qwen2.5-*`, and `QwQ-32B`. It does **not** cover `Qwen3-Coder`, which uses a different XML scheme parsed by vLLM's `qwen3_xml` parser — a separate convention.\n\n## Sources\n\n- Qwen function-calling guide: https://qwen.readthedocs.io/en/latest/framework/function_call.html\n- Qwen3-8B chat template + token IDs (`tokenizer_config.json`, `chat_template` + `added_tokens_decoder`): https://huggingface.co/Qwen/Qwen3-8B/resolve/main/tokenizer_config.json (verified via HF resolve-cache commit `b968826d9c46dd6066d109eabc6255188de91218`)\n- Qwen3-8B model card (thinking modes, `enable_thinking`, `</think>`=151668): https://huggingface.co/Qwen/Qwen3-8B\n- NousResearch Hermes-Function-Calling (origin of the convention): https://github.com/NousResearch/Hermes-Function-Calling\n- vLLM tool-calling docs (`hermes` parser, Qwen models, auto tool choice): https://docs.vllm.ai/en/latest/features/tool_calling/\n",
76
83
  "tools/ask.md": "# ask\n\n> Prompts the interactive user for one or more option-picker or free-form answers.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ask.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ask.md`\n- Key collaborators:\n - `packages/coding-agent/src/config/settings-schema.ts` — `ask.timeout` / `ask.notify` defaults\n - `packages/coding-agent/src/modes/theme/theme.ts` — checkbox and tree glyphs for TUI rendering\n - `packages/coding-agent/src/tui.ts` — status-line rendering\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `questions` | `Question[]` | Yes | One or more questions. Empty arrays are rejected by schema and also guarded at runtime. |\n\n### `Question`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `id` | `string` | Yes | Stable identifier used in multi-question results. |\n| `question` | `string` | Yes | Prompt text shown to the user. |\n| `options` | `{ label: string; description?: string }[]` | Yes | Option labels for the picker, each with optional explanatory `description` text shown below the label. The schema does not require a minimum length; the UI always appends `Other (type your own)`, and callers must not include it. |\n| `multi` | `boolean` | No | Enables multi-select mode. Default: `false`. |\n| `recommended` | `number` | No | Zero-based recommended option index. In single-select mode the label gets ` (Recommended)` appended in the UI. |\n\n## Outputs\n- Single-shot result.\n- `content[0].text` is plain text:\n - single question: `User selected: ...` and/or `User provided custom input: ...`\n - multiple questions: `User answers:` followed by one line per `id`\n- `details`:\n - single question: `{ question, options, multi, selectedOptions, customInput?, timedOut? }`\n - multiple questions: `{ results: QuestionResult[] }`, where each item includes `id`, `question`, `options`, `multi`, `selectedOptions`, and optional `customInput` and `timedOut`\n- Cancellation and headless cases throw instead of returning a structured success result.\n\n## Flow\n1. `AskTool.createIf()` only registers the tool when `session.hasUI` is true; headless sessions never get it.\n2. `execute()` requires `context.ui`; if missing it aborts the context and throws `ToolAbortError(\"Ask tool requires interactive mode\")`.\n3. It reads `ask.timeout` from settings, converts seconds to milliseconds (`0` disables timeout), and disables timeout entirely while plan mode is enabled (`packages/coding-agent/src/tools/ask.ts`).\n4. If `ask.notify` is not `off`, it sends a terminal notification: `Waiting for input`.\n5. For each question, `askSingleQuestion()` drives either:\n - single-select list + optional editor for `Other`\n - multi-select checkbox loop + `Done selecting` sentinel + optional editor for `Other`\n6. In multi-question mode, left/right arrow handlers enable back/forward navigation between questions and preserve prior selections.\n7. If a timeout fires before any selection/custom input, the tool auto-selects the recommended option, or the first option when no valid `recommended` index exists; the result text gets an ` (auto-selected after timeout)` suffix and `details.timedOut` is set.\n8. If the user cancels without timeout, `execute()` aborts the tool context and throws `ToolAbortError(\"Ask tool was cancelled by the user\")`.\n9. On success it formats human-readable text plus structured `details`; the TUI renderer uses `details` for rich display.\n\n## Modes / Variants\n- Single question: returns flattened `details` fields for one question.\n- Multiple questions: returns `details.results[]` and allows back/forward navigation across questions.\n- Single-select: one option or custom input.\n- Multi-select: toggled checkbox list, `Done selecting` sentinel only when forward navigation is not active.\n\n## Side Effects\n- User-visible prompts / interactive UI\n - Opens a selection dialog via `context.ui.select(...)`.\n - Opens a text editor dialog via `context.ui.editor(...)` for `Other`.\n - Sends a terminal notification unless `ask.notify=off`.\n- Session state\n - Reads plan-mode state to disable timeouts.\n - Calls `context.abort()` on headless use or user cancellation.\n- Background work / cancellation\n - Wraps UI waits in `untilAborted(...)` so abort signals interrupt pending dialogs.\n\n## Limits & Caps\n- `questions` must contain at least 1 item (`askSchema` in `packages/coding-agent/src/tools/ask.ts`).\n- `ask.timeout` default is `0` seconds, which disables timeout (`packages/coding-agent/src/config/settings-schema.ts`). Configured non-zero values are seconds.\n- Prompt guidance says provide 2-5 options, but code only requires the `options` array field and does not enforce a minimum or maximum length (`packages/coding-agent/src/prompts/tools/ask.md`).\n- Timeout only applies to the option picker; once the user chooses `Other`, the editor has no timeout (`promptForCustomInput()` in `packages/coding-agent/src/tools/ask.ts`).\n- `AskTool.concurrency = \"exclusive\"`: the tool runs alone in its tool batch because the selector/editor UI surface is shared and concurrent `ask` calls would clobber each other.\n\n## Errors\n- Missing interactive UI: throws `ToolAbortError(\"Ask tool requires interactive mode\")`.\n- User cancels picker/editor without timeout: throws `ToolAbortError(\"Ask tool was cancelled by the user\")`.\n- Abort signal during input: converted to `ToolAbortError(\"Ask input was cancelled\")`.\n- Empty `questions` at runtime returns a text error payload instead of throwing: `Error: questions must not be empty`.\n\n## Notes\n- `recommended` is only a UI hint; invalid indexes are ignored.\n- In single-select mode the returned `selectedOptions` value strips the appended ` (Recommended)` suffix.\n- Multi-select results preserve selection order by `Set` insertion order, not original option order after arbitrary toggles.\n- Option labels and prompt text are returned verbatim in `details`; the tool does not interpret them beyond UI affordances like `Other` and ` (Recommended)`.\n",
77
84
  "tools/ast-edit.md": "# ast_edit\n\n> Preview and apply structural rewrites over source files via native ast-grep.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ast-edit.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ast-edit.md`\n- Key collaborators:\n - `crates/pi-natives/src/ast.rs` — native rewrite planning and file mutation\n - `crates/pi-ast/src/language/mod.rs` — language aliases and extension inference used by the native wrapper.\n - `packages/coding-agent/src/tools/path-utils.ts` — path/glob parsing and multi-path resolution\n - `packages/coding-agent/src/tools/resolve.ts` — preview/apply queueing\n - `packages/coding-agent/src/tools/render-utils.ts` — parse-error dedupe and display caps\n - `packages/coding-agent/src/utils/file-display-mode.ts` — hashline vs line-number diff references\n - `packages/hashline/src/format.ts` — stable hashline header formatting for preview anchors\n - `packages/natives/native/index.d.ts` — JS-visible native binding contract\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `ops` | `{ pat: string; out: string }[]` | Yes | One or more rewrite rules. `pat` must be non-empty. Duplicate `pat` values fail before native execution. Empty `out` deletes the matched node. |\n| `paths` | `string[]` | Yes | One or more files, directories, globs, or internal URLs with backing files. Empty entries are rejected. Globs are forbidden for internal URLs. |\n\nShared AST pattern grammar and language catalog: see [`ast_grep`](./ast-grep.md#inputs).\n\n- `ast_edit` uses the same `$NAME`, `$_`, `$$$NAME`, and `$$$` metavariable semantics.\n- The tool prompt adds rewrite-specific constraints:\n - metavariable names must be uppercase and must stand for whole AST nodes,\n - captures from `pat` are substituted into `out`,\n - each rewrite is a 1:1 structural substitution; one capture cannot expand into multiple sibling nodes unless the grammar itself permits that expansion at that position.\n\n## Outputs\n- Single-shot preview result from `ast_edit` itself.\n- Model-facing `content` is one text block showing proposed edits, grouped by file for directory/multi-file runs.\n - Each change renders as two lines. Hashline mode uses `-LINE:before` / `+LINE:after` under a `[PATH#TAG]` header; plain mode uses `-LINE:COLUMN before` / `+LINE:COLUMN after`.\n - Only the first line of each `before`/`after` snippet is shown, truncated to 120 characters in the wrapper.\n - `Limit reached; narrow paths.` and formatted parse issues are appended when applicable.\n- If no rewrites match, text is `No replacements made` plus formatted parse issues when present.\n- `details` includes aggregate preview metadata:\n - `totalReplacements`, `filesTouched`, `filesSearched`, `applied`, `limitReached`\n - optional `parseErrors`, `scopePath`, `files`, `fileReplacements`, `displayContent`, `meta`\n- The tool always previews first (`applied: false` in the direct result). Actual file writes happen only later through `resolve(action: \"apply\", ...)`.\n- When preview produced replacements, `ast_edit` also queues a pending `resolve` action. Successful apply returns a separate `resolve` result, not another `ast_edit` result.\n\n## Flow\n1. `AstEditTool.execute()` validates each op in `packages/coding-agent/src/tools/ast-edit.ts`:\n - empty `pat` fails,\n - at least one op is required,\n - duplicate `pat` values fail,\n - ops are converted to a `Record<pattern, replacement>`.\n2. The wrapper reads `PI_MAX_AST_FILES` via `$envpos(..., 1000)` and uses that as the native `maxFiles` cap for both preview and apply.\n3. Path normalization, internal URL handling, missing-path partitioning, and multi-path resolution follow the same `path-utils.ts` flow as `ast_grep`.\n4. The wrapper stats the resolved base path to decide whether to render grouped directory output.\n5. `runAstEditOnce(...)` always runs native `astEdit(...)` with `dryRun: true` and `failOnParseError: false` on the first pass.\n6. Native `ast_edit` in `crates/pi-natives/src/ast.rs`:\n - normalizes the rewrite map and sorts rules by pattern string,\n - resolves strictness (`smart` by default),\n - collects candidate files from a file or gitignore-aware directory scan,\n - infers a single language for the whole call unless `lang` was supplied,\n - compiles every rewrite pattern for that language,\n - parses each file, skips files with syntax-error trees, collects `replace_by(...)` edits for every match, enforces replacement and file caps, and returns textual before/after slices plus source ranges.\n7. The TS wrapper deduplicates parse errors, groups changes by file, and renders preview diff lines.\n8. If preview found replacements and `applied` is false, `queueResolveHandler(...)` registers a forced `resolve` action and injects a `resolve-reminder` steering message.\n9. On `resolve(action: \"apply\")`, the queued callback reruns the same rewrite set with `dryRun: false`, recomputes counts, and returns an error result if the live result no longer matches the preview (`stalePreview`). The current implementation compares replacement totals and per-file counts after the rerun; if the new run has already written different counts, the result is marked error.\n10. On a non-stale apply, the callback returns `Applied N replacements in M files.` (in hashline mode followed by fresh `[path#tag]` snapshot headers re-recorded from the post-apply content); on discard, `resolve` returns a discard message without mutating files.\n\n## Modes / Variants\n- Single file: preview or apply against one file.\n- Directory + optional glob: native scan walks the directory, then filters by compiled glob.\n- Multiple explicit paths/globs: wrapper unions them into one synthetic scope or runs per-target native calls when paths only meet at root.\n- Internal URL inputs: only supported when the router resolves them to a backing file path.\n- Preview mode: always the direct `ast_edit` tool result.\n- Apply mode: only reachable through the queued `resolve` callback after a preview.\n- Hashline output mode vs plain line/column mode: controlled by `resolveFileDisplayMode()`.\n\n## Side Effects\n- Filesystem\n - Preview reads files and scans directories.\n - Apply rewrites files in place with `std::fs::write(...)`, but only when the computed output differs from the original source.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Queues a one-shot forced `resolve` tool choice through `queueResolveHandler(...)`.\n - Adds a `resolve-reminder` steering message.\n- User-visible prompts / interactive UI\n - Direct `ast_edit` results are previews.\n - Follow-up apply/discard is exposed through the hidden `resolve` tool.\n- Background work / cancellation\n - Native preview/apply work runs on a blocking worker via `task::blocking(...)`.\n - Cancellation and optional native timeout are cooperative through `CancelToken::heartbeat()`.\n\n## Limits & Caps\n- File cap exposed by the wrapper: `PI_MAX_AST_FILES`, default `1000`, in `packages/coding-agent/src/tools/ast-edit.ts`.\n- Native `maxFiles` and `maxReplacements` are both clamped to at least `1` when provided in `crates/pi-natives/src/ast.rs`.\n- The wrapper never sets `maxReplacements`; native behavior therefore defaults to effectively unbounded replacements for a run.\n- Parse issues are rendered with at most `PARSE_ERRORS_LIMIT = 20` lines in `packages/coding-agent/src/tools/render-utils.ts`; `details.parseErrors` is deduplicated but not capped.\n- Directory scans use `include_hidden: true`, `use_gitignore: true`, and skip `node_modules` unless the glob text explicitly mentions `node_modules` in `crates/pi-natives/src/ast.rs`.\n- No separate glob-expansion count cap exists. Candidate count is whatever the resolved path/glob expands to after gitignore filtering, then native `maxFiles` stops mutations after the configured number of touched files.\n- Preview text truncates each rendered `before` and `after` first line to 120 characters in `packages/coding-agent/src/tools/ast-edit.ts`.\n\n## Errors\n- TS wrapper throws `ToolError` for empty patterns, duplicate rewrite patterns, empty path entries, unsupported internal-URL globs, internal URLs without `sourcePath`, and missing paths.\n- Native code returns hard errors for:\n - inability to infer one language across all candidates when `lang` is absent,\n - unsupported explicit `lang`,\n - bad glob compilation or unreadable search roots,\n - overlapping computed edits (`Overlapping replacements detected; refine pattern to avoid ambiguous edits`),\n - out-of-bounds edit ranges or non-UTF-8 replacement text,\n - write failures during apply,\n - cancellation or timeout.\n- With `failOnParseError: false` (the wrapper always uses this), pattern compile failures and file parse failures become `parseErrors` instead of aborting the whole run.\n- If every rewrite pattern fails to compile, native `ast_edit` returns a successful zero-replacement result with `parseErrors` populated.\n- Files containing tree-sitter error nodes are skipped for rewriting; they do not get partial edits.\n- Apply can fail after a successful preview if the preview becomes stale. The resolve callback compares replacement totals and per-file counts and returns an error result rather than silently reporting success for a mismatched preview.\n\n## Notes\n- `ast_edit` does not expose the native `lang`, `strictness`, `selector`, `maxReplacements`, `failOnParseError`, or `timeoutMs` fields to the model. The runtime fixes the call shape to a preview-first, smart-strictness, best-effort parse mode.\n- Because the wrapper does not expose `lang`, mixed-language rewrites only succeed when every candidate infers to the same canonical language. This is stricter than `ast_grep`.\n- Idempotency is not enforced syntactically. A rewrite like `foo($A) -> foo($A)` previews zero changes because output equals input; a rewrite that keeps matching its own output may still produce replacements on repeated calls.\n- Rewrites are accumulated per file, then applied from the end of the file backward after an overlap check. Independent matches can coexist; overlapping matches abort the run.\n- Native rewrite rule order is by pattern-string sort, not by the original `ops` array order, because `normalize_rewrite_map(...)` sorts the `(pattern, rewrite)` pairs.\n- Preview/apply parity is validated by totals and per-file counts after the apply rerun, not by a byte-for-byte diff of every replacement payload.",
78
85
  "tools/ast-grep.md": "# ast_grep\n\n> Structural code search over supported source files via native ast-grep.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ast-grep.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ast-grep.md`\n- Key collaborators:\n - `crates/pi-natives/src/ast.rs` — native scan, parse, match engine\n - `crates/pi-ast/src/language/mod.rs` — language aliases and extension inference used by the native wrapper.\n - `packages/coding-agent/src/tools/path-utils.ts` — path/glob parsing and multi-path resolution\n - `packages/coding-agent/src/tools/render-utils.ts` — parse-error dedupe and display caps\n - `packages/coding-agent/src/tools/match-line-format.ts` — hashline match rendering\n - `packages/coding-agent/src/utils/file-display-mode.ts` — hashline vs line-number output mode\n - `packages/natives/native/index.d.ts` — JS-visible native binding contract\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `pat` | `string` | Yes | Single AST pattern. The wrapper trims it and rejects empty strings. |\n| `paths` | `string[]` | Yes | One or more files, directories, globs, or internal URLs with backing files. Empty entries are rejected. Globs are forbidden for internal URLs. |\n| `skip` | `number` | No | Match offset. Defaults to `0`, then `Math.floor(...)`; negatives and non-finite values fail. |\n\nPattern grammar and language support exposed to the model:\n- `$NAME` — capture one AST node.\n- `$_` — match one AST node without binding.\n- `$$$NAME` — capture zero or more AST nodes; ast-grep stops lazily at the next satisfiable node.\n- `$$$` — match zero or more AST nodes without binding.\n- Metavariable names must be uppercase and must stand for whole AST nodes, not partial tokens or string fragments.\n- Reusing the same metavariable requires identical code at each occurrence.\n- Patterns must parse as one valid AST node for the inferred target language.\n- Supported canonical languages come from `SupportLang::all_langs()` in `crates/pi-ast/src/language/mod.rs`: `astro`, `bash`, `c`, `cmake`, `cpp`, `csharp`, `dart`, `clojure`, `css`, `diff`, `dockerfile`, `elixir`, `erlang`, `go`, `graphql`, `haskell`, `hcl`, `html`, `ini`, `java`, `javascript`, `json`, `just`, `julia`, `kotlin`, `lua`, `make`, `markdown`, `nix`, `objc`, `ocaml`, `odin`, `perl`, `php`, `powershell`, `protobuf`, `python`, `r`, `regex`, `ruby`, `rust`, `scala`, `solidity`, `sql`, `starlark`, `svelte`, `swift`, `toml`, `tlaplus`, `tsx`, `typescript`, `verilog`, `vue`, `xml`, `yaml`, `zig`.\n\n## Outputs\n- Single-shot tool result.\n- Model-facing `content` is one text block:\n - grouped by file for directory/multi-file searches,\n - match lines rendered under `[PATH#HASH]` as `*LINE:text` in hashline mode or `*LINE|text` otherwise,\n - continuation lines for multi-line matches rendered with a leading space,\n - optional `meta: NAME=value` lines when ast-grep captured metavariables.\n- If no matches are found, text is `No matches found` or `No matches found. Parse issues mean the query may be mis-scoped; narrow paths before concluding absence.` plus formatted parse issues.\n- If the wrapper truncates visible results, the text ends with `Result limit reached; narrow paths or increase limit.`\n- `details` includes counts and metadata, not full match payloads:\n - `matchCount`, `fileCount`, `filesSearched`, `limitReached`\n - optional `parseErrors`, `scopePath`, `files`, `fileMatches`, `displayContent`, `meta`\n- Native ranges (`byteStart`, `byteEnd`, `startLine`, `startColumn`, `endLine`, `endColumn`) exist only inside the native result; the wrapper does not emit them directly to the model.\n\n## Flow\n1. `AstGrepTool.execute()` validates `pat`, normalizes `skip`, and normalizes each `paths` entry in `packages/coding-agent/src/tools/ast-grep.ts`.\n2. Internal URLs are resolved through `session.internalRouter`; entries without `sourcePath` fail, and internal-URL globs fail early.\n3. For multiple path inputs, `partitionExistingPaths()` drops missing bases only when at least one surviving base remains; if all bases are missing the call fails.\n4. `parseSearchPath()` splits a single path into `basePath` plus optional `glob`. `resolveExplicitSearchPaths()` collapses multiple inputs into a common base plus a brace-union glob, or separate `targets` when the only common base is a filesystem root.\n5. The wrapper stats the resolved base path to decide whether output should be grouped as a directory result.\n6. Execution dispatches to either:\n - one native `astGrep(...)` call for a single resolved base, or\n - `runMultiTargetAstGrep(...)`, which calls the native binding once per target, rebases paths back to the common root, sorts globally, then applies `skip` and the wrapper limit.\n7. Native `ast_grep` in `crates/pi-natives/src/ast.rs`:\n - normalizes and deduplicates patterns,\n - resolves a `MatchStrictness` (`smart` by default),\n - collects candidate files from a file or gitignore-aware directory scan,\n - infers language per candidate from extension unless `lang` was provided,\n - compiles the pattern separately for each language present,\n - reads each file, reports syntax-error trees as parse issues, runs `find_all`, and optionally captures metavariable bindings.\n8. Native results are sorted by path and source position, then paged by `offset`/`limit`.\n9. The TS wrapper normalizes parse-error strings, deduplicates them, groups matches by formatted path, renders anchor lines, appends limit/parse notices, and returns `toolResult(...).text(...).done()`.\n\n## Modes / Variants\n- Single file: native path is the file; output is a flat list of rendered match lines.\n- Directory + optional glob: native scan walks the directory, then filters by compiled glob.\n- Multiple explicit paths/globs: wrapper unions them into one synthetic scope or runs per-target native calls when paths only meet at root.\n- Internal URL inputs: only supported when the router can resolve them to a backing file path.\n- Hashline output mode vs plain line-number mode: controlled by `resolveFileDisplayMode()`; hashline mode requires the edit tool and hashline edit mode, and per-file anchors additionally require a successful whole-file snapshot (`recordFileSnapshot()`) — over-cap or unreadable files fall back to plain output.\n\n## Side Effects\n- Filesystem\n - Stats input paths in the TS wrapper.\n - Native code reads matched files and scans directories through `fs_cache`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - None beyond normal tool transcript/result metadata.\n- Background work / cancellation\n - Native work runs on a blocking worker via `task::blocking(...)`.\n - Cancellation and optional native timeout are cooperative through `CancelToken::heartbeat()`.\n\n## Limits & Caps\n- Wrapper-visible result cap: `DEFAULT_AST_LIMIT = 50` in `packages/coding-agent/src/tools/ast-grep.ts`.\n - Single-target calls rely on the native default limit of 50 in `crates/pi-natives/src/ast.rs`.\n - Multi-target calls fetch `skip + 50 + 1` matches per target, then re-page after global sort.\n- Native `limit` is clamped to at least `1`; omitted `offset` defaults to `0` in `crates/pi-natives/src/ast.rs`.\n- Parse issues are rendered with at most `PARSE_ERRORS_LIMIT = 20` lines in `packages/coding-agent/src/tools/render-utils.ts`; `details.parseErrors` itself is only deduplicated, not capped.\n- Directory scans use `include_hidden: true`, `use_gitignore: true`, and skip `node_modules` unless the glob text explicitly mentions `node_modules` in `crates/pi-natives/src/ast.rs`.\n- No hard file-count cap is applied by the wrapper or native `ast_grep`; candidate count is whatever the resolved path/glob expands to after gitignore filtering.\n- Multi-path union deduplicates identical path inputs before resolution in `resolveExplicitSearchPaths()`.\n\n## Errors\n- TS wrapper throws `ToolError` for empty patterns, invalid `skip`, empty path entries, unsupported internal-URL globs, internal URLs without `sourcePath`, and missing paths.\n- Native code returns hard errors for:\n - unsupported explicit `lang`,\n - inability to infer language for a candidate when `lang` is not supplied,\n - invalid AST pattern compilation for every relevant language,\n - unreadable search roots or bad glob compilation,\n - cancellation (`Aborted: Signal`) or timeout (`Aborted: Timeout`).\n- File-level parse failures and many per-language pattern compile failures are non-fatal: they are accumulated in `parseErrors` and surfaced alongside successful matches.\n- `no matches` is not an error, even when parse issues were recorded.\n\n## Notes\n- `pat` is always wrapped into a one-element `patterns` array by the TS tool; the model cannot send multiple patterns through `ast_grep` even though the native binding supports it.\n- `ast_grep` can search mixed-language trees because native compilation happens per discovered language, but the prompt still tells the model to keep calls single-language when possible to reduce parse noise.\n- Pattern compilation is per language present in the candidate set. One pattern can succeed for some languages and generate per-file parse errors for others in the same run.\n- A file with tree-sitter error nodes still gets searched; the syntax warning is additive, not a skip condition.\n- For glob semantics, `*.ts` matches only direct children while `**/*.ts` recurses; this is covered by native tests in `crates/pi-natives/src/ast.rs`.\n- Output anchors are intended for follow-up tools, but the exact anchor format depends on session edit mode (`hashline` vs line-number mode).",
@@ -80,7 +87,7 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
80
87
  "tools/browser.md": "# browser\n\n> Open, reuse, close, and script Puppeteer tabs against headless Chromium or CDP-attached apps.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/browser.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/browser.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/browser/tab-supervisor.ts` — global tab registry; worker lifecycle; run/close coordination.\n - `packages/coding-agent/src/tools/browser/tab-worker.ts` — executes `run` code; implements the `tab` helper API.\n - `packages/coding-agent/src/tools/browser/tab-worker-entry.ts` — worker-thread transport bootstrap.\n - `packages/coding-agent/src/tools/browser/registry.ts` — browser-handle registry keyed by browser kind.\n - `packages/coding-agent/src/tools/browser/launch.ts` — Puppeteer loading, Chromium resolution/download, headless launch, stealth injection.\n - `packages/coding-agent/src/tools/browser/attach.ts` — CDP attach/reuse, target picking, spawned-app process handling.\n - `packages/coding-agent/src/tools/browser/tab-protocol.ts` — worker init/run/result message schema.\n - `packages/coding-agent/src/tools/browser/readable.ts` — `tab.extract()` readability extraction.\n - `packages/coding-agent/src/tools/browser/render.ts` — TUI rendering for `open`/`close` status lines and `run` JS cells.\n - `packages/coding-agent/src/tools/puppeteer/00_stealth_tampering.txt` — mask patched functions/descriptors as native.\n - `packages/coding-agent/src/tools/puppeteer/01_stealth_activity.txt` — synthesize visibility/focus/scroll activity.\n - `packages/coding-agent/src/tools/puppeteer/02_stealth_hairline.txt` — fix Modernizr hairline detection.\n - `packages/coding-agent/src/tools/puppeteer/03_stealth_botd.txt` — spoof `navigator.webdriver`, `window.chrome`, and Chrome fingerprint surfaces.\n - `packages/coding-agent/src/tools/puppeteer/04_stealth_iframe.txt` — patch iframe `contentWindow`/`srcdoc` behavior.\n - `packages/coding-agent/src/tools/puppeteer/05_stealth_webgl.txt` — spoof WebGL vendor/renderer/precision.\n - `packages/coding-agent/src/tools/puppeteer/06_stealth_screen.txt` — normalize screen/viewport/device-pixel-ratio values.\n - `packages/coding-agent/src/tools/puppeteer/07_stealth_fonts.txt` — spoof local fonts and perturb canvas text rendering.\n - `packages/coding-agent/src/tools/puppeteer/08_stealth_audio.txt` — spoof audio latency/sample-rate and perturb offline rendering.\n - `packages/coding-agent/src/tools/puppeteer/09_stealth_locale.txt` — force locale/languages/timezone/date strings.\n - `packages/coding-agent/src/tools/puppeteer/10_stealth_plugins.txt` — synthesize `navigator.plugins`/`navigator.mimeTypes`.\n - `packages/coding-agent/src/tools/puppeteer/11_stealth_hardware.txt` — spoof `navigator.hardwareConcurrency`.\n - `packages/coding-agent/src/tools/puppeteer/12_stealth_codecs.txt` — spoof media codec support.\n - `packages/coding-agent/src/tools/puppeteer/13_stealth_worker.txt` — carry UA/platform spoofing into `Worker`/`SharedWorker`.\n\n## Inputs\n\n### Shared fields\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `action` | `\"open\" \\| \"close\" \\| \"run\"` | Yes | Dispatches to the open/close/run path. |\n| `name` | `string` | No | Tab id. Defaults to `\"main\"`. Tabs live in a process-global map, so the same name is reused across later calls and in-process subagents until closed. |\n| `timeout` | `number` | No | Tool wall-clock timeout in seconds. Defaults to `30`; clamped to the browser tool range before execution. |\n\n### `action: \"open\"`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `url` | `string` | No | Navigate after the tab is ready. Existing reusable tabs also navigate when `url` is supplied. |\n| `viewport` | `{ width: number; height: number; scale?: number }` | No | Requested viewport. For headless launch this becomes the initial viewport; for a page it is applied with `page.setViewport()`. `scale` maps to Puppeteer `deviceScaleFactor`. |\n| `wait_until` | `\"load\" \\| \"domcontentloaded\" \\| \"networkidle0\" \\| \"networkidle2\"` | No | Navigation wait condition. Defaults to `\"load\"` where omitted, including `open` navigation and later `tab.goto(...)`. |\n| `dialogs` | `\"accept\" \\| \"dismiss\"` | No | Installs a page `dialog` handler that auto-accepts or auto-dismisses dialogs. Omitted means no handler. |\n| `app` | `{ path?: string; cdp_url?: string; args?: string[]; target?: string }` | No | Selects browser kind. No `app` uses the session `browser.headless` setting. `app.path` is resolved against the session cwd and used as the executable path for spawn/attach reuse. `app.cdp_url` connects to an existing CDP endpoint. `args` are appended only when spawning `app.path`. `target` is only used for attached/spawned-app page selection. |\n\n### `action: \"close\"`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `all` | `boolean` | No | Close every known tab. Omitted closes only `name`. |\n| `kill` | `boolean` | No | When a tab release drops a spawned-app browser handle to refcount 0, also terminate its process tree. Has no effect on headless shutdown and only disconnects connected CDP browsers. |\n\n### `action: \"run\"`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `code` | `string` | Yes | Async-function body executed in a VM context with `page`, `browser`, `tab`, `display`, `assert`, `wait`, `console`, timers, `URL`, `TextEncoder`, `TextDecoder`, and `Buffer` in scope. |\n\n## Outputs\nThe tool returns one result per call; no streaming partial output is emitted from the browser implementation itself.\n\n- `open`: text content with `Opened` or `Reused`, browser description, URL, and optional title. `details` includes `action`, `name`, `browser`, `url`, `viewport`, and the same text in `details.result`.\n- `close`: text content with either `Closed ...` or `No tab named ...`. `details` includes `action`, `name`, and `details.result`.\n- `run`: ordered `content` array built as:\n 1. every `display(value)` call in execution order,\n 2. final return value, JSON-stringified unless already a string,\n 3. or `Ran code on tab \"...\"` if nothing else was produced.\n- `display(value)` coercion in `packages/coding-agent/src/tools/browser/tab-worker.ts`:\n - `{ type: \"image\", data: string, mimeType: string }` becomes image content,\n - `string` becomes text content,\n - other values become pretty JSON text when serializable, else `String(value)`.\n- `tab.screenshot()` also appends text plus an image content item unless `silent: true`; `details.screenshots` records persisted screenshot metadata `{ dest, mimeType, bytes, width, height }`.\n- `run` `details` includes `action`, `name`, current `browser`/`url` when the tab exists, optional `screenshots`, and `details.result` containing only the concatenated text outputs. Combined run text is capped at the inline byte limit via `enforceInlineByteCap()`; over-cap text is saved as a session artifact (`saveBrowserOutputArtifact()`) and the capped text replaces it in content and `details.result`.\n\n## Flow\n1. `BrowserTool.execute()` (`packages/coding-agent/src/tools/browser.ts`) abort-checks, clamps `timeout` via `clampTimeout(\"browser\", ...)`, defaults `name` to `\"main\"`, and dispatches on `action`.\n2. `open` resolves browser kind with `resolveBrowserKind()`:\n - `app.cdp_url` → `{ kind: \"connected\" }` after trimming trailing slashes.\n - `app.path` → `{ kind: \"spawned\" }` after resolving against session cwd.\n - otherwise → `{ kind: \"headless\", headless: session.settings.get(\"browser.headless\") }`.\n3. `open` rejects reusing the same tab name across different browser kinds (`sameBrowserKind()`); callers must close first.\n4. `open` acquires a browser handle through `acquireBrowser()` (`packages/coding-agent/src/tools/browser/registry.ts`):\n - existing connected handle is reused by browser-kind key;\n - stale disconnected handles are disposed and recreated;\n - headless launches via `launchHeadlessBrowser()`;\n - `connected` waits for `${cdpUrl}/json/version`, then `puppeteer.connect()`;\n - `spawned` first tries `findReusableCdp()`, else kills same-path processes, allocates a free loopback port, spawns the executable with `--remote-debugging-port=<port>`, waits for CDP, then connects.\n5. `open` acquires a tab through `acquireTab()` (`packages/coding-agent/src/tools/browser/tab-supervisor.ts`):\n - same-name + same-browser + alive tab is reused unless `dialogs` changed;\n - same-name but different browser handle, dead state, or changed dialog policy forces release and recreation;\n - reusing with a new `url` navigates by issuing `await tab.goto(...)` through the worker, defaulting to `waitUntil: \"load\"` when `wait_until` is omitted.\n6. New tabs build a `WorkerInitPayload` in `buildInitPayload()`:\n - headless mode sends `url`, `waitUntil`, `viewport`, `dialogs`, and timeout; the worker defaults missing `waitUntil` to `\"load\"`.\n - attach mode resolves a page with `pickElectronTarget()`, gets its target id, and sends `targetId` plus `dialogs`.\n7. `acquireTab()` spawns a dedicated Bun `Worker` from `tab-worker-entry.ts`; if that fails it falls back to inline execution in the main thread (`spawnInlineWorker()`), preserving behavior but losing protection against synchronous infinite loops.\n8. `WorkerCore.#init()` (`packages/coding-agent/src/tools/browser/tab-worker.ts`) connects back to the browser websocket endpoint. Headless mode opens a new page, applies stealth patches, applies viewport, installs dialog handling if requested, and optionally navigates. Attach mode resolves the requested target page and optionally installs dialog handling.\n9. On success the worker sends `ready` with `{ url, title, viewport, targetId }`; the supervisor stores a `TabSession`, increments browser-handle refcount with `holdBrowser()`, and keeps the tab in a process-global `Map<string, TabSession>`.\n10. `run` requires non-empty `code`, looks up the tab with `getTab()`, then delegates to `runInTab()`.\n11. `runInTabWithSnapshot()` rejects dead tabs and concurrent runs (`Tab ... is busy`), captures session cwd plus optional `browser.screenshotDir`, registers an abort hook, sends a `run` message to the worker, and races the result against `timeoutMs + 750` ms. Timeouts force-kill the tab worker and, for headless tabs, close the orphaned page target.\n12. `WorkerCore.#run()` creates a VM context, exposes the raw Puppeteer `page`/`browser` plus a synthetic `tab` API, and executes `(async () => { ...code... })()` via `vm.runInContext()`.\n13. The `tab` helper API implemented in `#createTabApi()` is:\n - `tab.name: string`\n - `tab.page: Page`\n - `tab.signal?: AbortSignal`\n - `tab.url(): string`\n - `tab.title(): Promise<string>`\n - `tab.goto(url, { waitUntil? })`\n - `tab.observe({ includeAll?, viewportOnly? })`\n - `tab.screenshot({ selector?, fullPage?, save?, silent? })`\n - `tab.extract(format = \"markdown\")`\n - `tab.click(selector)`\n - `tab.type(selector, text)`\n - `tab.fill(selector, value)`\n - `tab.press(key, { selector? })`\n - `tab.scroll(deltaX, deltaY)`\n - `tab.drag(from, to)`\n - `tab.waitFor(selector)`\n - `tab.evaluate(fn, ...args)`\n - `tab.scrollIntoView(selector)`\n - `tab.select(selector, ...values)`\n - `tab.uploadFile(selector, ...filePaths)`\n - `tab.waitForUrl(pattern, { timeout? })`\n - `tab.waitForResponse(pattern, { timeout? })`\n - `tab.id(n)`\n14. Selector handling in `normalizeSelector()` accepts plain CSS and Puppeteer query handlers, and rewrites legacy Playwright-style prefixes `p-text/`, `p-xpath/`, `p-pierce/`, `p-aria/`; other `p-*` prefixes throw a `ToolError`.\n15. `tab.observe()` clears the element cache, takes a Puppeteer accessibility snapshot, filters to interactive nodes unless `includeAll`, optionally filters to viewport-visible nodes, assigns numeric ids, caches `ElementHandle`s, and returns URL/title/viewport/scroll metadata plus `elements`.\n16. `tab.id(n)` resolves the cached `ElementHandle`, verifies `el.isConnected`, and throws a stale-id error after cache invalidation if the DOM changed or the cache was cleared.\n17. `tab.goto()` clears the cached element ids before navigating. Any new `tab.observe()` also clears and rebuilds the cache.\n18. `tab.click()` uses a custom retry loop for `text/...` selectors to find an actionable visible match; other selectors use `page.locator(...).click()` with the run timeout.\n19. `tab.screenshot()` captures either the whole page or a selector PNG, downsizes a copy for model output, chooses a persistence path, writes the image to disk, records metadata, and optionally emits text + image display entries.\n20. `display()` calls accumulate in an array. After code finishes, the worker posts `{ displays, returnValue, screenshots }`; `BrowserTool.#run()` appends the return value as trailing text content when not `undefined`.\n21. `close` releases one tab or all tabs via `releaseTab()` / `releaseAllTabs()`. Each tab aborts pending runs, asks the worker to close, waits up to `750` ms for a `closed` ack, terminates the worker, decrements browser refcount, and disposes the browser handle when refcount reaches zero.\n\n## Modes / Variants\n- **Action dispatch**\n - `open` — acquire/reuse browser + tab.\n - `close` — release one tab or all tabs.\n - `run` — execute JS inside the tab worker.\n- **Browser kind**\n - **Headless**: launches local Chromium with Puppeteer, applies stealth patches, and creates a fresh page per tab.\n - **Spawned app (`app.path`)**: reuses an existing CDP-enabled process for that executable when possible; otherwise kills same-path processes, spawns the executable with remote debugging enabled, then attaches. No stealth patches are injected.\n - **Connected browser (`app.cdp_url`)**: attaches to an already-running CDP endpoint. No process ownership; close only disconnects.\n- **Target selection for attached/spawned browsers**\n - With `app.target`, `pickElectronTarget()` returns the first page whose URL or title contains the case-insensitive substring.\n - Without `app.target`, it skips titles/URLs matching `request handler|devtools|background page|background host|service worker` and otherwise falls back to the first page.\n- **Worker mode**\n - **Dedicated worker**: normal path; user code runs off the main thread and can be aborted even when it blocks synchronously.\n - **Inline fallback**: activated when Bun worker spawn fails; behavior matches, but synchronous infinite loops on user code cannot be interrupted.\n- **Dialog policy**\n - No `dialogs` field: no auto-handler.\n - `accept`/`dismiss`: page `dialog` events are handled automatically.\n - Changing dialog policy on an existing live tab forces tab recreation instead of mutating the worker in place.\n- **Screenshot persistence**\n - `save` provided: persist full-resolution PNG at the resolved cwd-relative or absolute path.\n - `browser.screenshotDir` session setting set: persist full-resolution PNG under that directory with a timestamped filename.\n - Neither set: persist the resized image to a temp-file path under the OS temp dir.\n\n## Side Effects\n- Filesystem\n - `loadPuppeteer()` writes `{}` to `<puppeteer-safe-dir>/package.json` before importing `puppeteer-core`.\n - First headless launch may download Chromium into the Puppeteer cache directory returned by `getPuppeteerDir()`.\n - `tab.screenshot()` creates parent directories and writes image files.\n - `tab.uploadFile()` resolves supplied paths against the session cwd.\n- Network\n - CDP attach paths poll `http://127.0.0.1:<port>/json/version` or the supplied `cdp_url` `/json/version`.\n - Headless/browser-attach sessions create CDP websocket connections.\n - Headless first-use Chromium download uses `@puppeteer/browsers`.\n - User `page` / `tab` operations perform normal browser network traffic.\n- Subprocesses / native bindings\n - Headless mode launches Chromium through Puppeteer.\n - `app.path` mode may spawn the target executable via `Bun.spawn()`.\n - `killExistingByPath()` / `gracefulKillTreeOnce()` use `@oh-my-pi/pi-natives` process inspection/termination.\n - Worker mode uses Bun `Worker`; fallback mode does not.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Browser handles are cached in a process-global `Map` keyed by browser kind in `packages/coding-agent/src/tools/browser/registry.ts`.\n - Tabs are cached in a process-global `Map` keyed by `name` in `packages/coding-agent/src/tools/browser/tab-supervisor.ts`.\n - `run` captures session cwd and optional `browser.screenshotDir` for screenshot/save path resolution.\n - `restartForModeChange()` drops only headless tabs.\n- User-visible prompts / interactive UI\n - None beyond normal tool output. Dialog auto-handling is invisible unless it fails and emits debug logs.\n- Background work / cancellation\n - `open`, `run`, CDP waits, and browser actions thread through abort signals.\n - A timed-out `run` aborts the worker execution path and can tear down the tab.\n\n## Limits & Caps\n- Tool timeout clamp: default `30` s, min `1` s, max `300` s (`TOOL_TIMEOUTS.browser` in `packages/coding-agent/src/tools/tool-timeouts.ts`).\n- Supervisor grace period around init/run/close: `750` ms (`GRACE_MS` in `packages/coding-agent/src/tools/browser/tab-supervisor.ts`).\n- Puppeteer protocol timeout for launch/connect operations: `60_000` ms (`BROWSER_PROTOCOL_TIMEOUT_MS` in `packages/coding-agent/src/tools/browser/launch.ts`).\n- Connected-browser CDP readiness wait: `5_000` ms before `puppeteer.connect()` (`packages/coding-agent/src/tools/browser/registry.ts`).\n- Spawned-app CDP readiness wait after spawn: `30_000` ms (`packages/coding-agent/src/tools/browser/registry.ts`).\n- CDP polling cadence: 150 ms in `waitForCdp()` (`packages/coding-agent/src/tools/browser/attach.ts`).\n- Headless default viewport: `1365x768` at `deviceScaleFactor: 1.25` (`DEFAULT_VIEWPORT` in `packages/coding-agent/src/tools/browser/launch.ts`).\n- Screenshot model-attachment resize cap: `maxWidth 1024`, `maxHeight 1024`, `maxBytes 150 * 1024`, `jpegQuality 70` (`packages/coding-agent/src/tools/browser/tab-worker.ts`).\n- `tab.waitForUrl()` polling interval: `200` ms (`packages/coding-agent/src/tools/browser/tab-worker.ts`).\n- Drag simulation uses `12` mouse-move steps (`packages/coding-agent/src/tools/browser/tab-worker.ts`).\n\n## Errors\n- `BrowserTool.execute()` converts DOM-style `AbortError` into `ToolAbortError`; other errors propagate.\n- `run` hard-fails on missing code: `Missing required parameter 'code' for action 'run'.`\n- `open` fails when reusing a name across browser kinds: `Tab \"...\" is bound to a different browser (...). Close it first.`\n- `runInTabWithSnapshot()` fails when the tab is absent/dead (`Tab \"...\" is not alive. Reopen it.`) or already running (`Tab \"...\" is busy`).\n- Worker init failures and run failures are serialized through `RunErrorPayload`; `ToolError` and abort state are reconstructed on the host side by `errorFromPayload()`.\n- Attached-target mismatches surface as:\n - `No page targets available on the attached browser`\n - `No page target matched \"...\". Available pages:\\n...`\n - `Target ... is no longer available on the attached browser`\n- Spawned-app path validation requires an absolute executable path after cwd resolution, not an app bundle directory path.\n- Spawn/attach failures are wrapped into `ToolError`s such as `Timed out waiting for CDP endpoint ...`, `Failed to attach to ...`, or `Connected to ... but puppeteer.connect failed: ...`.\n- `tab` helper errors are user-visible `ToolError`s, including unsupported selector prefix, stale/unknown element id, invalid drag target, missing upload files, non-`<select>` for `tab.select()`, non-file-input for `tab.uploadFile()`, and screenshot selector misses.\n- On run timeout, the worker reports `Browser code execution timed out after <ms>ms`; the supervisor may escalate to `Browser code execution hung past grace; tab killed` if the worker does not respond after the grace window.\n\n## Notes\n- `loadPuppeteer()` and `loadPuppeteerInWorker()` temporarily redirect `cwd` to a safe Puppeteer directory before importing `puppeteer-core`, because Puppeteer probes the current working directory during module load.\n- Headless launch prefers a detected system Chrome/Chromium, then `PUPPETEER_EXECUTABLE_PATH`, and only then downloads Chromium.\n- Headless launch always passes `--no-sandbox`, `--disable-setuid-sandbox`, `--disable-blink-features=AutomationControlled`, and a `--window-size=...` matching the initial viewport. It also ignores Puppeteer default args `--disable-extensions`, `--disable-default-apps`, and `--disable-component-extensions-with-background-pages`.\n- Proxy-related env vars only affect headless launch: `PUPPETEER_PROXY`, `PUPPETEER_PROXY_BYPASS_LOOPBACK`, and `PUPPETEER_PROXY_IGNORE_CERT_ERRORS`.\n- Stealth patches are applied only in headless mode. Spawned or externally connected browsers are intentionally left untouched.\n- `applyStealthPatches()` also strips Puppeteer's `//# sourceURL=__puppeteer_evaluation_script__` suffix from CDP `Runtime.evaluate` / `Runtime.callFunctionOn` payloads.\n- `tab.extract()` reads `page.content()`, runs Readability first, then falls back to `main article`/`article`/`main`/`[role='main']`/`body`, and returns `null` if neither extraction path yields content.\n- `close(all: true, kill: false)` disconnects from spawned/connected browsers when the last tab closes but leaves spawned app processes running.\n- Headless orphan cleanup is best-effort: if a worker dies before closing its page, the supervisor searches browser targets by `targetId` and closes that page.\n- Console methods inside `run` do not appear in tool output; they are forwarded as debug/warn/error logs through the worker transport.",
81
88
  "tools/checkpoint.md": "# checkpoint\n\n> Mark the current top-level conversation state so later `rewind` can collapse exploratory context into a report.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/checkpoint.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/checkpoint.md`\n- Key collaborators:\n - `packages/coding-agent/src/session/agent-session.ts` — captures the active checkpoint after tool success.\n - `packages/coding-agent/src/session/session-manager.ts` — persists the normal session entry stream; not the active checkpoint marker.\n - `packages/coding-agent/src/tools/index.ts` — registers the tool and gates it behind `checkpoint.enabled`.\n - `packages/coding-agent/src/config/settings-schema.ts` — defines the disabled-by-default feature flag.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `goal` | `string` | Yes | Investigation goal. Required by the schema and echoed in the tool result. |\n\n## Outputs\nThe tool returns a single text result plus structured details:\n\n- text body:\n - `Checkpoint created.`\n - `Goal: <goal>`\n - `Run your investigation, then call rewind with a concise report.`\n- `details`:\n - `goal: string`\n - `startedAt: string` — ISO timestamp created inside `CheckpointTool.execute()`\n\nNo checkpoint ID, artifact URI, job handle, file path, or restore token is returned.\n\n## Flow\n1. `CheckpointTool.createIf()` in `packages/coding-agent/src/tools/checkpoint.ts` returns `null` for subagents by checking `session.taskDepth`; only top-level sessions can see the tool.\n2. `CheckpointTool.execute()` rejects subagent calls again with `ToolError(\"Checkpoint not available in subagents.\")`.\n3. It rejects nested checkpoints with `ToolError(\"Checkpoint already active.\")` when `session.getCheckpointState?.()` is already set.\n4. It creates `startedAt = new Date().toISOString()` and returns a normal `toolResult()` payload. The tool itself does not persist anything.\n5. On the later `tool_execution_end` event, `AgentSession` in `packages/coding-agent/src/session/agent-session.ts` detects successful `checkpoint` execution and captures three in-memory fields:\n - `checkpointMessageCount` — current `agent.state.messages.length`, after the checkpoint tool result has already been appended\n - `checkpointEntryId` — `sessionManager.getEntries().at(-1)?.id ?? null`, i.e. the last persisted session entry ID at checkpoint time\n - `startedAt` — copied from tool details or regenerated\n6. `AgentSession` stores that object in its private `#checkpointState` field and clears `#pendingRewindReport`.\n\n## Side Effects\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Sets `AgentSession.#checkpointState` in memory.\n - Records the checkpoint boundary as a message count plus a session entry ID.\n - Enables the later yield guard: if a checkpoint is active and no rewind report is pending, `#enforceRewindBeforeYield()` injects a developer-role warning and schedules another turn.\n- User-visible prompts / interactive UI\n - The tool result tells the model to call `rewind` after the investigation.\n - If the agent tries to `yield` first, `AgentSession` injects:\n\n```text\n<system-warning>\nYou are in an active checkpoint. You MUST call rewind with your investigation findings before yielding. Do NOT yield without completing the checkpoint.\n</system-warning>\n```\n\n## Limits & Caps\n- Availability is gated by `checkpoint.enabled`, default `false`, in `packages/coding-agent/src/config/settings-schema.ts`.\n- The tool is registered as discoverable in `packages/coding-agent/src/tools/index.ts`.\n- Only one active checkpoint is allowed per top-level session.\n- Checkpoint state is not persisted as a dedicated session entry. If the process exits, a resumed session can reload the conversation history, but not the live `#checkpointState` guard.\n- Session persistence still applies to the ordinary checkpoint tool call message. Global session persistence truncation is `MAX_PERSIST_CHARS = 500_000` in `packages/coding-agent/src/session/session-manager.ts`.\n\n## Errors\n- `ToolError(\"Checkpoint not available in subagents.\")` — thrown for subagent sessions.\n- `ToolError(\"Checkpoint already active.\")` — thrown when a prior checkpoint has not been rewound or cleared.\n- The tool body has no local `try/catch`; unexpected exceptions propagate.\n\n## Notes\n- Despite the summary string `Create a git-based checkpoint to save and restore session state`, the implementation does not call git and does not snapshot filesystem state.\n- Captured state is conversation/session metadata only:\n - in-memory message count\n - session entry ID in the session tree\n - timestamp\n- Not captured:\n - working tree contents\n - staged changes\n - artifacts\n - blob-store contents\n - SQLite history rows from `packages/coding-agent/src/session/history-storage.ts`\n - auth or agent records from `packages/coding-agent/src/session/agent-storage.ts`\n",
82
89
  "tools/debug.md": "# debug\n\n> Drive one DAP debug session; adjacent debug UI code reuses the same subsystem for logs, raw SSE capture, reports, profiling, and system diagnostics.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/debug.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/debug.md`\n- Key collaborators:\n - `packages/coding-agent/src/dap/session.ts` — session lifecycle, breakpoint/state cache\n - `packages/coding-agent/src/dap/client.ts` — adapter process/socket transport, DAP message loop\n - `packages/coding-agent/src/dap/config.ts` — adapter resolution and auto-selection\n - `packages/coding-agent/src/dap/defaults.json` — built-in adapter definitions\n - `packages/coding-agent/src/dap/types.ts` — request/response/capability shapes\n - `packages/coding-agent/src/tools/tool-timeouts.ts` — per-tool timeout clamp\n - `packages/coding-agent/src/debug/index.ts` — interactive debug selector menu\n - `packages/coding-agent/src/debug/log-viewer.ts` — recent-log TUI viewer\n - `packages/coding-agent/src/debug/raw-sse.ts` — raw SSE TUI viewer\n - `packages/coding-agent/src/debug/raw-sse-buffer.ts` — bounded SSE capture buffer\n - `packages/coding-agent/src/debug/profiler.ts` — CPU/heap profiling helpers\n - `packages/coding-agent/src/debug/report-bundle.ts` — `.tar.gz` report bundling, log source, cache cleanup\n - `packages/coding-agent/src/debug/system-info.ts` — system snapshot collection and env redaction\n - `packages/coding-agent/src/debug/terminal-info.ts` — terminal state collection/formatting\n - `packages/coding-agent/src/debug/protocol-probe.ts` — terminal protocol probe panel and sample image\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `action` | `\"launch\" \\| \"attach\" \\| \"set_breakpoint\" \\| \"remove_breakpoint\" \\| \"set_instruction_breakpoint\" \\| \"remove_instruction_breakpoint\" \\| \"data_breakpoint_info\" \\| \"set_data_breakpoint\" \\| \"remove_data_breakpoint\" \\| \"continue\" \\| \"step_over\" \\| \"step_in\" \\| \"step_out\" \\| \"pause\" \\| \"evaluate\" \\| \"stack_trace\" \\| \"threads\" \\| \"scopes\" \\| \"variables\" \\| \"disassemble\" \\| \"read_memory\" \\| \"write_memory\" \\| \"modules\" \\| \"loaded_sources\" \\| \"custom_request\" \\| \"output\" \\| \"terminate\" \\| \"sessions\"` | Yes | Dispatch key for the tool switch in `packages/coding-agent/src/tools/debug.ts`. |\n| `program` | `string` | No | Launch target path. Required for `launch`. Resolved relative to `cwd` if provided, otherwise session cwd. |\n| `args` | `string[]` | No | Program argv for `launch`. |\n| `adapter` | `string` | No | Explicit adapter name. Otherwise `selectLaunchAdapter()` / `selectAttachAdapter()` auto-pick from `packages/coding-agent/src/dap/config.ts`. |\n| `cwd` | `string` | No | Launch/attach working directory. Defaults to session cwd. |\n| `file` | `string` | No | Source file path for source breakpoints. |\n| `line` | `number` | No | Source line for source breakpoints. |\n| `function` | `string` | No | Function breakpoint name. Mutually exclusive with `file`+`line` in breakpoint actions. |\n| `name` | `string` | No | Data breakpoint info target name. Required for `data_breakpoint_info`. |\n| `condition` | `string` | No | Conditional expression for source/function/instruction/data breakpoints. |\n| `hit_condition` | `string` | No | Hit-count condition for instruction/data breakpoints. |\n| `expression` | `string` | No | Expression or raw debugger command. Required for `evaluate`. |\n| `context` | `string` | No | Evaluate context. Defaults to `\"repl\"`. Passed through as DAP evaluate context. |\n| `frame_id` | `number` | No | Frame selector for `evaluate`, `scopes`, `data_breakpoint_info`. `scopes` and `evaluate` default to the current stopped frame when omitted. |\n| `scope_id` | `number` | No | Variables reference from a scope. Accepted by `variables`; also used as a fallback variables reference for `data_breakpoint_info`. |\n| `variable_ref` | `number` | No | Variables reference for `variables`; preferred over `scope_id` when both are present. |\n| `pid` | `number` | No | Local process id for `attach`. `attach` requires `pid` or `port`. |\n| `port` | `number` | No | Remote attach port. If no adapter is forced, attach prefers `debugpy` when `port` is present. |\n| `host` | `string` | No | Remote attach host for `attach`. |\n| `levels` | `number` | No | Max stack frames for `stack_trace`. |\n| `memory_reference` | `string` | No | Memory reference/address for `disassemble`, `read_memory`, `write_memory`. `disassemble` uses this when provided; otherwise it falls back to the current stopped location's instruction-pointer reference if the adapter supplied one. |\n| `instruction_reference` | `string` | No | Instruction breakpoint reference; required for instruction breakpoint actions. Not used by `disassemble`. |\n| `instruction_count` | `number` | No | Required for `disassemble`. |\n| `instruction_offset` | `number` | No | Instruction offset for `disassemble`. |\n| `count` | `number` | No | Byte count for `read_memory`. Required there. |\n| `data` | `string` | No | Base64 payload for `write_memory`. Required there. |\n| `data_id` | `string` | No | Data breakpoint id. Required for `set_data_breakpoint` / `remove_data_breakpoint`. |\n| `access_type` | `\"read\" \\| \"write\" \\| \"readWrite\"` | No | Access filter for `set_data_breakpoint`. |\n| `command` | `string` | No | Custom DAP request command. Required for `custom_request`. |\n| `arguments` | `Record<string, unknown>` | No | Custom DAP request body for `custom_request`. |\n| `offset` | `number` | No | Offset for instruction breakpoints, disassembly, memory read, memory write. |\n| `resolve_symbols` | `boolean` | No | `disassemble` symbol-resolution flag. |\n| `allow_partial` | `boolean` | No | `write_memory` partial-write allowance. |\n| `start_module` | `number` | No | Modules pagination start index for `modules`. |\n| `module_count` | `number` | No | Modules pagination count for `modules`. |\n| `timeout` | `number` | No | Per-request timeout in seconds. Default `30`, clamped to `5..300`. |\n\n### Action-specific requirements\n- `launch`: `program`\n- `attach`: `pid` or `port`\n- `set_breakpoint` / `remove_breakpoint`: `function`, or `file` + `line`\n- `set_instruction_breakpoint` / `remove_instruction_breakpoint`: `instruction_reference`\n- `data_breakpoint_info`: `name`\n- `set_data_breakpoint` / `remove_data_breakpoint`: `data_id`\n- `evaluate`: `expression`\n- `variables`: `variable_ref` or `scope_id`\n- `disassemble`: capability `supportsDisassembleRequest`, plus `instruction_count`, and either `memory_reference` or a current stopped location with `instructionPointerReference`\n- `read_memory`: capability `supportsReadMemoryRequest`, plus `memory_reference` and `count`\n- `write_memory`: capability `supportsWriteMemoryRequest`, plus `memory_reference` and `data`\n- `modules`: capability `supportsModulesRequest`\n- `loaded_sources`: capability `supportsLoadedSourcesRequest`\n- `custom_request`: `command`\n\n### Interactive selector values\n`packages/coding-agent/src/debug/index.ts` also exposes a fixed UI-only selector with values `open-artifacts`, `performance`, `work`, `dump`, `memory`, `logs`, `system`, `terminal`, `protocols`, `raw-sse`, `transcript`, `clear-cache`. These are not model-callable through `debugSchema`; they are local TUI menu routes.\n\n## Outputs\nThe agent tool returns a standard `toolResult()` payload from `packages/coding-agent/src/tools/debug.ts`:\n- `content`: one text block. Every action renders human-readable text; there is no structured JSON block in `content`.\n- `details.action`: echoed action.\n- `details.success`: always initialized `true`; failures surface by throwing before a result is returned.\n- `details.snapshot`: present for actions that operate on or create a session, using `DapSessionSummary` from `packages/coding-agent/src/dap/types.ts`.\n- Action-specific `details` fields:\n - `launch` / `attach`: `adapter`\n - breakpoint actions: `breakpoints`, `functionBreakpoints`, `instructionBreakpoints`, `dataBreakpoints`\n - `data_breakpoint_info`: `dataBreakpointInfo`\n - `continue` / `step_*`: `state`, `timedOut`\n - `threads`: `threads`\n - `stack_trace`: `stackFrames`\n - `scopes`: `scopes`\n - `variables`: `variables`\n - `evaluate`: `evaluation`\n - `disassemble`: `disassembly`\n - `read_memory`: `memoryAddress`, `memoryData`, `unreadableBytes`\n - `write_memory`: `bytesWritten`\n - `modules`: `modules`\n - `loaded_sources`: `sources`\n - `custom_request`: `customBody`\n - `output`: `output`\n - `sessions`: `sessions`\n\nStreaming/UI behavior:\n- The tool renderer merges call and result (`mergeCallAndResult: true`) and renders inline.\n- `debug.ts` itself does not emit progress updates through `_onUpdate`; result delivery is single-shot.\n- Approval is action-sensitive: read-only actions (`output`, `threads`, `stack_trace`, `scopes`, `variables`, `disassemble`, `read_memory`, `loaded_sources`, `modules`, `sessions`) request read approval; all other actions request exec approval.\n- The interactive selector is UI-driven instead of model-driven. It swaps TUI components, appends status lines to the chat pane, opens files in external viewers, or writes archives/temp files.\n\nSide-channel artifacts outside the model tool result:\n- `createReportBundle()` writes `omp-report-<timestamp>.tar.gz` under the reports dir and returns the filesystem path to the UI handler.\n- `#handleWorkReport()` writes `/tmp/work-profile-<Date.now()>.svg` before opening it.\n- `RawSseViewerComponent` and `DebugLogViewerComponent` can copy captured text to the clipboard.\n\n## Flow\n1. Tool registration is conditional: `DebugTool.createIf()` in `packages/coding-agent/src/tools/debug.ts` returns `null` unless `session.settings.get(\"debug.enabled\")` is true. `packages/coding-agent/src/tools/index.ts` wires the factory and rechecks the same setting in tool filtering.\n2. `DebugTool.execute()` clamps `params.timeout` through `clampTimeout(\"debug\", params.timeout)` and composes the caller `AbortSignal` with `AbortSignal.timeout(...)`.\n3. `launch` and `attach` resolve cwd/program paths, select an adapter in `packages/coding-agent/src/dap/config.ts`, then delegate to `dapSessionManager.launch()` / `.attach()`.\n4. `DapSessionManager.launch()` / `.attach()` enforce the single-session rule with `#ensureLaunchSlot()`, spawn the adapter through `DapClient.spawn()`, register listeners, send `initialize`, cache capabilities, start listening for an initial stop event before sending `launch`/`attach`, then complete the `initialized` → `configurationDone` handshake in `#completeConfigurationHandshake()`.\n5. `DapClient.spawn()` starts the adapter detached with `NON_INTERACTIVE_ENV`. Most adapters use stdio; socket-mode adapters (`dlv`) use `#spawnSocketUnix()` on Linux or `#spawnSocketClientAddr()` on macOS/other.\n6. `#registerSession()` in `packages/coding-agent/src/dap/session.ts` installs reverse-request handlers:\n - `runInTerminal`: spawns the requested debuggee command detached via `ptree.spawn()` and returns `{ processId }`\n - `startDebugging`: logs the child-session request and returns `{}`; it does not create nested sessions\n - events: `output`, `initialized`, `stopped`, `continued`, `exited`, `terminated` update cached session state\n7. Operational actions (`set_breakpoint`, `evaluate`, `threads`, `read_memory`, `custom_request`, and similar) call `dapSessionManager` methods. Most flow through `#sendRequestWithConfig()`, which first sends `configurationDone` when required, then sends the DAP request, then updates `lastUsedAt`.\n8. Breakpoint actions maintain local cached breakpoint sets in `DapSessionManager` and remap adapter responses back onto those cached records.\n9. `continue` and the three step actions clear cached stop state, subscribe for `stopped`/`terminated`/`exited` before sending the DAP request, then `#awaitStopOutcome()` either returns the new stopped location or reports that the program is still running after timeout.\n10. `pause` sends DAP `pause`, waits for a stopped event if needed, and reuses cached stop state if the program was already stopped.\n11. `stack_trace`, `scopes`, `variables`, and `evaluate` default to the current stopped thread/frame when the caller omits ids and cached state is available.\n12. `output` reads the in-memory output ring from `DapSessionManager.getOutput()`. `terminate` sends `terminate` when supported, always attempts `disconnect`, marks the session terminated, and disposes the client.\n13. `sessions` reads the manager’s current map and formats all summaries. Although the manager stores a map, only one active session can exist because new launch/attach calls are blocked until the active one is terminated or cleaned up.\n14. The interactive selector in `packages/coding-agent/src/debug/index.ts` builds a `SelectList` of fixed values and dispatches each to a handler:\n - `performance`: `startCpuProfile()`, wait for Enter/Escape, stop profiling, read a 30-second work profile with `getWorkProfile(30)`, then bundle via `createReportBundle()`\n - `work`: read `getWorkProfile(30)`, write a temp SVG, open it externally\n - `dump`: create a report bundle immediately\n - `memory`: force GC, call `Bun.generateHeapSnapshot(\"v8\")`, then bundle\n - `logs`: build a `DebugLogSource` and mount `DebugLogViewerComponent`\n - `raw-sse`: resolve a `RawSseDebugBuffer` from the session and mount `RawSseViewerComponent`\n - `system`: call `collectSystemInfo()` and render `formatSystemInfo()` into the chat pane\n - `terminal`: `collectTerminalState()` + `formatTerminalState()` rendered into the chat pane\n - `protocols`: fires a test desktop notification (unless suppressed), then mounts `ProtocolProbeComponent` with a sample image\n - `open-artifacts`: open the current session artifact directory if it exists\n - `transcript`: delegates to `ctx.handleDebugTranscriptCommand()`\n - `clear-cache`: show confirmation, then remove artifact directories older than 30 days with `clearArtifactCache()`\n\n## Modes / Variants\n- **Availability gate**\n - Tool hidden when `debug.enabled` is false.\n- **Adapter selection**\n - `launch`: explicit `adapter` wins; otherwise `selectLaunchAdapter()` ranks available adapters by extension match, root-marker match, then native-debugger preference (`gdb`, `lldb-dap`) for extensionless binaries.\n - `attach`: explicit `adapter` wins; otherwise remote `port` prefers `debugpy`, then native debuggers, then first available adapter.\n- **Transport**\n - stdio adapters: direct `stdin`/`stdout` framing.\n - socket adapters: Unix domain socket on Linux; TCP callback on macOS/other.\n- **DAP agent-tool actions**\n - `launch` — spawn adapter, initialize session, maybe stop on entry; returns formatted session snapshot and `details.adapter`.\n - `attach` — connect to a live process or remote port; same output shape as `launch`.\n - `set_breakpoint` — source or function breakpoint add/update; returns the current breakpoint list for that target.\n - `remove_breakpoint` — source or function breakpoint removal; returns the remaining breakpoint list.\n - `set_instruction_breakpoint` / `remove_instruction_breakpoint` — require `supportsInstructionBreakpoints`; return current instruction breakpoint list.\n - `data_breakpoint_info` — require `supportsDataBreakpoints`; asks the adapter for a `dataId`, access types, and description for `name`.\n - `set_data_breakpoint` / `remove_data_breakpoint` — require `supportsDataBreakpoints`; return the cached data-breakpoint list.\n - `continue` / `step_over` / `step_in` / `step_out` — return text describing whether execution stopped, terminated, or kept running, plus `details.state` and `details.timedOut`.\n - `pause` — interrupts a running target and returns a stopped snapshot.\n - `evaluate` — adapter expression evaluation; defaults context to `repl`.\n - `stack_trace` — fetches frames for the resolved thread.\n - `threads` — fetches current threads.\n - `scopes` — frame scopes for an explicit `frame_id` or the current stopped frame.\n - `variables` — variables for `variable_ref` or `scope_id`.\n - `disassemble` — require `supportsDisassembleRequest`; disassembles around `memory_reference`, or around the current stopped instruction pointer when no memory reference is supplied.\n - `read_memory` — require `supportsReadMemoryRequest`; returns address, base64 data, unreadable-byte count.\n - `write_memory` — require `supportsWriteMemoryRequest`; writes base64 data and reports bytes written.\n - `modules` — require `supportsModulesRequest`; optional pagination via `start_module` / `module_count`.\n - `loaded_sources` — require `supportsLoadedSourcesRequest`; returns loaded source descriptors.\n - `custom_request` — sends any DAP request name with arbitrary arguments.\n - `output` — dumps captured stdout/stderr/console text from the session cache.\n - `terminate` — disconnects and disposes the active session; returns `No debug session to terminate.` when none exists.\n - `sessions` — lists all cached session summaries.\n- **Interactive selector routes (UI-only)**\n - `logs` — loads today’s log tail and optional older daily log files into `DebugLogViewerComponent`; supports copy, range selection, pid filtering, load-older.\n - `raw-sse` — live view over the session’s `RawSseDebugBuffer`; supports tail-follow, scrolling, copy-all.\n - `performance` — CPU profile + 30-second work profile + report bundle.\n - `memory` — heap snapshot + report bundle.\n - `dump` — report bundle without profiler artifacts.\n - `work` — standalone work-profile flamegraph export/open.\n - `system` — formatted OS/arch/CPU/memory/version/cwd/shell/terminal dump.\n - `terminal` — formatted terminal subprotocol/geometry/scrollback state dump.\n - `protocols` — terminal protocol test: desktop-notification side effect plus a probe panel sampling special protocols.\n - `open-artifacts` / `transcript` / `clear-cache` — artifact directory open, transcript export, artifact-cache pruning.\n\n## Side Effects\n- Filesystem\n - Resolves program/file/cwd paths against the session cwd.\n - Report creation writes `.tar.gz` bundles and may read the session JSONL, artifact files, subagent session JSONLs, and log files.\n - Work-profile export writes `/tmp/work-profile-<timestamp>.svg`.\n - Log source reads daily log files from the logs dir.\n - Artifact-cache cleanup removes session artifact directories older than the cutoff.\n - `resolveRawSseDebugBuffer()` may attach a non-enumerable `rawSseDebugBuffer` property to the owner object.\n- Network\n - Socket-mode adapters bind/connect local sockets.\n - Remote attach may connect through the adapter to a remote debug port.\n- Subprocesses / native bindings\n - Spawns debugger adapters (`gdb`, `lldb-dap`, `python -m debugpy.adapter`, `dlv`, and others from `defaults.json`) detached.\n - Reverse DAP `runInTerminal` requests spawn the debuggee detached via `ptree.spawn()`.\n - `getWorkProfile(30)` comes from `@oh-my-pi/pi-natives`.\n - CPU profiling uses `node:inspector/promises`; heap snapshots use `Bun.generateHeapSnapshot(\"v8\")`; raw/log viewers sanitize text via `@oh-my-pi/pi-natives`.\n - `openPath()` launches the OS default file/browser handler for artifact dirs and SVGs.\n - Log/raw-SSE viewers can call `copyToClipboard()`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - `DapSessionManager` keeps session summaries, breakpoints, threads, stack frames, stop location, output capture, capabilities, and last-used timestamps in memory.\n - Active-session id is global to the singleton `dapSessionManager`.\n - `RawSseDebugBuffer` stores recent SSE events per owner/session.\n - The tool is `exclusive`; concurrent debug tool calls are blocked by the scheduler.\n- User-visible prompts / interactive UI\n - Debug selector shows confirmation before cache deletion.\n - Performance profiling temporarily hijacks editor Enter/Escape handlers until profiling stops.\n - Log/raw-SSE viewers replace the editor pane with custom components.\n- Background work / cancellation\n - Every DAP request accepts an `AbortSignal`; timeouts and caller cancellation abort the active request, not the whole session lifetime.\n - `DapSessionManager` runs a background cleanup loop every 30 seconds.\n - Raw SSE viewers subscribe to buffer updates until closed.\n\n## Limits & Caps\n- Tool timeout clamp: `default=30`, `min=5`, `max=300` in `packages/coding-agent/src/tools/tool-timeouts.ts`.\n- Per-request DAP default timeout: `DEFAULT_REQUEST_TIMEOUT_MS = 30_000` in `packages/coding-agent/src/dap/client.ts`.\n- Single active session: enforced by `#ensureLaunchSlot()` in `packages/coding-agent/src/dap/session.ts`.\n- Idle session cleanup: `IDLE_TIMEOUT_MS = 10 * 60 * 1000`, checked every `CLEANUP_INTERVAL_MS = 30 * 1000`.\n- Adapter liveness heartbeat: `HEARTBEAT_INTERVAL_MS = 5 * 1000`.\n- Output capture cap: `MAX_OUTPUT_BYTES = 128 * 1024`; whole chunks are dropped from the front (then the front chunk is byte-sliced so exactly the cap remains) and `outputTruncated` is recorded.\n- Initial stop capture timeout after launch/attach: `STOP_CAPTURE_TIMEOUT_MS = 5_000`.\n- Socket-mode adapter readiness timeout: `10_000` ms in `waitForCondition()` and TCP connect timeout logic in `packages/coding-agent/src/dap/client.ts`.\n- Raw SSE buffer caps in `packages/coding-agent/src/debug/raw-sse-buffer.ts`:\n - `MAX_RAW_SSE_EVENTS = 1_000`\n - `MAX_RAW_SSE_CHARS = 512_000`\n - `MAX_RAW_SSE_EVENT_CHARS = 64_000` per event, with `: omp-debug-truncated ...` marker appended on trim\n- Log viewer window in `packages/coding-agent/src/debug/log-viewer.ts`:\n - `INITIAL_LOG_CHUNK = 50`\n - `LOAD_OLDER_CHUNK = 50`\n- Report/log ingestion caps in `packages/coding-agent/src/debug/report-bundle.ts`:\n - `MAX_LOG_LINES = 5000` for interactive log reading\n - `MAX_LOG_BYTES = 2 * 1024 * 1024` tail-read ceiling\n - report bundles include only the last `1000` log lines\n - subagent session inclusion is capped at the most recent `10` JSONL files\n- Interactive profiling windows in `packages/coding-agent/src/debug/index.ts`: both performance and work reports request `getWorkProfile(30)`.\n- Artifact cache pruning default: `30` days in `clearArtifactCache()` and the selector confirmation text.\n\n## Errors\n- Parameter validation in `packages/coding-agent/src/tools/debug.ts` throws `ToolError` with explicit messages such as:\n - `program is required for launch`\n - `attach requires pid or port`\n - `set_breakpoint requires file+line or function`\n - `variables requires variable_ref or scope_id`\n - `instruction_count is required for disassemble`\n - `disassemble requires memory_reference unless the current stop location has an instruction pointer reference`\n - `memory_reference is required for read_memory`\n - `count is required for read_memory`\n - `data is required for write_memory`\n - `command is required for custom_request`\n- Adapter selection failure throws `No debugger adapter available. Installed adapters: ...`.\n- Capability-gated actions throw from `requireCapability(...)`, e.g. `Active adapter does not support memory reads.`\n- No-session and state errors come from `DapSessionManager`, e.g. `No active debug session. Launch or attach first.`, `No active stack frame. Run stack_trace first or supply frame_id.`, `Debugger reported no threads.`\n- Launching a second live session throws `Debug session <id> is still active. Terminate it before launching another.`\n- DAP transport/request failures surface as thrown errors from `DapClient`:\n - `DAP request <command> timed out after <ms>ms`\n - `DAP event <event> timed out after <ms>ms`\n - `DAP adapter <name> is not running`\n - `DAP adapter exited (code N): <stderr>` or `DAP adapter exited unexpectedly (code N)`\n - adapter response `message` when a DAP request fails\n- `continue` / `step_*` are intentionally non-fatal when the target stays running past the timeout: they return `details.timedOut = true` and `state: \"running\"` instead of throwing.\n- `terminate` suppresses adapter errors while sending `terminate`/`disconnect`; it still disposes the client and returns the last summary when possible.\n- Interactive selector handlers report UI errors instead of throwing:\n - profiler start/stop, report bundling, log reading, system-info collection, cache clearing, and artifact opening use `ctx.showError(...)` / `ctx.showWarning(...)`\n - empty logs and empty artifact caches are warnings/status messages, not failures\n - copy failures in log/raw-SSE viewers become status/error text in the UI\n- Report-bundle helpers are intentionally best-effort for many file reads: missing session files, missing artifact dirs, unreadable artifact files, missing log dirs, inaccessible cache dirs, and missing subagent files are skipped silently.\n- `collectSystemInfo()` is best-effort for CPU probing; failure there falls back to `Unknown CPU`.\n\n## Notes\n- `packages/coding-agent/src/prompts/tools/debug.md` tells the model only one active session is supported; that is not advisory, it is enforced in code.\n- `configurationDone` is sent automatically both during launch/attach handshake and lazily before later requests if the adapter required it and the initial handshake did not complete.\n- `startDebugging` reverse requests are acknowledged but not implemented; child debug sessions are not spawned.\n- `output` exposes the merged `output` event stream only; the tool does not distinguish stdout, stderr, and console categories.\n- Session summaries expose `needsConfigurationDone`; this is derived from adapter capabilities and whether `configurationDone` has been sent.\n- Source breakpoint file paths are normalized with `path.resolve()` before caching and sending to the adapter.\n- `evaluate` defaults to `repl`, so the tool can forward raw debugger commands when the adapter supports them.\n- `disassemble` resolves its target from `memory_reference` first, then the current stopped session's `instructionPointerReference`; it throws if neither is present.\n- `RawSseDebugBuffer.recordEvent()` increments `totalEvents` before bounded retention. A snapshot can therefore show fewer retained records than total observed events.\n- Raw SSE buffer listener failures are swallowed so viewer bugs do not break capture.\n- `createDebugLogSource()` walks daily log files newest-first, but `loadOlderLogs()` reverses each requested slice before concatenation so older chunks prepend in chronological order.\n- `clearArtifactCache()` deletes directories by directory mtime, not per-file age.\n- `addDirectoryToArchive()` reads artifact files as text with `Bun.file(...).text()`. Binary artifact contents are not preserved byte-for-byte in the report bundle.\n- The tool renderer truncates displayed output for the TUI preview, but the underlying text result still contains the full returned string.\n",
83
- "tools/edit.md": "# edit\n\n> Applies source edits; default mode is the hashline patch language consumed from a single `input` string.\n\n## Source\n- Entry: `packages/coding-agent/src/edit/index.ts`\n- Model-facing prompt: `packages/hashline/src/prompt.md`\n- Key collaborators:\n - `packages/coding-agent/src/utils/edit-mode.ts` — selects active edit mode\n - `packages/hashline/src/grammar.lark` — canonical constrained-decoding grammar\n - `packages/hashline/src/format.ts` — sigils and header constants (`[`, `]`, `#`, `+`, `replace`, `delete`, `insert`)\n - `packages/hashline/src/input.ts` — parses `[PATH#TAG]` sections\n - `packages/hashline/src/tokenizer.ts` / `packages/hashline/src/parser.ts` — tokenizes and parses ops\n - `packages/hashline/src/apply.ts` — applies parsed edits to file text\n - `packages/hashline/src/mismatch.ts` — stale-anchor mismatch formatting\n - `packages/hashline/src/recovery.ts` — snapshot-based stale-anchor recovery\n - `packages/hashline/src/snapshots.ts` — mints and resolves per-path opaque snapshot tags\n\n## Inputs\n\n### Hashline mode (default)\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `input` | `string` | Yes | One or more file sections. Anchored sections must start with `[PATH#TAG]`; `TAG` is the four-hex snapshot tag emitted by the latest `read`/`search`/`write`/successful `edit`. Optional `*** Begin Patch` / `*** End Patch` envelope is ignored if present. |\n\nPatch language inside `input`:\n\n- **File header**: `[PATH#TAG]`. `TAG` is four uppercase-hex chars — a content-derived hash of the whole normalized file (`computeFileHash()`), recorded in the session snapshot store.\n- **Operations**:\n - `replace N..M:` — replace original lines N..M with the body rows below.\n - `replace block N:` — replace the whole tree-sitter block beginning on line N (its header line through its closing line) with the body rows. The line span is resolved at apply time from the file's parse tree; point N at the line that opens the construct. The resolved span is exactly the node that begins on line N — a leading decorator, attribute, or doc-comment is a separate node and is not included; point N at the first decorator line (Python wraps `@dec` + `def` as one block) or fall back to `replace N..M:` to take a leading line-comment that parses as its own node (e.g. Rust `///`). On success the result echoes the matched span (`replace block N → resolved lines A-B`). Errors (and steers to `replace N..M:`) when the language is unsupported, line N is blank or a closing delimiter, no node begins there, or the resolved block has a syntax error.\n - `delete N..M` — delete original lines N..M. No body.\n - `delete block N` — delete the whole tree-sitter block beginning on line N (resolved like `replace block N`, with the same decorator/comment caveat). No body. On success the result echoes the matched span (`delete block N → resolved lines A-B`). Same resolution failure modes and `delete N..M` fallback.\n - `insert before N:` — insert body rows immediately before line N.\n - `insert after N:` — insert body rows immediately after line N.\n - `insert after block N:` — insert body rows after the last line of the tree-sitter block beginning on line N. Point N at the line that opens the construct, never its closing delimiter / last visible line; if you can see the last line already, use plain `insert after M:`. Same resolution failure modes and `insert after M:` fallback.\n - `insert head:` — insert body rows at the start of the file.\n - `insert tail:` — insert body rows at the end of the file.\n- **Body rows**:\n - Only body-bearing headers end in `:`.\n - Every body row is `+TEXT`; `+` alone adds a blank line.\n - `delete` never has body rows.\n - There is no repeat row kind. To keep a line, leave it out of every range; split edits into multiple hunks when needed.\n - `-` rows are invalid. Literal text beginning with `-` or `+` must be written as `+-text` / `++text`.\n\nAnchors come from `read`/`search` output. `read` emits a `[PATH#TAG]` header from the session snapshot store and lines as `LINE:TEXT`; copy the header into the edit section and copy only the line number into hunk headers.\n\n### Tolerated input shapes (lenient parsing)\n\nThe canonical grammar is strict, but the hand parser accepts a few non-dangerous variants:\n\n- `replace N:` — accepted as `replace N..N:`.\n- `delete N` — accepted as single-line delete.\n- Missing trailing colon on `replace` or `insert` — accepted.\n- `replace N-M:`, `replace N…M:`, and `replace N M:` — accepted as `replace N..M:`.\n- Bare body rows with no `+` prefix are auto-prepended with `+` and a `BARE_BODY_AUTO_PIPED_WARNING` is appended.\n- `*** Begin Patch` / `*** End Patch` envelopes are silently consumed. `*** Abort` terminates parsing silently — ops parsed before the marker still apply, no warning surfaced.\n- Some malformed bracketed headers are recovered after stripping apply-patch path noise such as `Update File:` / `Add File:` and extra `***`, but the recovered header still needs a valid four-hex tag for the patcher to apply it.\n- `*** Update File:` / `*** Add File:` / `*** Delete File:` / `*** Move to:` apply_patch sentinels inside the diff body throw an `apply_patch sentinel … is not valid in hashline` error.\n- `@@`-bracketed hunk headers are rejected with guidance to write a verb header.\n- Bare `N` and bare `N M` / `N..M` headers are rejected with guidance to write `replace` or `delete`.\n- `delete N..M:` and any body rows under `delete` / `delete block` are rejected.\n- Empty `replace` / `insert` / `replace block` hunks are rejected.\n- `-` body rows are rejected with `MINUS_ROW_REJECTED`.\n- `replace block N:` / `delete block N` / `insert after block N:` require a wired tree-sitter resolver; `replace block` and `insert after block` additionally need at least one `+TEXT` body row, while `delete block` takes none. An unresolvable block (unsupported language, blank/closing-delimiter line, no node beginning on N, or a syntax error in the resolved block) is rejected on the apply/final-preview path; the streaming preview silently drops it instead. Exception: `insert after block N:` anchored on a pure closing-delimiter line is lowered to plain `insert after N:` with a warning — line N is the end of a block, and inserting after that end is exactly what the plain form does.\n\n## Outputs\n- Single-shot tool result; hashline mode does not use a `resolve` preview/apply handshake.\n- `content` contains one text block per call. For a successful single-file edit it is the post-edit `[path#TAG]` section header (a fresh snapshot tag for the written content), followed by a compact diff preview from `packages/hashline/src/diff-preview.ts` when one is emitted.\n- When the patch used `replace block`/`delete block`/`insert after block` ops (and the apply matched the tagged content), one `replace block N → resolved lines A-B (K lines)` line per block op (single-line spans render `resolved line A (1 line)`; insert-after appends `; body lands after line B`) is inserted between the `[PATH#TAG]` header and the diff preview, so the caller can confirm tree-sitter resolved the construct it intended.\n- Parse, apply, or recovery warnings are appended as:\n\n```text\nWarnings:\n...\n```\n\n- `details` is `EditToolDetails` from `packages/coding-agent/src/edit/renderer.ts`:\n - `diff`: unified diff string\n - `firstChangedLine`: first changed post-edit line\n - `diagnostics`: LSP/format result if available\n - `op`: `\"create\"` or `\"update\"` for hashline mode\n - `meta`: output metadata\n - `perFileResults`: present for multi-section input\n- Multi-section input returns one aggregated result with combined text and per-file details.\n\n## Worked examples\n\nReference file (the exact shape `read` returns):\n\n```text\n[a.ts#0A3B]\n1:const X = \"a\";\n2:const Y = X;\n3:\n4:console.log(X);\n5:console.log(Y);\n6:export { X, Y };\n```\n\nReplace line 1 with two lines:\n\n```text\n[a.ts#0A3B]\nreplace 1..1:\n+const X = \"b\";\n+export const Y = X;\n```\n\nInsert below line 5:\n\n```text\n[a.ts#0A3B]\ninsert after 5:\n+console.log(X + Y);\n```\n\nInsert above line 5:\n\n```text\n[a.ts#0A3B]\ninsert before 5:\n+console.log(X + Y);\n```\n\nDelete lines 4..5 entirely:\n\n```text\n[a.ts#0A3B]\ndelete 4..5\n```\n\nInsert at start and end of file:\n\n```text\n[a.ts#0A3B]\ninsert head:\n+// header\ninsert tail:\n+// trailer\n```\n\nMulti-file:\n\n```text\n[src/a.ts#0A3B]\nreplace 4..4:\n+const enabled = true;\n[src/b.ts#1F7C]\ndelete 20\n```\n\n## Limits & Caps\n- File snapshot tags are exactly four uppercase-hex chars — content-derived hashes (`computeFileHash()`) recorded in the per-session snapshot store.\n- The visible mismatch report shows 2 lines of context on each side (`MISMATCH_CONTEXT`) in `packages/hashline/src/messages.ts`.\n- Stale-anchor recovery uses `fuzzFactor: 0` in `packages/hashline/src/recovery.ts`.\n- `HL_FILE_PREFIX` is `[`, `HL_FILE_SUFFIX` is `]`, `HL_PAYLOAD_REPLACE` is `+`, `HL_RANGE_SEP` is `..`, `HL_FILE_HASH_SEP` is `#`, and hunk keyword constants are `replace` / `delete` / `insert` (`packages/hashline/src/format.ts`).\n\n## Errors\n- Missing section header:\n - `input must begin with \"[PATH#HASH]\" on the first non-blank line for anchored edits; got: ...`\n- Missing tag for any section:\n - `Missing hashline snapshot tag for edit to <path>; use \\`[<path>#tag]\\` from your latest read/search output. To create a new file, use the write tool.`\n- Stray payload line:\n - `line N: payload line has no preceding hunk header. Use \\`replace N..M:\\`, \\`delete N..M\\`, or \\`insert before|after|head|tail:\\` above the body. Got \"...\".`\n- Minus row:\n - ``line N: `-` rows are not valid; hashline ranges already name the lines being changed. To insert a literal line starting with `-`, write `+-…`.``\n- Empty body-bearing hunk:\n - `line N: \\`replace N..M:\\` needs at least one \\`+TEXT\\` body row. To delete lines, use \\`delete N..M\\`.`\n - `line N: \\`insert\\` needs at least one \\`+TEXT\\` body row.`\n - `line N: \\`replace block N:\\` needs at least one \\`+TEXT\\` body row. To delete a block, use \\`delete N..M\\` with the block's line range.`\n- Unresolvable block anchor (apply / final-preview path only):\n - `line N: \\`replace block X:\\` could not resolve a syntactic block beginning on line X. The language may be unsupported, the line may be blank or a closing delimiter, or the block may not parse. Use \\`replace X..M:\\` with the block's explicit end line instead.` — followed by a blank line and numbered `*`-marked context rows around line X (same shape as the mismatch preview).\n - `line N: \\`insert after block X:\\` could not resolve a syntactic block beginning on line X. The language may be unsupported, the line may be blank or a closing delimiter, or the block may not parse. Use \\`insert after M:\\` with the block's explicit last line instead.` — same context preview.\n- Delete with body:\n - `line N: \\`delete N..M\\` does not take body rows. Remove the body, or use \\`replace N..M:\\`.`\n - `line N: \\`delete block N\\` does not take body rows. Remove the body, or use \\`replace block N:\\` to replace the block.`\n- Range out of order:\n - `line N: range A..B ends before it starts.`\n- Overlapping hunks on the same anchor:\n - `line N: anchor line X is already targeted by another hunk on line Y. Issue ONE hunk per range; payload is only the final desired content, never a before/after pair.`\n- apply_patch / unified-diff contamination:\n - `line N: apply_patch sentinel \"*** …\" is not valid in hashline. File sections start with \\`[path#HASH]\\` (no \\`Update File:\\` / \\`Add File:\\` keyword). Use \\`replace N..M:\\`, \\`delete N..M\\`, or \\`insert before|after|head|tail:\\` ops.`\n - `line N: unified-diff hunk header (\\`@@ -N,M +N,M @@\\`) is not valid in hashline. Use \\`replace N..M:\\`, \\`delete N..M\\`, or \\`insert before|after|head|tail:\\` ops.`\n - `line N: \\`@@\\`-bracketed hunk header \"@@ …\" is not valid in hashline. Drop the \\`@@ ... @@\\` brackets and write a verb header such as \\`replace N..M:\\`.`\n - `line N: hunk headers need a verb. Use \\`replace N..N:\\` to replace, or \\`delete N\\` to delete.`\n - `line N: bare range hunk header \"N M\" is not valid. Hunk headers need a verb: write \\`replace N..M:\\` or \\`delete N..M\\`.`\n- Out-of-range anchor:\n - `Line N does not exist (file has M lines)`\n- Stale snapshot tag: the `Patcher` first attempts snapshot-based recovery. When recovery cannot prove a valid result it throws `MismatchError`, which distinguishes recognized-but-drifted hashes from never-recorded hashes. The error includes the current file hash plus context around each anchor.\n- No-op edit:\n - `Edits to <path> parsed and applied cleanly, but produced no change: your body row(s) are byte-identical to the file at the targeted lines. The bug is somewhere else — re-read the file before issuing another edit. Do NOT widen the payload or add lines; verify the anchor first.`\n - After `NOOP_HARD_LIMIT = 3` consecutive byte-identical no-ops of the same payload on the same file, the soft text result escalates to a `ToolError` (`STOP. Edits to <path> have been a byte-identical no-op N times in a row …`) from `packages/coding-agent/src/edit/hashline/noop-loop-guard.ts`.\n- Recovery failure is silent internally: if cache-based merge cannot prove a valid result, the mismatch error is surfaced unchanged.\n\n## Warnings\n- `Auto-prefixed bare body row(s) with +. Body rows must be +TEXT literal lines …` (`BARE_BODY_AUTO_PIPED_WARNING`)\n- Recovery banners: `RECOVERY_EXTERNAL_WARNING`, `RECOVERY_SESSION_CHAIN_WARNING`, `RECOVERY_SESSION_REPLAY_WARNING` (`packages/hashline/src/messages.ts`).\n",
90
+ "tools/edit.md": "# edit\n\n> Applies source edits; default mode is the hashline patch language consumed from a single `input` string.\n\n## Source\n- Entry: `packages/coding-agent/src/edit/index.ts`\n- Model-facing prompt: `packages/hashline/src/prompt.md`\n- Key collaborators:\n - `packages/coding-agent/src/utils/edit-mode.ts` — selects active edit mode\n - `packages/hashline/src/grammar.lark` — canonical constrained-decoding grammar\n - `packages/hashline/src/format.ts` — sigils and header constants (`[`, `]`, `#`, `+`, `SWAP`, `DEL`, `INS`)\n - `packages/hashline/src/input.ts` — parses `[PATH#TAG]` sections\n - `packages/hashline/src/tokenizer.ts` / `packages/hashline/src/parser.ts` — tokenizes and parses ops\n - `packages/hashline/src/apply.ts` — applies parsed edits to file text\n - `packages/hashline/src/mismatch.ts` — stale-anchor mismatch formatting\n - `packages/hashline/src/recovery.ts` — snapshot-based stale-anchor recovery\n - `packages/hashline/src/snapshots.ts` — mints and resolves per-path opaque snapshot tags\n\n## Inputs\n\n### Hashline mode (default)\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `input` | `string` | Yes | One or more file sections. Anchored sections must start with `[PATH#TAG]`; `TAG` is the four-hex snapshot tag emitted by the latest `read`/`search`/`write`/successful `edit`. Optional `*** Begin Patch` / `*** End Patch` envelope is ignored if present. |\n\nPatch language inside `input`:\n\n- **File header**: `[PATH#TAG]`. `TAG` is four uppercase-hex chars — a content-derived hash of the whole normalized file (`computeFileHash()`), recorded in the session snapshot store.\n- **Operations**:\n - `SWAP N..M:` — replace original lines N..M with the body rows below.\n - `SWAP.BLK N:` — replace the whole tree-sitter block beginning on line N (its header line through its closing line) with the body rows. The line span is resolved at apply time from the file's parse tree; point N at the line that opens the construct. The resolved span is exactly the node that begins on line N — a leading decorator, attribute, or doc-comment is a separate node and is not included; point N at the first decorator line (Python wraps `@dec` + `def` as one block) or fall back to `SWAP N..M:` to take a leading line-comment that parses as its own node (e.g. Rust `///`). On success the result echoes the matched span (`SWAP.BLK N → resolved lines A-B`). Errors (and steers to `SWAP N..M:`) when the language is unsupported, line N is blank or a closing delimiter, no node begins there, or the resolved block has a syntax error.\n - `DEL N..M` — delete original lines N..M. No body.\n - `DEL.BLK N` — delete the whole tree-sitter block beginning on line N (resolved like `SWAP.BLK N`, with the same decorator/comment caveat). No body. On success the result echoes the matched span (`DEL.BLK N → resolved lines A-B`). Same resolution failure modes and `DEL N..M` fallback.\n - `INS.PRE N:` — insert body rows immediately before line N.\n - `INS.POST N:` — insert body rows immediately after line N.\n - `INS.BLK.POST N:` — insert body rows after the last line of the tree-sitter block beginning on line N. Point N at the line that opens the construct, never its closing delimiter / last visible line; if you can see the last line already, use plain `INS.POST M:`. Same resolution failure modes and `INS.POST M:` fallback.\n - `INS.HEAD:` — insert body rows at the start of the file.\n - `INS.TAIL:` — insert body rows at the end of the file.\n- **Body rows**:\n - Only body-bearing headers end in `:`.\n - Every body row is `+TEXT`; `+` alone adds a blank line.\n - `DEL` never has body rows.\n - There is no repeat row kind. To keep a line, leave it out of every range; split edits into multiple hunks when needed.\n - `-` rows are invalid. Literal text beginning with `-` or `+` must be written as `+-text` / `++text`.\n\nAnchors come from `read`/`search` output. `read` emits a `[PATH#TAG]` header from the session snapshot store and lines as `LINE:TEXT`; copy the header into the edit section and copy only the line number into hunk headers.\n\n### Tolerated input shapes (lenient parsing)\n\nThe canonical grammar is strict, but the hand parser accepts a few non-dangerous variants:\n\n- `SWAP N:` — accepted as `SWAP N..N:`.\n- `DEL N` — accepted as single-line delete.\n- Missing trailing colon on `SWAP` or `INS` — accepted.\n- `SWAP N-M:`, `SWAP N…M:`, and `SWAP N M:` — accepted as `SWAP N..M:`.\n- Bare body rows with no `+` prefix are auto-prepended with `+` and a `BARE_BODY_AUTO_PIPED_WARNING` is appended.\n- `*** Begin Patch` / `*** End Patch` envelopes are silently consumed. `*** Abort` terminates parsing silently — ops parsed before the marker still apply, no warning surfaced.\n- Some malformed bracketed headers are recovered after stripping apply-patch path noise such as `Update File:` / `Add File:` and extra `***`, but the recovered header still needs a valid four-hex tag for the patcher to apply it.\n- `*** Update File:` / `*** Add File:` / `*** Delete File:` / `*** Move to:` apply_patch sentinels inside the diff body throw an `apply_patch sentinel … is not valid in hashline` error.\n- `@@`-bracketed hunk headers are rejected with guidance to write a verb header.\n- Bare `N` and bare `N M` / `N..M` headers are rejected with guidance to write `SWAP` or `DEL`.\n- `DEL N..M:` and any body rows under `DEL` / `DEL.BLK` are rejected.\n- Empty `SWAP` / `INS` / `SWAP.BLK` hunks are rejected.\n- `-` body rows are rejected with `MINUS_ROW_REJECTED`.\n- `SWAP.BLK N:` / `DEL.BLK N` / `INS.BLK.POST N:` require a wired tree-sitter resolver; `SWAP.BLK` and `INS.BLK.POST` additionally need at least one `+TEXT` body row, while `DEL.BLK` takes none. An unresolvable block (unsupported language, blank/closing-delimiter line, no node beginning on N, or a syntax error in the resolved block) is rejected on the apply/final-preview path; the streaming preview silently drops it instead. Exception: `INS.BLK.POST N:` anchored on a pure closing-delimiter line is lowered to plain `INS.POST N:` with a warning — line N is the end of a block, and inserting after that end is exactly what the plain form does.\n\n## Outputs\n- Single-shot tool result; hashline mode does not use a `resolve` preview/apply handshake.\n- `content` contains one text block per call. For a successful single-file edit it is the post-edit `[path#TAG]` section header (a fresh snapshot tag for the written content), followed by a compact diff preview from `packages/hashline/src/diff-preview.ts` when one is emitted.\n- When the patch used `SWAP.BLK`/`DEL.BLK`/`INS.BLK.POST` ops (and the apply matched the tagged content), one `SWAP.BLK N → resolved lines A-B (K lines)` line per block op (single-line spans render `resolved line A (1 line)`; INS.BLK.POST appends `; body lands after line B`) is inserted between the `[PATH#TAG]` header and the diff preview, so the caller can confirm tree-sitter resolved the construct it intended.\n- Parse, apply, or recovery warnings are appended as:\n\n```text\nWarnings:\n...\n```\n\n- `details` is `EditToolDetails` from `packages/coding-agent/src/edit/renderer.ts`:\n - `diff`: unified diff string\n - `firstChangedLine`: first changed post-edit line\n - `diagnostics`: LSP/format result if available\n - `op`: `\"create\"` or `\"update\"` for hashline mode\n - `meta`: output metadata\n - `perFileResults`: present for multi-section input\n- Multi-section input returns one aggregated result with combined text and per-file details.\n\n## Worked examples\n\nReference file (the exact shape `read` returns):\n\n```text\n[a.ts#0A3B]\n1:const X = \"a\";\n2:const Y = X;\n3:\n4:console.log(X);\n5:console.log(Y);\n6:export { X, Y };\n```\n\nReplace line 1 with two lines:\n\n```text\n[a.ts#0A3B]\nSWAP 1..1:\n+const X = \"b\";\n+export const Y = X;\n```\n\nInsert below line 5:\n\n```text\n[a.ts#0A3B]\nINS.POST 5:\n+console.log(X + Y);\n```\n\nInsert above line 5:\n\n```text\n[a.ts#0A3B]\nINS.PRE 5:\n+console.log(X + Y);\n```\n\nDelete lines 4..5 entirely:\n\n```text\n[a.ts#0A3B]\nDEL 4..5\n```\n\nInsert at start and end of file:\n\n```text\n[a.ts#0A3B]\nINS.HEAD:\n+// header\nINS.TAIL:\n+// trailer\n```\n\nMulti-file:\n```text\n[src/a.ts#0A3B]\nSWAP 4..4:\n+const enabled = true;\n[src/b.ts#1F7C]\nDEL 20\n```\n\n## Limits & Caps\n- File snapshot tags are exactly four uppercase-hex chars — content-derived hashes (`computeFileHash()`) recorded in the per-session snapshot store.\n- The visible mismatch report shows 2 lines of context on each side (`MISMATCH_CONTEXT`) in `packages/hashline/src/messages.ts`.\n- Stale-anchor recovery uses `fuzzFactor: 0` in `packages/hashline/src/recovery.ts`.\n- `HL_FILE_PREFIX` is `[`, `HL_FILE_SUFFIX` is `]`, `HL_PAYLOAD_REPLACE` is `+`, `HL_RANGE_SEP` is `..`, `HL_FILE_HASH_SEP` is `#`, and hunk keyword constants are `SWAP` / `DEL` / `INS` (`packages/hashline/src/format.ts`).\n\n## Errors\n- Missing section header:\n - `input must begin with \"[PATH#HASH]\" on the first non-blank line for anchored edits; got: ...`\n- Missing tag for any section:\n - `Missing hashline snapshot tag for edit to <path>; use \\`[<path>#tag]\\` from your latest read/search output. To create a new file, use the write tool.`\n- Stray payload line:\n - `line N: payload line has no preceding hunk header. Use \\`SWAP N..M:\\`, \\`DEL N..M\\`, or \\`INS.PRE|POST|HEAD|TAIL:\\` above the body. Got \"...\".`\n- Minus row:\n - ``line N: `-` rows are not valid; hashline ranges already name the lines being changed. To insert a literal line starting with `-`, write `+-…`.``\n- Empty body-bearing hunk:\n - `line N: \\`SWAP N..M:\\` needs at least one \\`+TEXT\\` body row. To delete lines, use \\`DEL N..M\\`.`\n - `line N: \\`INS\\` needs at least one \\`+TEXT\\` body row.`\n - `line N: \\`SWAP.BLK N:\\` needs at least one \\`+TEXT\\` body row. To delete a block, use \\`DEL.BLK N\\`.`\n- Unresolvable block anchor (apply / final-preview path only):\n - `line N: \\`SWAP.BLK X:\\` could not resolve a syntactic block beginning on line X. The language may be unsupported, the line may be blank or a closing delimiter, or the block may not parse. Use \\`SWAP X..M:\\` with the block's explicit end line instead.` — followed by a blank line and numbered `*`-marked context rows around line X (same shape as the mismatch preview).\n - `line N: \\`INS.BLK.POST X:\\` could not resolve a syntactic block beginning on line X. The language may be unsupported, the line may be blank or a closing delimiter, or the block may not parse. Use \\`INS.POST M:\\` with the block's explicit last line instead.` — same context preview.\n- Delete with body:\n - `line N: \\`DEL N..M\\` does not take body rows. Remove the body, or use \\`SWAP N..M:\\`.`\n - `line N: \\`DEL.BLK N\\` does not take body rows. Remove the body, or use \\`SWAP.BLK N:\\` to replace the block.`\n- Range out of order:\n - `line N: range A..B ends before it starts.`\n- Overlapping hunks on the same anchor:\n - `line N: anchor line X is already targeted by another hunk on line Y. Issue ONE hunk per range; payload is only the final desired content, never a before/after pair.`\n- apply_patch / unified-diff contamination:\n - `line N: apply_patch sentinel \"*** …\" is not valid in hashline. File sections start with \\`[path#HASH]\\` (no \\`Update File:\\` / \\`Add File:\\` keyword). Use \\`SWAP N..M:\\`, \\`DEL N..M\\`, or \\`INS.PRE|POST|HEAD|TAIL:\\` ops.`\n - `line N: unified-diff hunk header (\\`@@ -N,M +N,M @@\\`) is not valid in hashline. Use \\`SWAP N..M:\\`, \\`DEL N..M\\`, or \\`INS.PRE|POST|HEAD|TAIL:\\` ops.`\n - `line N: \\`@@\\`-bracketed hunk header \"@@ …\" is not valid in hashline. Drop the \\`@@ ... @@\\` brackets and write a verb header such as \\`SWAP N..M:\\`.`\n - `line N: hunk headers need a verb. Use \\`SWAP N..N:\\` to replace, or \\`DEL N\\` to delete.`\n - `line N: bare range hunk header \"N M\" is not valid. Hunk headers need a verb: write \\`SWAP ${bareRange[1]}..${bareRange[2]}:\\` or \\`DEL ${bareRange[1]}..${bareRange[2]}\\`.`\n- Out-of-range anchor:\n - `Line N does not exist (file has M lines)`\n- Stale snapshot tag: the `Patcher` first attempts snapshot-based recovery. When recovery cannot prove a valid result it throws `MismatchError`, which distinguishes recognized-but-drifted hashes from never-recorded hashes. The error includes the current file hash plus context around each anchor.\n- No-op edit:\n - `Edits to <path> parsed and applied cleanly, but produced no change: your body row(s) are byte-identical to the file at the targeted lines. The bug is somewhere else — re-read the file before issuing another edit. Do NOT widen the payload or add lines; verify the anchor first.`\n - After `NOOP_HARD_LIMIT = 3` consecutive byte-identical no-ops of the same payload on the same file, the soft text result escalates to a `ToolError` (`STOP. Edits to <path> have been a byte-identical no-op N times in a row …`) from `packages/coding-agent/src/edit/hashline/noop-loop-guard.ts`.\n- Recovery failure is silent internally: if cache-based merge cannot prove a valid result, the mismatch error is surfaced unchanged.\n\n## Warnings\n- `Auto-prefixed bare body row(s) with +. Body rows must be +TEXT literal lines …` (`BARE_BODY_AUTO_PIPED_WARNING`)\n- Recovery banners: `RECOVERY_EXTERNAL_WARNING`, `RECOVERY_SESSION_CHAIN_WARNING`, `RECOVERY_SESSION_REPLAY_WARNING` (`packages/hashline/src/messages.ts`).\n",
84
91
  "tools/eval.md": "# eval\n\n> Execute Python or JavaScript code in persistent cell-based runtimes.\n\n> **Notice:** Do not shell out to `python -c`/`python -e`, `bun -e`, or `node -e` via the `bash` tool for ad-hoc code execution. Use this tool instead — it gives you persistent state across cells, structured `display()` output, image/JSON capture, and proper cancellation/timeout handling that one-shot `-e`/`-c` invocations cannot provide.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/eval.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/eval.md`\n- Key collaborators:\n - `packages/coding-agent/src/eval/backend.ts` — backend execution contract\n - `packages/coding-agent/src/eval/agent-bridge.ts` — host-side `agent()` bridge into the subagent executor\n - `packages/coding-agent/src/eval/js/executor.ts` — JS backend adapter\n - `packages/coding-agent/src/eval/js/worker-core.ts` — JS execution, VM context, display/log capture\n - `packages/coding-agent/src/eval/js/shared/prelude.txt` — JS global helper installer\n - `packages/coding-agent/src/eval/js/shared/helpers.ts` — JS filesystem/text/env helper implementations\n - `packages/coding-agent/src/eval/py/index.ts` — Python backend adapter\n - `packages/coding-agent/src/eval/py/executor.ts` — kernel session retention, reset, cleanup\n - `packages/coding-agent/src/eval/py/kernel.ts` — subprocess NDJSON runner protocol, display capture\n - `packages/coding-agent/src/eval/py/prelude.py` — Python helper functions and status events\n - `packages/coding-agent/src/session/streaming-output.ts` — truncation, artifacts, streamed chunks\n - `docs/python-repl.md` — Python kernel/runner internals\n\n## Inputs\n\nTool parameters are a JSON object with a single `cells` field — an ordered array of cell objects. Each cell is a structured record; there is no `*** Cell` header parsing, no language sniffing, and no implicit single-cell fallback. Cells run in array order; state persists within each language across cells and across tool calls.\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `cells` | `EvalCellInput[]` | Yes | Cells executed in order. At least one cell is required (`.min(1)`). |\n\nEach `EvalCellInput` (from `evalCellSchema` in `packages/coding-agent/src/tools/eval.ts`):\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `language` | `\"py\" \\| \"js\"` | Yes | Backend selector. `\"py\"` maps to the IPython-style subprocess kernel (`python` backend); `\"js\"` maps to the persistent JavaScript VM. |\n| `code` | `string` | Yes | Cell body, verbatim. JSON-encoded — embed newlines, quotes, and indentation directly; no fences, no headers. |\n| `title` | `string` | No | Short label rendered in the transcript (e.g. `\"imports\"`, `\"load config\"`). |\n| `timeout` | `integer` | No | Per-cell timeout in seconds, clamped to `1..3600`. Defaults to 30 when omitted. |\n| `reset` | `boolean` | No | Wipe this cell's language kernel before running. Reset is per-language: a `py` cell's reset does not touch the JS VM and vice versa. Defaults to `false`. |\n\nMinimal example matching the live schema:\n\n```json\n{\n \"cells\": [\n { \"language\": \"py\", \"title\": \"imports\", \"timeout\": 10, \"code\": \"import json\\nfrom pathlib import Path\" },\n { \"language\": \"py\", \"title\": \"load config\", \"code\": \"data = json.loads(read('package.json'))\\ndisplay(data)\" },\n { \"language\": \"js\", \"title\": \"summary\", \"reset\": true, \"code\": \"const data = JSON.parse(await read('package.json'));\\ndisplay(data);\\nreturn data.name;\" }\n ]\n}\n```\n\n## Outputs\n\nFinal result from `EvalTool.execute()` is single-shot, but `onUpdate` streams partial text and `details` while cells run.\n\nReturned shape:\n\n- `content`: one text block containing combined cell output, `(displayed N image(s); no text output)` when only images exist, or `(no output)` when nothing visible was produced; image outputs are appended as additional image content blocks.\n- `details` (`EvalToolDetails` from `packages/coding-agent/src/eval/types.ts`):\n - `cells`: per-cell code, status (`pending`/`running`/`complete`/`error`), output, duration, exit code, status events, markdown flag\n - `language`: first backend used\n - `languages`: distinct backends used, in first-use order\n - `jsonOutputs`: structured values emitted via `display(...)`\n - `statusEvents`: aggregated helper/tool status events\n - `notice`: backend fallback notice (currently unused; reserved for future per-cell notices)\n - `meta`: truncation metadata\n - `isError`: set on cell failure or cancellation\n\nRenderer behavior in `packages/coding-agent/src/tools/eval.ts`:\n\n- call preview renders each cell's `code` with syntax highlighting based on its declared `language`\n- result view renders each cell separately, including status, duration, and output\n- markdown outputs are rendered with the Markdown component instead of plain text\n- `jsonOutputs` render as a tree, collapsed or expanded depending on UI state\n- timeout / truncation notices render as dim metadata lines\n- images are returned as content image blocks; live updates may also carry `details.images` while execution is in progress\n\nSide-channel artifacts:\n\n- `session.allocateOutputArtifact?.(\"eval\")` may allocate an `artifact://...` backing store for spilled output.\n- Truncated output metadata points at that artifact when available.\n\n## Flow\n\n1. `EvalTool.execute()` in `packages/coding-agent/src/tools/eval.ts` receives `params.cells` already validated by the Zod schema — no string parsing step.\n2. For each cell, `execute()` maps `cell.language` to an `EvalLanguage` (`\"py\"` → `\"python\"`, `\"js\"` → `\"js\"`) and calls `resolveBackend(session, language)`:\n - `python` is gated on `eval.py !== false` and `pythonBackend.isAvailable(session)`.\n - `js` is gated on `eval.js !== false`.\n - A disabled or unavailable requested backend throws `ToolError`; there is no auto-fallback or sniffing.\n3. The tool allocates an `OutputSink`, a `TailBuffer`, per-cell result objects, and a `sessionAbortController`. `session.trackEvalExecution?.(...)` can wrap the whole run for external cancellation tracking.\n4. It resolves the executor session id from `session.getEvalSessionId?.()`, falling back to `defaultEvalSessionId(session)`. Subagents inherit the parent's id so both sides share the same JS VM and Python kernel for each backend.\n5. Cells execute sequentially within one eval tool call. For each cell, `execute()`:\n - clamps `cell.timeout ?? 30` seconds through `clampTimeout(\"eval\", ...)`\n - builds a combined abort signal from the tool signal, the timeout, and the session abort controller\n - marks the cell `running` and emits an update\n - calls the backend's `execute()` with `cwd`, `sessionId`, `sessionFile`, `kernelOwnerId`, `idleTimeoutMs`, `reset` (defaults to `false`), the combined signal, and chunk/status callbacks\n6. JS cells dispatch through `packages/coding-agent/src/eval/js/index.ts` into `executeJs()`; Python cells dispatch through `packages/coding-agent/src/eval/py/index.ts` into `executePython()`.\n7. Backend text chunks stream into the shared `OutputSink`; rich outputs are accumulated separately as JSON, images, markdown markers, and status events.\n8. After each cell:\n - text output is trimmed and stored on that cell result\n - multi-cell runs prefix text with `[i/n]` and the optional title\n - cancellations return early with `isError: true` and a cell-specific abort message\n - non-zero exit codes return early with `isError: true` and a message naming the failed cell\n - later cells are skipped after the first error, but earlier cell state persists in the underlying runtime\n9. On success, the tool joins all cell outputs, synthesizes `(no text output)` or `(no output)` when needed, and attaches truncation metadata from `summarizeFinal()`.\n10. The renderer uses `details.cells`, `details.jsonOutputs`, and `details.statusEvents` to build notebook-style output. `mergeCallAndResult = true` and `inline = true`, so call and result render together in the transcript.\n\n## Modes / Variants\n\n### Backend selection\n\nBackend choice is **explicit per cell** — there is no auto-detection.\n\n- `language: \"py\"` → Python (IPython-style subprocess kernel) backend\n- `language: \"js\"` → JavaScript VM backend\n\nIf the requested backend is disabled or unavailable, the tool throws `ToolError` for that cell. The caller chooses; the tool does not silently substitute.\n\n### JavaScript runtime\n\nImplemented in `packages/coding-agent/src/eval/js/worker-core.ts`, `packages/coding-agent/src/eval/js/shared/prelude.txt`, and `packages/coding-agent/src/eval/js/shared/helpers.ts`.\n\n- Persistent worker-backed VM sessions keyed by `js:${sessionId}`\n- `reset: true` calls `resetVmContext(sessionKey)` before the cell executes; reset is destructive for all live runs on that JS session\n- Top-level `await` and bare `return` are supported by wrapping code in an async IIFE when `wrapCode()` sees `await` or `return`\n- Top-level static `import ... from ...` and dynamic `import(...)` calls are routed through `rewriteImports()`, which sends them via `__omp_import__` so the specifier resolves against the session cwd. Dynamic-import call sites are swapped for a guarded shim (`typeof __omp_import__ === \"function\" ? __omp_import__ : (s, o) => import(s, o)`) rather than the bare helper identifier: functions handed to puppeteer (`tab.evaluate`, `page.evaluate`, ...) are serialized with `Function.prototype.toString()` and re-evaluated inside the browser page, where the worker-injected helper does not exist, so the shim falls back to native dynamic import there\n- Module cache is busted for **local** imports between cells so edits to source files are picked up without restarting the runtime. `__omp_import__` deletes `require.cache[absPath]` before re-importing whenever the original specifier is a filesystem path: relative (`./x`, `../x`, `.`, `..`), POSIX-absolute (`/...`), home-prefixed (`~/...`), or Windows drive-letter (`C:\\...` / `C:/...`). Bare specifiers (`react`, `lodash/x`) and URL/scheme specifiers (`node:fs`, `file://...`, `https://...`) are left in cache so package identity stays stable across cells. The cache-bust only fires when the resolved target is an absolute path — unresolved bare-package fallbacks (`resolveImportSpecifier()` returning the original specifier) skip it.\n- The prelude installs globals:\n - `display`, `print`\n - `read`, `write`, `append`, `sort`, `uniq`, `counter`, `diff`, `tree`, `env`, `output`\n - `tool.<name>(args)` proxy for arbitrary session tool calls\n - `completion(prompt, opts?)` for oneshot, stateless model calls (see _Oneshot completion helper_ below)\n - `agent(prompt, opts?)` for a single subagent call, plus `parallel()` / `pipeline()` bounded-pool helpers (see _Subagent helper_ below)\n- JS helpers that touch the host/runtime boundary are async and `await`able; pure text helpers (`sort`, `uniq`, `counter`) return synchronously but may still be safely awaited.\n- JS helper options may be passed either positionally in the Python order or as a trailing options object. `null` and `undefined` skip positional slots:\n - `await read(path, offset?, limit?)` or `await read(path, { offset?, limit? })`\n - `await tree(path = \".\", maxDepth?, showHidden?)` or `await tree(path, { maxDepth?, showHidden? })`\n - `sort(text, reverse?, unique?)`, `uniq(text, count?)`, `counter(items, limit?, reverse?)`\n - `await agent(prompt, agentType?, model?, label?, schema?)` or `await agent(prompt, { agentType?, model?, label?, schema? })`\n - `await parallel([() => agent(\"a\"), () => agent(\"b\")])`\n - `await pipeline(items, stage1, stage2)`\n- `display(value)` behavior:\n - plain objects/arrays become JSON outputs\n - `{ type: \"image\", data, mimeType }` becomes an image output\n - scalars become text\n- The VM exposes a restricted `process` subset plus `Buffer`, `fetch`, `Blob`, `File`, `Headers`, `Request`, `Response`, `fs`, `require`, and browser-style globals\n- Concurrent runs on the same VM are not queued end-to-end. Synchronous JS still runs on the single event loop; awaited regions can interleave with sibling runs.\n\n### Python runtime\n\nImplemented in `packages/coding-agent/src/eval/py/executor.ts`, `packages/coding-agent/src/eval/py/kernel.ts`, and `packages/coding-agent/src/eval/py/prelude.py`. See `docs/python-repl.md` for kernel and runner details.\n\n- Default mode is retained `session` kernels keyed by `python:${sessionId}` plus normalized cwd and interpreter\n- Optional `python.kernelMode = \"per-call\"` creates a fresh kernel for each cell and shuts it down afterward\n- `reset: true` disposes the retained kernel for that session before the cell runs; later Python cells in the same tool call reuse the fresh kernel\n- Startup path:\n - availability check\n - create/connect kernel\n - initialize cwd / env / `sys.path`\n - execute `PYTHON_PRELUDE`\n- Python cells run in the runner's persistent asyncio event loop, so top-level `await` works; the prompt warns not to use `asyncio.run(...)`\n- The Python prelude defines helpers with the same surface as JS where practical, including `tool.<name>(args)`, `completion(...)`, and `agent(...)` through a per-run loopback bridge\n- Synchronous statement blocks run in the default executor with ContextVar state copied in; the GIL still serializes bytecode execution, but awaited regions can interleave with sibling cells\n- Kernel `display` / `result` frames map to:\n - `application/x-omp-status` → status event\n - `image/png` → image output\n - `application/json` → JSON output\n - `text/markdown` → markdown output\n - `text/plain` → text output\n - `text/html` → HTML converted to markdown with `htmlToBasicMarkdown()`\n- Interactive stdin is rejected: a stdin-flagged result returns exit code `1` with `Kernel requested stdin; interactive input is not supported.`\n\n### Oneshot completion helper (`completion`)\n\nBoth runtimes expose `completion()` — a single stateless completion against a model tier. It is intentionally minimal: no conversation history, no agent-visible tools, pure text in / text (or object) out. Implemented host-side in `packages/coding-agent/src/eval/completion-bridge.ts` and routed through the existing tool bridge under the reserved name `__completion__`.\n\n- Signatures:\n - JS: `await completion(prompt, { model?, system?, schema? })`\n - Python: `completion(prompt, *, model=\"default\", system=None, schema=None)`\n- `model` selects a tier (default `\"default\"`):\n - `\"smol\"` → `pi/smol` role (fast / cheap)\n - `\"default\"` → the session's active model, falling back to the `pi/default` role\n - `\"slow\"` → `pi/slow` role; requests high reasoning effort only on reasoning-capable models\n- `system` (optional) supplies a system prompt.\n- `schema` (optional) is a plain JSON-Schema object. When present, the model is forced to call a single synthetic `respond` tool with that schema (loose, non-strict), and the helper returns the parsed object. When absent, the helper returns the completion string.\n- Errors surface as exceptions: unresolved tier, missing API key, an `error`/`aborted` stop reason, or empty output each raise.\n\n### Subagent helper (`agent`)\n\nBoth runtimes expose `agent()` — a single subagent invocation routed through `packages/coding-agent/src/eval/agent-bridge.ts` into the same `runSubprocess(...)` path used by the `task` tool. It uses the current eval session's spawn policy and inherits the parent eval executor id, so parent and subagent code share JS/Python runtime state.\n\n- Signatures:\n - JS: `await agent(prompt, agentType?, model?, label?, schema?)` or `await agent(prompt, { agentType?, model?, label?, schema? })`\n - Python: `agent(prompt, *, agent_type=\"task\", model=None, label=None, schema=None)`\n- `agentType` / `agent_type` defaults to the bundled `task` agent and resolves through normal agent discovery, so project and user agents work.\n- `model` overrides the selected agent's model. Without it, normal per-agent settings and the agent frontmatter model apply.\n- Shared background is passed via files: write a `local://` file and reference it in the prompt. `label` controls the `agent://<id>` output label prefix.\n- `schema` passes a JSON Schema to the subagent structured-output path. When present, the helper parses the final JSON text and returns an object.\n- Spawn restrictions use `session.getSessionSpawns()` exactly like the `task` tool. Eval-driven subagent recursion is capped at depth 3.\n- JS and Python both expose `parallel(thunks)` and `pipeline(items, ...stages)`; both use a bounded async/threaded pool whose width tracks the `task.maxConcurrency` setting (the same ceiling the `task` tool uses; `0` = run every item at once), preserve item order, and propagate rejections. The width is fetched live from the host via the `__concurrency__` bridge, so the helpers no longer take a `concurrency` argument.\n- Errors surface as exceptions: unknown or disabled agent, disallowed spawn, recursion cap, subagent failure, or invalid structured output all fail the eval cell.\n\n### Multi-language call behavior\n\nA single tool call can mix Python and JS cells. Persistence is per language runtime:\n\n- `reset: true` on a Python cell does not touch JS state\n- `reset: true` on a JS cell does not touch Python state\n- each backend keeps its own retained session keyed from the same session-derived ID\n\n## Side Effects\n\n- Filesystem\n - JS/Python prelude helpers can read, write, append, diff, and traverse filesystem paths under the session cwd or absolute paths.\n - JS helper `read()` rejects protocol URIs (`://`) and directory paths; use `tool.read(...)` for internal URLs or reader-mode behavior.\n - Output may spill to an artifact file via `OutputSink`.\n- Network\n - Python backend speaks NDJSON to a local `python3` subprocess over stdin/stdout (no network).\n - JS runtime exposes `fetch` and `tool.<name>()`; those tools may perform additional network I/O.\n- Subprocesses / native bindings\n - Python availability check runs `<python> -c ...`.\n - Python backend spawns one `python -u runner.py` subprocess per kernel; cancellation sends `SIGINT`. Details in `docs/python-repl.md`.\n - `agent()` runs one in-process subagent via the task executor; that subagent may use its configured tools.\n- Session state\n - `session.assertEvalExecutionAllowed?.()` can block execution.\n - `session.trackEvalExecution?.(...)` can register cancellable eval work.\n - `session.getSessionFile?.()`, `session.getEvalSessionId?.()`, and `session.getEvalKernelOwnerId?.()` influence VM/kernel reuse and artifact lookup.\n - JS VM contexts persist across eval calls until reset/disposal.\n - Python retained kernels persist until reset, owner cleanup, or process exit.\n - `agent()` allocates `agent://<id>` output artifacts and reuses the parent's eval executor id.\n- User-visible prompts / interactive UI\n - none; stdin requests are rejected programmatically\n- Background work / cancellation\n - Python retained kernels have heartbeat and idle cleanup timers.\n - Cancellation hard-kills/resets the shared executor for that backend: JS terminates the worker, Python sends SIGINT and may escalate to subprocess shutdown.\n\n## Limits & Caps\n\n- Per-cell timeout default: 30s (applied when `timeout` is omitted in `EvalTool.execute()`; clamped through `TOOL_TIMEOUTS.eval.default` in `packages/coding-agent/src/tools/tool-timeouts.ts`)\n- Schema-level `timeout` range: integer `1..3600` seconds (enforced by Zod on the cell schema)\n- Timeout clamp at runtime: 1s minimum, 3600s maximum (`TOOL_TIMEOUTS.eval` in `packages/coding-agent/src/tools/tool-timeouts.ts`)\n- Transcript code/output preview: 10 lines by default (`EVAL_DEFAULT_PREVIEW_LINES` in `packages/coding-agent/src/tools/eval-render.ts`, re-exported from `eval.ts`)\n- Output truncation window: 50KB default (`DEFAULT_MAX_BYTES` in `packages/coding-agent/src/session/streaming-output.ts`)\n- Output line cap inside truncation helpers: 3000 lines (`DEFAULT_MAX_LINES` in `packages/coding-agent/src/session/streaming-output.ts`)\n- Streaming tail buffer for live updates: `DEFAULT_MAX_BYTES * 2` = 100KB (`packages/coding-agent/src/tools/eval.ts`)\n- JS/Python `parallel()` / `pipeline()` helper pool width: the `task.maxConcurrency` setting (default 32; `0` = unbounded), resolved live via the `__concurrency__` bridge (`packages/coding-agent/src/eval/concurrency-bridge.ts`)\n- Eval-driven `agent()` recursion cap: task depth 3 (`EVAL_AGENT_MAX_DEPTH`)\n- Python kernel startup wait: 10s (`STARTUP_TIMEOUT_MS` in `packages/coding-agent/src/eval/py/kernel.ts`)\n- Python kernel shutdown grace per escalation step (`exit` request → `SIGTERM` → `SIGKILL`): 1000ms (`SHUTDOWN_GRACE_MS` in `packages/coding-agent/src/eval/py/kernel.ts`)\n- Python SIGINT escalation window: 5s without a `done` frame before the subprocess is killed (`INTERRUPT_ESCALATION_MS` in `packages/coding-agent/src/eval/py/kernel.ts`)\n- Python auto-restart budget: a dead retained kernel is replaced and the cell retried once per execution (`executeOnSession` in `packages/coding-agent/src/eval/py/executor.ts`)\n\n## Errors\n\n- Zod validation rejects malformed `cells` arrays before `execute()` runs (missing `language`/`code`, out-of-range `timeout`, empty `cells`).\n- Missing session without proxy executor throws `ToolError(\"Eval tool requires a session when not using proxy executor\")`.\n- Disabled/unavailable backends throw `ToolError` from `resolveBackend()`:\n - `eval.py = false` and a `py` cell is requested\n - `eval.js = false` and a `js` cell is requested\n - Python kernel unavailable and a `py` cell is requested\n- JS runtime exceptions are converted into text output plus `exitCode: 1`; cancellations return `cancelled: true` and may append `Command timed out`.\n- Python execution errors from the kernel become text output and `exitCode: 1`; later cells are skipped.\n- Python stdin requests are treated as errors with the message `Kernel requested stdin; interactive input is not supported.`\n- Cancellation is returned, not thrown, once backend execution has started. The tool formats it as a cell failure and sets `details.isError = true`.\n- If output truncates, the tool still succeeds; truncation is surfaced through `details.meta` and artifact-backed full output when available.\n\n## Shared executor trade-offs\n\n- Parent agents and subagents share eval state bidirectionally when a subagent inherits the parent's executor id. Mutations in either direction are visible to the other participant.\n- Async regions of concurrent runs can interleave. Synchronous JS still blocks the VM event loop; synchronous Python still contends on the GIL.\n- Cancelling one run is destructive to the shared backend executor. This is intentional: JS worker termination and Python SIGINT/subprocess shutdown are the only reliable way to interrupt arbitrary user code.\n- `reset: true` is destructive for every live run on that backend session id. Concurrent Python resets coalesce — a reset already in flight is awaited rather than duplicated, and runs queued behind it proceed on the freshly-restarted kernel.\n\n## Notes\n\n- Backend selection is strictly explicit per cell: `language` must be `\"py\"` or `\"js\"`. The previous `*** Cell` header parser, the `eval.lark` constrained grammar, and the sniffer-based fallback have all been removed.\n- `EvalTool.customFormat` no longer exists. Tool calls flow through the standard JSON schema; there is no Lark-constrained sampling path.\n- `tool.<name>()` exists in both JS and Python. Python calls route through a per-run loopback bridge keyed by the current cell id.\n- JS helper paths reject protocol URIs (`://`) in `resolveRegularFile()` for `read()`, and resolve other paths against the session cwd or absolute filesystem path. Use `tool.read(...)` or another tool explicitly for internal URLs.\n- Python helper `output(...)` depends on `PI_ARTIFACTS_DIR` or `PI_SESSION_FILE`; it fails outside a session-backed run.\n- `display()` can produce text and structured outputs from the same value; the renderer prefers markdown over `text/plain` when both exist.\n- JS static imports are rewritten only at top level. Nested imports stay invalid and surface normal JS syntax/runtime errors.\n- `EvalTool` is `concurrency = \"exclusive\"` within one agent session, but parent and subagent sessions can run eval concurrently when they share an inherited executor id.\n- The tool description shown to the model is templated by backend availability (`getEvalToolDescription()`); if Python is unavailable, the prompt omits Python-specific instructions.\n",
85
92
  "tools/find.md": "# find\n\n> Find filesystem paths by glob; use `search` when you need content matches instead of path matches.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/find.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/find.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/path-utils.ts` — normalize inputs; split base path vs glob.\n - `packages/coding-agent/src/tools/list-limit.ts` — apply result-count caps.\n - `packages/coding-agent/src/session/streaming-output.ts` — truncate text output at byte cap.\n - `packages/coding-agent/src/tools/tool-result.ts` — build `content` and `details.meta`.\n - `packages/coding-agent/src/tools/output-meta.ts` — encode limit / truncation metadata.\n - `packages/coding-agent/src/tools/tool-errors.ts` — map user-facing tool errors.\n - `packages/coding-agent/src/tools/index.ts` — register the built-in local implementation.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `paths` | `string[]` | Yes | One or more globs, files, directories, or internal URLs with backing files. Empty strings are rejected. Single entries accidentally joined with comma, semicolon, or whitespace are expanded only after existence validation; existing paths containing delimiters stay intact. Each entry becomes its own walk root; multi-entry calls run those scans concurrently. |\n| `hidden` | `boolean` | No | Whether hidden files are included. Defaults to `true` (`hidden ?? true`). |\n| `gitignore` | `boolean` | No | Whether `.gitignore` is respected during local native globbing. Defaults to `true`; set `false` to include gitignored files. |\n| `limit` | `number` | No | Max returned paths. Defaults to `200`; finite positive inputs are floored then clamped to `1..200`. |\n| `timeout` | `number` | No | Timeout in seconds. Defaults to `5`; clamped to `0.5..60`. On timeout, returns partial matches collected so far with a timeout notice and `truncated: true`. |\n\n## Outputs\nThe tool returns a single text block plus structured `details`.\n\n- Success text: matching paths grouped as a multi-level, prefix-folded directory tree (`formatGroupedPaths()`): one `#` per nesting level, single-child directory chains fold into one header (`# a/b/c/`), and files are listed bare under the deepest owning header; root-level matches are listed without a header. Directory matches carry a trailing `/`. Exact file inputs return that file path as one line.\n- Empty result text: `No files found matching pattern`, optionally followed by a timeout or missing-path notice.\n- Multi-path partial miss: appends `Skipped missing paths: ...` after the result block, or after the empty-result line.\n- `details` may include:\n - `scopePath`: display form of the searched root or merged roots.\n - `fileCount`: number of paths returned after result limiting.\n - `files`: returned paths as an array.\n - `truncated`: whether result count or byte truncation occurred.\n - `resultLimitReached`: reached result limit.\n - `missingPaths`: skipped missing inputs in multi-path calls.\n - `truncation` / `meta.limits`: structured truncation and limit metadata for renderers.\n- Streaming: when the runtime supplies `onUpdate`, the local implementation emits incremental newline-delimited text snapshots during globbing, throttled to 200 ms. Final output is grouped; streaming snapshots are not.\n\n## Flow\n\n1. `FindTool.execute()` expands delimiter-flattened local `paths` entries with `expandDelimitedPathEntries(..., parseFindPattern)` unless custom operations are injected. The splitter validates candidate parts by statting their parsed base paths, keeps existing delimiter-containing paths intact, accepts comma/semicolon splits when at least one part resolves, and accepts whitespace splits only when every part resolves.\n2. The tool normalizes each resulting entry with `normalizePathLikeInput()` and `/\\\\/g -> \"/\"` (`packages/coding-agent/src/tools/find.ts`). Empty normalized entries fail with `` `paths` must contain non-empty globs or paths ``.\n3. For multi-path local calls, `partitionExistingPaths(..., parseFindPattern)` (`packages/coding-agent/src/tools/path-utils.ts`) stats each base path. Missing entries are skipped; if all are missing, the tool throws `Path not found: ...`. Single missing paths still hard-fail.\n4. The tool calls `resolveExplicitFindPatterns()` for multi-entry calls; it parses each entry into its own `(basePath, globPattern, hasGlob)` target so every path is walked as its own root (collapsing to a shared ancestor would scan unrelated siblings). Single-entry calls parse with `parseFindPattern()` directly.\n5. `parseFindPattern()` determines `(basePath, globPattern, hasGlob)`:\n - no glob chars (`*`, `?`, `[`, `{`) => search that path with implicit `**/*`.\n - glob in the first segment => search from `.` and, unless the pattern already starts with `**/`, prefix it with `**/`.\n - glob later in the path => split at the first glob-bearing segment.\n6. `resolveToCwd()` converts the base path to an absolute path under the session cwd. A resolved `/` is rejected with `Searching from root directory '/' is not allowed`.\n7. `limit` defaults to `DEFAULT_LIMIT` (`200`), must be positive and finite, is floored, then clamped to `MAX_LIMIT` (`200`). `hidden` and `gitignore` both default to `true`. `timeout` is converted to milliseconds and clamped to `500..60_000` before building an `AbortSignal.timeout(...)`.\n8. Execution then branches:\n - **Custom operations branch**: if `FindToolOptions.operations.glob` exists, the tool checks existence with `operations.exists()`, short-circuits exact-file inputs via `operations.stat()` when available, then calls `operations.glob(globPattern, searchPath, { ignore: [\"**/node_modules/**\", \"**/.git/**\"], limit })`.\n - **Built-in local branch**: the tool stats each target's `searchPath`. Exact-file inputs return immediately. Directory inputs call `natives.glob()` with `hidden`, `maxResults: effectiveLimit`, `sortByMtime: true`, `gitignore: useGitignore`, `recursive: false` (recursion comes from the `**/` prefix `parseFindPattern()` adds), and the combined abort signal; multi-target calls run their globs concurrently.\n9. In the local branch, optional `onMatch` callbacks convert each match to a cwd-relative display path and emit throttled progress updates.\n10. After native glob returns, JS merges per-target results, deduplicates repeated display paths, and sorts the merged list by `mtime` descending before formatting paths.\n11. `buildResult()` applies `applyListLimit()` to cap the array again at `effectiveLimit`, formats paths with `formatGroupedPaths()` (from `@oh-my-pi/pi-utils`), appends notices, then runs `truncateHead()` with `maxLines: Number.MAX_SAFE_INTEGER`. In practice this leaves the 50 KB byte cap in place while disabling the default 3000-line cap.\n12. `toolResult()` packages text plus `details`, and records result-limit / truncation metadata for renderers.\n\n## Modes / Variants\n- **Exact file path**: if the parsed input has no glob and the resolved path stats as a file, output is that one path.\n- **Directory path**: if the parsed input has no glob and stats as a directory, the tool searches it with implicit `**/*`.\n- **Single glob path**: one input parsed by `parseFindPattern()`.\n- **Multi-path search**: multiple inputs resolved by `resolveExplicitFindPatterns()` into per-entry targets, each walked as its own root concurrently and merged afterwards.\n- **Partial multi-path search with missing inputs**: local multi-path calls skip missing base paths and surface them as `missingPaths` / `Skipped missing paths: ...`.\n- **Internal URL input**: supported when the internal router resolves the URL to a backing file. Internal URL globs are rejected.\n- **Custom delegated search**: uses injected `FindOperations` instead of local fs + native glob.\n\n## Side Effects\n- Filesystem\n - Stats the resolved base path, and in local multi-path mode stats every candidate base path up front.\n - Does not write files.\n- Subprocesses / native bindings\n - Built-in local mode calls the native `@oh-my-pi/pi-natives` glob implementation.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Emits structured progress updates when `onUpdate` is provided.\n - Adds truncation / limit metadata to the tool result.\n- Background work / cancellation\n - Local globbing is cancellable through the caller abort signal plus the configured internal timeout.\n\n## Limits & Caps\n- Default result limit: `200` (`DEFAULT_LIMIT` in `packages/coding-agent/src/tools/find.ts`).\n- Maximum result limit: `200` (`MAX_LIMIT`); larger inputs are clamped.\n- Local glob timeout: default `5000` ms, clamped to `500..60_000` ms.\n- Output byte cap: `50 * 1024` bytes (`DEFAULT_MAX_BYTES` in `packages/coding-agent/src/session/streaming-output.ts`).\n- Default generic line cap in `truncateHead()` is `3000`, but `find` overrides `maxLines` to `Number.MAX_SAFE_INTEGER`, so byte size — not line count — is the practical output truncation cap.\n- Streaming update throttle: `200` ms between `onUpdate` emissions.\n- Sort order: most recent `mtime` first in the built-in local branch and promised in the prompt. The tool re-sorts in JS even though native glob receives `sortByMtime: true` so native code can still stop early at `maxResults`.\n\n## Errors\n- User-facing `ToolError`s from `FindTool.execute()` include:\n - `` `paths` must contain non-empty globs or paths ``\n - `Path not found: ...`\n - `Searching from root directory '/' is not allowed`\n - `Limit must be a positive number`\n - `Path is not a directory: ...`\n - timeout result text is `find timed out after <seconds>s; returning <N> partial matches — increase timeout or narrow pattern` and is returned as a successful, truncated partial result rather than an error.\n- If the caller aborts, the local branch converts `AbortError` into `ToolAbortError`.\n- Non-`ENOENT` stat failures and other unexpected errors are rethrown.\n- Empty matches are not errors; they return the no-files text result.\n\n## Notes\n- Reach for `find` for filename / path discovery. Reach for `search` when the selection criterion is file contents or regex matches; `search` takes a `pattern` and returns anchored content matches, while `find` only returns matching paths (`packages/coding-agent/src/prompts/tools/find.md`, `packages/coding-agent/src/prompts/tools/search.md`).\n- Bare top-level globs are made recursive. `*.ts` is parsed as base `.` plus glob `**/*.ts`; `src/*.ts` stays rooted at `src` with a non-recursive `*.ts` segment; `src/**/*.ts` preserves explicit recursion.\n- `.gitignore` defaults to enabled in the built-in local branch. Use `gitignore: false` to disable it for native traversal.\n- `hidden` defaults to `true`; hidden-file exclusion is opt-out, not opt-in.\n- Multi-path missing-input tolerance applies in both branches, but only the built-in local branch surfaces `missingPaths` / `Skipped missing paths: ...`. The custom-operations branch hard-fails a missing `searchPath` only for single-input calls; in multi-input calls a missing target silently contributes no results.\n- The custom `FindOperations.glob()` hook receives `ignore` and `limit`, but not the `hidden` flag or an explicit `.gitignore` toggle. A remote delegate must account for that itself if it wants parity with the local branch.\n- Built-in local globbing does not force `fileType: File`; it can return files and directories from native glob. Directory outputs also occur through exact-path passthrough or custom delegates that return them.",
86
93
  "tools/github.md": "# github\n\n> Dispatch GitHub CLI operations for repositories, issues, pull requests, search, and Actions run watching.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/gh.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/github.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/gh-format.ts` — shorten commit SHAs for summaries.\n - `packages/coding-agent/src/tools/gh-renderer.ts` — TUI rendering, especially `run_watch` live/result views.\n - `packages/coding-agent/src/utils/git.ts` — `gh`/`git` process wrappers, repo locking, branch config writes.\n - `packages/utils/src/dirs.ts` — base directory for dedicated PR worktrees.\n - `packages/coding-agent/src/sdk.ts` — session artifact allocation hook.\n - `packages/coding-agent/src/session/artifacts.ts` — artifact filename format `<id>.<toolType>.log`.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `op` | `\"repo_view\" \\| \"pr_create\" \\| \"pr_checkout\" \\| \"pr_push\" \\| \"search_issues\" \\| \"search_prs\" \\| \"search_code\" \\| \"search_commits\" \\| \"search_repos\" \\| \"run_watch\"` | Yes | Dispatch selector. `GithubTool.execute()` switches only on this field. |\n| `repo` | `string` | No | `owner/repo` override. Ignored when the identifier argument is already a full GitHub URL. For `search_issues`/`search_prs`/`search_code`/`search_commits`, defaults to the current checkout's `owner/repo` when omitted (skipped when the query already contains a `repo:`/`org:`/`user:`/`owner:` qualifier or when current-repo resolution fails). Required in practice when `gh` cannot infer repo context from the current checkout. |\n| `branch` | `string` | No | Used by `repo_view`, `pr_push`, and `run_watch`. `run_watch` falls back to current git branch when `run` is omitted; `pr_push` falls back to current branch. |\n| `pr` | `string \\| string[]` | No | Used by `pr_checkout`. Each item may be a PR number, branch name, or GitHub PR URL. Array form enables batching. Omitted means current branch PR. |\n| `force` | `boolean` | No | Used only by `pr_checkout`. Defaults to `false`; allows resetting an existing `pr-<number>` local branch to the PR head commit. |\n| `forceWithLease` | `boolean` | No | Used only by `pr_push`; passed through to git push. |\n| `title` | `string` | No | Used only by `pr_create`. Required unless `fill` is `true`. |\n| `body` | `string` | No | Used only by `pr_create`. Mutually exclusive with `fill`. Empty/omitted body becomes `--body \"\"` to suppress the interactive editor. Non-empty body is written to a temp file and passed as `--body-file`. |\n| `base` | `string` | No | Used only by `pr_create`; passed as `--base`. |\n| `head` | `string` | No | Used only by `pr_create`; passed as `--head`. |\n| `draft` | `boolean` | No | Used only by `pr_create`. Defaults to `false`. |\n| `fill` | `boolean` | No | Used only by `pr_create`. Defaults to `false`. Mutually exclusive with `title` and `body`. |\n| `reviewer` | `string[]` | No | Used only by `pr_create`; each entry becomes `--reviewer`. |\n| `assignee` | `string[]` | No | Used only by `pr_create`; each entry becomes `--assignee`. |\n| `label` | `string[]` | No | Used only by `pr_create`; each entry becomes `--label`. |\n| `query` | `string` | No | Used by all `search_*` ops. Required by local validation only for `search_code`; the other search ops compose it with optional date/repo/type qualifiers and send the result to GitHub. |\n| `since` | `string` | No | Lower date bound for `search_issues`, `search_prs`, `search_commits`, and `search_repos`. Accepts relative durations (`3d`, `12h`, `2w`, `2mo`, `1y`), `YYYY-MM-DD`, or an ISO datetime. Rejected for `search_code`. |\n| `until` | `string` | No | Upper date bound for `search_issues`, `search_prs`, `search_commits`, and `search_repos`. Same formats as `since`. Rejected for `search_code`. |\n| `dateField` | `\"created\" \\| \"updated\"` | No | Date qualifier field for issue/PR/repo search. Defaults to `created`; repo search maps `updated` to GitHub's `pushed:` qualifier. Ignored for commit search, which always uses `committer-date:`. |\n| `limit` | `number` | No | Used by all `search_*` ops. Defaults to `10`, floored, clamped to `50`, and must be `> 0`. |\n| `run` | `string` | No | Used only by `run_watch`. Must be a numeric run ID or full GitHub Actions run URL. |\n| `tail` | `number` | No | Used only by `run_watch`. Defaults to `15`, floored, clamped to `200`, and must be `> 0`. |\n\n## Outputs\nThe tool returns a single text result built by `buildTextResult()` in `packages/coding-agent/src/tools/gh.ts`.\n\n- `content`: one text block. Multi-item ops join sections with blank lines and `---` separators.\n- `sourceUrl`: set for single repo/PR/run results when a canonical URL is known.\n- `details`: optional structured metadata used by the TUI renderer.\n - Common fields: `artifactId`, `repo`, `branch`, `worktreePath`, `remote`, `remoteBranch`, `headSha`, `runId`, `runIds`, `status`, `conclusion`, `failedJobs`.\n - `pr_checkout` adds `checkouts: GhPrCheckoutSummary[]`.\n - `run_watch` adds `watch: GhRunWatchViewDetails`, which drives the custom live/result renderer in `packages/coding-agent/src/tools/gh-renderer.ts`.\n- Artifact trailer: when `artifactId` is present, the text body gets an appended line like `Full failed-job logs: artifact://<id>`.\n - `run_watch` allocates artifacts with `session.allocateOutputArtifact(\"github\")`; persistent sessions therefore save failed-log bodies as `<artifact-dir>/<id>.github.log`.\n\n`run_watch` is the only streaming op. It emits `onUpdate` snapshots while polling, then returns one final text result.\n\n## Flow\n1. `GithubTool.createIf()` exposes the tool only when `git.github.available()` finds `gh` on `PATH`.\n2. `GithubTool.execute()` wraps dispatch in `untilAborted()` and switches on `params.op`.\n3. Each op normalizes optional strings, arrays, booleans, and numeric caps locally in `packages/coding-agent/src/tools/gh.ts`.\n4. CLI execution goes through `git.github.run/json/text()` in `packages/coding-agent/src/utils/git.ts`:\n - spawns `gh ...` with `Bun.spawn()`;\n - trims stdout/stderr unless `trimOutput: false`;\n - maps common auth/repo-context failures into tool-facing `ToolError` messages;\n - `json()` rejects empty or invalid JSON.\n5. Read-style ops (`repo_view`, `search_*`) fetch JSON and format Markdown-like text summaries. Single-issue and single-PR views were moved out of the tool and now resolve through the `issue://` / `pr://` internal URL schemes, which share the same SQLite cache.\n6. PR diffs moved out of the tool. `pr://<N>/diff` lists changed files, `pr://<N>/diff/<i>` slices a single file, and `pr://<N>/diff/all` returns the full unified diff — see `docs/tools/read.md`. All three variants share one `gh pr diff` invocation through the `pr-diff` cache row.\n7. `pr_checkout` resolves PR metadata first, then enters `git.withRepoLock()` before any git mutation so parallel checkout calls for the same primary repo do not race on shared `.git` state.\n8. `pr_push` reads PR head metadata back from git branch config, derives a refspec, then pushes with `git.push()`.\n9. `pr_create` shells out once, then best-effort re-reads the created PR for a richer summary.\n10. `run_watch` chooses either run mode (`run` supplied) or commit mode (`run` omitted), polls GitHub Actions APIs every 3 seconds for the first minute and every 15 seconds after that, emits streaming updates, and may save a full failed-log artifact before returning.\n11. Final text goes through `toolResult().text(...)`; if `session.allocateOutputArtifact()` returns a slot, failed-log text is persisted with `Bun.write()`.\n\n## Modes / Variants\n\n### `repo_view`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `branch` |\n| `gh` command | `gh repo view [<repo>] [--branch <branch>] --json <GH_REPO_FIELDS>` |\n| Batching | None |\n| Output | `# <owner/repo>` header, description, URL, default branch, requested branch, visibility, permission, primary language, stars, forks, archive/fork flags, updated timestamp, homepage, topics. `sourceUrl = data.url`. |\n\nIf `repo` is omitted, `gh` repository resolution is used.\n\nSingle-issue and single-PR reads live in the `issue://<N>` / `pr://<N>` URL schemes (see `docs/tools/read.md`). They share `~/.omp/cache/github-cache.db` (override via `OMP_GITHUB_CACHE_DB`) and the `github.cache.softTtlSec` / `github.cache.hardTtlSec` / `github.cache.enabled` settings. The cache retains rendered Markdown plus the raw JSON payload returned by `gh`, including private bodies, comments, reviews, and review comments when comments are enabled; rows are scoped by the local GitHub credential fingerprint. Root and repo-scoped reads (`issue://`, `pr://owner/repo`) issue a live `gh issue list` / `gh pr list` for browsing; query params `state`, `limit`, `author`, `label` pass through to `gh` (`issue://` accepts `state=open|closed|all`; `pr://` also accepts `merged`). PR diffs ride the same cache under `pr://<N>/diff[/…]`: the listing, full diff, and per-file slices all share one `pr-diff` row keyed by repo and PR number.\n\n### `pr_create`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` plus either `fill=true` or `title` |\n| Optional fields | `repo`, `title`, `body`, `base`, `head`, `draft`, `fill`, `reviewer[]`, `assignee[]`, `label[]` |\n| `gh` command | `gh pr create ...` with flags assembled from provided fields |\n| Batching | None |\n| Output | `# Created Pull Request ...` summary with URL, state, draft flag, base/head, author, created time, labels, optional body. `sourceUrl` is the created PR URL. |\n\nBranches:\n- `fill && (title || body !== undefined)` throws.\n- Non-empty `body` is written under a temp dir `gh-pr-body-*` in `os.tmpdir()`, passed as `--body-file`, then removed in `finally`.\n- After creation, the tool parses the returned URL and best-effort runs `gh pr view <number> --repo <repo> --json <GH_PR_FIELDS_NO_COMMENTS>`; failures there are swallowed.\n\n### `pr_checkout`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `pr`, `force` |\n| `gh` command | For each requested PR: `gh pr view [<pr>] [--repo <repo>] --json <GH_PR_CHECKOUT_FIELDS>`; cross-repo PRs may also call `gh repo view <headRepository> --json <GH_REPO_CLONE_FIELDS>`. |\n| Batching | Yes. `pr` may be `string[]`; each PR is resolved in parallel, but git mutations are serialized per primary repo by `git.withRepoLock()`. |\n| Output | Single PR: checkout/worktree summary plus `details.repo`, `details.branch`, `details.worktreePath`, `details.remote`, `details.remoteBranch`, `details.checkouts`. Batched: `# <n> Pull Request Worktrees (...)` plus one section per PR and aggregated `details.checkouts`. |\n\nWorktree and metadata behavior:\n- Local branch name is always `pr-<number>`.\n- Worktree path is `path.join(getWorktreesDir(), encodeRepoPathForFilesystem(primaryRepoRoot), localBranch)`, where `getWorktreesDir()` is `~/.omp/wt`; effective path is `~/.omp/wt/<encoded-primary-repo-root>/pr-<number>`.\n- Existing worktree detection is by branch ref `refs/heads/pr-<number>` from `git.worktree.list()`.\n- New worktree creation calls `git.worktree.add(repoRoot, finalWorktreePath, localBranch, { signal })` after verifying the path is neither already registered nor already present on disk.\n- For same-repo PRs, remote is `origin`. For cross-repo PRs, the tool resolves a clone URL for the head repo, reuses an existing remote with the same URL when possible, or creates `fork-<owner>` / `fork-<owner>-<n>`.\n- The branch push metadata is persisted with `git config` under the repository's shared `.git/config` as:\n - `branch.pr-<number>.remote`\n - `branch.pr-<number>.merge`\n - `branch.pr-<number>.pushRemote`\n - `branch.pr-<number>.ompPrHeadRef`\n - `branch.pr-<number>.ompPrUrl`\n - `branch.pr-<number>.ompPrIsCrossRepository`\n - `branch.pr-<number>.ompPrMaintainerCanModify`\n- If `refs/heads/pr-<number>` already exists at a different commit, checkout fails unless `force=true`, in which case `git branch --force` resets it to the fetched PR head.\n- If a matching worktree already exists, the tool reuses it and reports `reused: true`.\n\n### `pr_push`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `branch`, `forceWithLease` |\n| `gh` command | None. This path uses git, not `gh`. |\n| Batching | None |\n| Output | `# Pushed Pull Request Branch` summary with local branch, remote, remote branch, remote URL, PR URL, and force-with-lease flag. `sourceUrl = prUrl` when known. |\n\nPush target resolution reads the `branch.<name>.ompPrHeadRef`, `pushRemote`/`remote`, `ompPrUrl`, `ompPrMaintainerCanModify`, and `ompPrIsCrossRepository` git-config keys written by `pr_checkout`. If the current checked-out branch matches the target branch, the source ref is `HEAD`; otherwise it pushes `refs/heads/<branch>`. The refspec is `HEAD:refs/heads/<headRef>` or `refs/heads/<branch>:refs/heads/<headRef>`.\n\n### `search_issues`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `query`, `limit`, `since`, `until`, `dateField` |\n| `gh` command | `gh api -X GET /search/issues -f q=\"<query> [date qualifier] [repo:<repo>] is:issue\" -F per_page=<limit>` |\n| Batching | None |\n| Output | `# GitHub issues search`, echoed query, optional repo, result count, then one bullet per issue with repo/state/author/labels/timestamps/URL. |\n\n`repo` defaults to the current checkout's `owner/repo` via `resolveSearchRepoScope()` when omitted. The default is suppressed when the composed query already contains a leading `repo:`/`org:`/`user:`/`owner:` qualifier or when `gh repo view` fails to resolve the current checkout (e.g. outside a github remote).\n\n### `search_prs`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `query`, `limit`, `since`, `until`, `dateField` |\n| `gh` command | `gh api -X GET /search/issues -f q=\"<query> [date qualifier] [repo:<repo>] is:pr\" -F per_page=<limit>` |\n| Batching | None |\n| Output | Same shape as `search_issues`, labeled as pull requests. |\n\n`repo` defaults to the current checkout's `owner/repo` as in `search_issues`.\n\n### `search_code`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `repo`, `limit` |\n| `gh` command | `gh api -X GET /search/code -f q=\"<query> [repo:<repo>]\" -F per_page=<limit> -H \"Accept: application/vnd.github.text-match+json\"` |\n| Batching | None |\n| Output | `# GitHub code search`, result count, then one bullet per match with path, repo, short commit SHA, URL, and first normalized text-match fragment line when present. |\n\n`repo` defaults to the current checkout's `owner/repo` as in `search_issues`. `since` and `until` are explicitly rejected for this op because GitHub code search has no supported date qualifier.\n\n### `search_commits`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `query`, `limit`, `since`, `until`, `dateField` (accepted but ignored; commit searches use `committer-date`) |\n| `gh` command | `gh api -X GET /search/commits -f q=\"<query> [committer-date qualifier] [repo:<repo>]\" -F per_page=<limit>` |\n| Batching | None |\n| Output | `# GitHub commits search`, result count, then one bullet per commit: short SHA + first commit-message line, repo, author, date, URL. |\n\n`repo` defaults to the current checkout's `owner/repo` as in `search_issues`.\n\n### `search_repos`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `query`, `limit`, `since`, `until`, `dateField` |\n| `gh` command | `gh api -X GET /search/repositories -f q=\"<query> [date qualifier]\" -F per_page=<limit>` |\n| Batching | None |\n| Output | `# GitHub repositories search`, result count, then one bullet per repo with first description line, language, stars, forks, open issues, visibility, archive/fork flags, updated time, URL. |\n\n`repo` is intentionally not used for this op. If `query`, `since`, and `until` are all omitted, the tool sends an empty GitHub repository-search query and the GitHub API may reject it.\n\n### `run_watch`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `branch`, `run`, `tail` |\n| `gh` command | Repo resolution: `gh repo view --json nameWithOwner -q .nameWithOwner` when `repo` and run URL repo are both absent. Single-run mode uses `gh api --method GET /repos/<repo>/actions/runs/<runId>` and `gh api --method GET /repos/<repo>/actions/runs/<runId>/jobs`. Commit mode uses `gh api --method GET /repos/<repo>/branches/<branch>`, `gh api --method GET /repos/<repo>/actions/runs`, `gh api --method GET /repos/<repo>/actions/runs/<runId>/jobs`, and `gh api /repos/<repo>/actions/jobs/<jobId>/logs` for failed jobs. |\n| Batching | Implicit batching only in commit mode: all workflow runs for one commit are tracked together. |\n| Output | Streaming watch snapshots via `onUpdate`, then a final text report. On failure, appends `Full failed-job logs: artifact://<id>` and sets `details.artifactId`. |\n\nWatch flow:\n- `run` parsing accepts either a decimal run ID or a full run URL. URL repo must match explicit `repo` when both are given.\n- Poll interval is `3` seconds (`RUN_WATCH_INTERVAL_DEFAULT`) for the first `60` seconds of the watch (`RUN_WATCH_FAST_WINDOW_MS`), then `15` seconds (`RUN_WATCH_INTERVAL_SLOW`). Rate-limited poll errors back off at the slow interval and are retried up to `5` consecutive failures (`RUN_WATCH_MAX_POLL_FAILURES`). Commit mode gives up with a clear message after `90` seconds if no runs ever appear (`RUN_WATCH_NO_RUNS_GIVE_UP_MS`).\n- Failure grace period is fixed at 5 seconds (`RUN_WATCH_GRACE_DEFAULT`). When any failed job appears before completion, the tool emits a note, waits once, re-fetches state, then collects logs so concurrent failures are included.\n- Failed-job logs are fetched with `gh api /repos/<repo>/actions/jobs/<jobId>/logs` via `git.github.run()`, not `json()`. Non-zero exit leaves `available: false` instead of failing the whole watch.\n- Inline result includes only the last `tail` lines per failed job. The saved artifact contains full logs (`mode: \"full\"`).\n- In commit mode, success is intentionally double-checked: once all known runs are successful, the tool waits one more poll interval and succeeds only if the set of run IDs is unchanged. This avoids returning before late workflow runs appear for the same commit.\n- `details.watch` drives a specialized renderer in `packages/coding-agent/src/tools/gh-renderer.ts`; non-watch results fall back to generic text rendering.\n\n## Side Effects\n- Filesystem\n - `pr_create` may create a temp dir under `os.tmpdir()` named `gh-pr-body-*`, write `body.md`, then remove the dir in `finally`.\n - `pr_checkout` may create directories under `~/.omp/wt/<encoded-primary-repo-root>/` and add git worktrees there.\n - `run_watch` may write a session artifact with full failed-job logs.\n- Network\n - Every op shells out to `gh`, which then talks to GitHub APIs except `pr_push`.\n - `pr_push` uses git network transport to the configured remote.\n- Subprocesses / native bindings\n - All `gh` calls use `Bun.spawn([\"gh\", ...args])`.\n - `pr_checkout` and `pr_push` also invoke git helpers from `packages/coding-agent/src/utils/git.ts`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - `run_watch` consumes `session.allocateOutputArtifact()` when failed-job logs are persisted.\n - Returned `details` objects carry run/checkouts metadata for the renderer/UI.\n- User-visible prompts / interactive UI\n - `gh` interactive editor fallback is suppressed for `pr_create` by forcing either `--body-file` or `--body \"\"`.\n - `gh-renderer` provides compact headers for all ops and a custom live watch view for `run_watch`.\n- Background work / cancellation\n - `run_watch` loops until success/failure and uses `scheduler.wait()` between polls.\n - `GithubTool.execute()` is wrapped in `untilAborted()`; `git.github.run()` forwards the abort signal into `Bun.spawn()`.\n\n## Limits & Caps\n- Search result default: `10` (`SEARCH_LIMIT_DEFAULT` in `packages/coding-agent/src/tools/gh.ts`).\n- Search result max: `50` (`SEARCH_LIMIT_MAX`).\n- PR file preview inside the `pr://` view: first `50` files only (`FILE_PREVIEW_LIMIT` in `gh.ts`).\n- Run-watch poll interval: `3s` for the first `60s`, then `15s` (`RUN_WATCH_INTERVAL_DEFAULT`, `RUN_WATCH_FAST_WINDOW_MS`, `RUN_WATCH_INTERVAL_SLOW`); commit mode with no runs gives up after `90s` (`RUN_WATCH_NO_RUNS_GIVE_UP_MS`); up to `5` consecutive rate-limited poll failures are tolerated (`RUN_WATCH_MAX_POLL_FAILURES`).\n- Run-watch failure grace period: `5s` (`RUN_WATCH_GRACE_DEFAULT`).\n- Run-watch failed-log tail default: `15` lines (`RUN_WATCH_TAIL_DEFAULT`).\n- Run-watch failed-log tail max: `200` lines (`RUN_WATCH_TAIL_MAX`).\n- PR review comments page size: `100` (`REVIEW_COMMENTS_PAGE_SIZE`).\n- Actions jobs page size: `100` (`RUN_JOBS_PAGE_SIZE`).\n- Search and tail numeric inputs are floored with `Math.floor()`, clamped to the max, and rejected when non-finite or `<= 0`.\n- `pr_checkout` batch fan-out is unbounded in tool code; all requested PRs are launched with `Promise.all()`.\n\n## Errors\n- Tool creation is skipped entirely when `gh` is not installed.\n- `git.github.run()` throws `ToolError(\"GitHub CLI (gh) is not installed...\")` if `gh` is missing at execution time.\n- `git.github.text/json()` map common failures to model-facing messages:\n - not authenticated → `GitHub CLI is not authenticated. Run \\`gh auth login\\`.`\n - missing repo context without explicit `repo` → `GitHub repository context is unavailable. Pass \\`repo\\` explicitly or run the tool inside a GitHub checkout.`\n - otherwise stderr/stdout text, or fallback `GitHub CLI command failed: gh ...`\n- `json()` also throws on empty stdout or invalid JSON.\n- Local validation errors throw `ToolError`, including:\n - missing required per-op fields (`query` for `search_code`, `title unless fill=true`)\n - invalid numeric `limit` / `tail`\n - invalid `since` / `until` date bound\n - invalid `run` format\n - `fill` combined with `title` or `body`\n - missing git repo / branch / HEAD context for checkout, push, or watch\n - `pr_push` on a branch without `ompPrHeadRef` metadata\n - conflicting existing worktree path or branch without `force`\n- `run_watch` treats failed-job log fetches specially: missing log content does not fail the watch; it marks that log `available: false` and prints `Log tail unavailable.` / `Full log unavailable.`.\n- `pr_create` swallows only the post-create best-effort `gh pr view` refresh; the create step itself still fails normally.\n\n## Notes\n- `appendRepoFlag()` intentionally skips `--repo` when the identifier argument is already a full GitHub URL; that lets `gh` derive repo/number from the URL.\n- `normalizePrIdentifierList()` accepts `reviewer`, `assignee`, and `label` arrays too; the helper name is broader than its callers.\n- `pr_push` depends on `pr_checkout` having run first for that local branch; there is no alternate metadata source.\n- `pr_checkout` stores push metadata in branch config, not in the worktree directory. Reusing the same `pr-<number>` branch reuses those config keys.\n- Worktree write serialization is keyed by the primary repo root, not the current worktree path, because git worktrees share `.git/config`, `packed-refs`, commit-graph, and worktree metadata files.\n- `search_repos` is the only search op that never forwards `repo`; repository scoping must be expressed in the query itself.\n- `run_watch` success on commit mode means “all observed runs succeeded and no additional runs appeared one poll later”, not merely “latest poll looked green”.\n- The TUI renderer collapses failed log previews unless the result view is expanded; the underlying text result still contains the same tailed lines plus any artifact reference.\n",