@oh-my-pi/pi-coding-agent 15.5.9 → 15.5.11

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -72,7 +72,7 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
72
72
  "tools/calc.md": "# calc\n\n> Evaluates one or more arithmetic expressions and returns formatted numeric results.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/calculator.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/calculator.md`\n- Key collaborators:\n - `packages/coding-agent/src/tui.ts` — status lines and tree-list rendering\n - `packages/coding-agent/src/tools/render-utils.ts` — preview limits and formatting helpers\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `calculations` | `Calculation[]` | Yes | Batch of expressions to evaluate in order. |\n\n### `Calculation`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `expression` | `string` | Yes | Arithmetic expression string. |\n| `prefix` | `string` | Yes | Prepended verbatim to the rendered numeric result. |\n| `suffix` | `string` | Yes | Appended verbatim to the rendered numeric result. |\n\n## Outputs\n- Single-shot result.\n- `content[0].text` is the newline-joined `prefix + value + suffix` string for each calculation.\n- `details.results` is an array of `{ expression, value, output }`.\n- On renderer fallback, if `details` is missing but `content[0].text` exists, the TUI tries to pair each output line with the original expressions from call args.\n\n## Flow\n1. `execute()` wraps evaluation in `untilAborted(...)`.\n2. For each entry, `evaluateExpression(...)` tokenizes the expression, parses it with a recursive-descent parser, rejects non-finite outputs, and normalizes `-0` to `0`.\n3. `tokenizeExpression(...)` accepts whitespace, parentheses, operators, and number literals; any other character throws immediately.\n4. `ExpressionParser` applies precedence in this order: `+ -`, `* / %`, unary `+ -`, exponentiation `**`, parentheses/literals.\n5. Exponentiation is right-associative (`2 ** 3 ** 2` parses as `2 ** (3 ** 2)`).\n6. 
Each numeric result is formatted with `String(value)` and wrapped with the provided `prefix` and `suffix`.\n7. The tool returns text output plus structured `details`.\n\n## Side Effects\n- Background work / cancellation\n - Supports abort via `untilAborted(...)`.\n- Session state\n - None.\n- Filesystem / Network / Subprocesses\n - None.\n\n## Limits & Caps\n- Supported operators: `+`, `-`, `*`, `/`, `%`, `**` (`packages/coding-agent/src/tools/calculator.ts`).\n- Supported numeric literals:\n - decimal integers/floats, including leading-dot forms like `.5`\n - scientific notation like `1e10`, `2.5E-3`\n - hexadecimal `0x...`\n - binary `0b...`\n - octal `0o...`\n- Results must be finite; `Infinity` and `NaN` are rejected.\n- The renderer collapses long result lists using `PREVIEW_LIMITS.COLLAPSED_ITEMS` from `packages/coding-agent/src/tools/render-utils.ts`.\n\n## Errors\n- Invalid characters: e.g. `Invalid character \"x\" in expression`.\n- Malformed numbers: invalid prefixed literal, invalid exponent, invalid number.\n- Syntax errors: `Unexpected token in expression`, `Unexpected end of expression`, `Missing closing parenthesis`, `Expression is empty`.\n- Non-finite arithmetic: `Expression result is not a finite number`.\n- Any evaluation error aborts the whole batch; the tool does not return partial successes.\n\n## Notes\n- Despite the schema example showing `sqrt(16)`, the parser does not support functions, identifiers, units, or constants; only numeric literals, operators, and parentheses are accepted.\n- Precision is plain JavaScript `number` semantics throughout, including floating-point rounding behavior.\n- `/` and `%` use JavaScript numeric operators directly; there is no integer-only mode or unit handling.\n- Unary operators bind tighter than `*`/`/`/`%` but looser than exponentiation because unary parsing delegates to `#parsePower()`.\n",
73
73
  "tools/checkpoint.md": "# checkpoint\n\n> Mark the current top-level conversation state so later `rewind` can collapse exploratory context into a report.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/checkpoint.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/checkpoint.md`\n- Key collaborators:\n - `packages/coding-agent/src/session/agent-session.ts` — captures the active checkpoint after tool success.\n - `packages/coding-agent/src/session/session-manager.ts` — persists the normal session entry stream; not the active checkpoint marker.\n - `packages/coding-agent/src/tools/index.ts` — registers the tool and gates it behind `checkpoint.enabled`.\n - `packages/coding-agent/src/config/settings-schema.ts` — defines the disabled-by-default feature flag.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `goal` | `string` | Yes | Investigation goal. Required by the schema and echoed in the tool result. |\n\n## Outputs\nThe tool returns a single text result plus structured details:\n\n- text body:\n - `Checkpoint created.`\n - `Goal: <goal>`\n - `Run your investigation, then call rewind with a concise report.`\n- `details`:\n - `goal: string`\n - `startedAt: string` — ISO timestamp created inside `CheckpointTool.execute()`\n\nNo checkpoint ID, artifact URI, job handle, file path, or restore token is returned.\n\n## Flow\n1. `CheckpointTool.createIf()` in `packages/coding-agent/src/tools/checkpoint.ts` returns `null` for subagents by checking `session.taskDepth`; only top-level sessions can see the tool.\n2. `CheckpointTool.execute()` rejects subagent calls again with `ToolError(\"Checkpoint not available in subagents.\")`.\n3. It rejects nested checkpoints with `ToolError(\"Checkpoint already active.\")` when `session.getCheckpointState?.()` is already set.\n4. It creates `startedAt = new Date().toISOString()` and returns a normal `toolResult()` payload. The tool itself does not persist anything.\n5. 
On the later `tool_execution_end` event, `AgentSession` in `packages/coding-agent/src/session/agent-session.ts` detects successful `checkpoint` execution and captures three in-memory fields:\n - `checkpointMessageCount` — current `agent.state.messages.length`, after the checkpoint tool result has already been appended\n - `checkpointEntryId` — `sessionManager.getEntries().at(-1)?.id ?? null`, i.e. the last persisted session entry ID at checkpoint time\n - `startedAt` — copied from tool details or regenerated\n6. `AgentSession` stores that object in its private `#checkpointState` field and clears `#pendingRewindReport`.\n\n## Side Effects\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Sets `AgentSession.#checkpointState` in memory.\n - Records the checkpoint boundary as a message count plus a session entry ID.\n - Enables the later yield guard: if a checkpoint is active and no rewind report is pending, `#enforceRewindBeforeYield()` injects a developer-role warning and schedules another turn.\n- User-visible prompts / interactive UI\n - The tool result tells the model to call `rewind` after the investigation.\n - If the agent tries to `yield` first, `AgentSession` injects:\n\n```text\n<system-warning>\nYou are in an active checkpoint. You MUST call rewind with your investigation findings before yielding. Do NOT yield without completing the checkpoint.\n</system-warning>\n```\n\n## Limits & Caps\n- Availability is gated by `checkpoint.enabled`, default `false`, in `packages/coding-agent/src/config/settings-schema.ts`.\n- The tool is registered as discoverable in `packages/coding-agent/src/tools/index.ts`.\n- Only one active checkpoint is allowed per top-level session.\n- Checkpoint state is not persisted as a dedicated session entry. If the process exits, a resumed session can reload the conversation history, but not the live `#checkpointState` guard.\n- Session persistence still applies to the ordinary checkpoint tool call message. 
Global session persistence truncation is `MAX_PERSIST_CHARS = 500_000` in `packages/coding-agent/src/session/session-manager.ts`.\n\n## Errors\n- `ToolError(\"Checkpoint not available in subagents.\")` — thrown for subagent sessions.\n- `ToolError(\"Checkpoint already active.\")` — thrown when a prior checkpoint has not been rewound or cleared.\n- The tool body has no local `try/catch`; unexpected exceptions propagate.\n\n## Notes\n- Despite the summary string `Create a git-based checkpoint to save and restore session state`, the implementation does not call git and does not snapshot filesystem state.\n- Captured state is conversation/session metadata only:\n - in-memory message count\n - session entry ID in the session tree\n - timestamp\n- Not captured:\n - working tree contents\n - staged changes\n - artifacts\n - blob-store contents\n - SQLite history rows from `packages/coding-agent/src/session/history-storage.ts`\n - auth or agent records from `packages/coding-agent/src/session/agent-storage.ts`\n- If the turn ends with `stopReason === \"aborted\"` while a checkpoint is active, `AgentSession` clears `#checkpointState` and `#pendingRewindReport` instead of preserving a half-finished checkpoint.\n",
74
74
  "tools/debug.md": "# debug\n\n> Drive one DAP debug session; adjacent debug UI code reuses the same subsystem for logs, raw SSE capture, reports, profiling, and system diagnostics.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/debug.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/debug.md`\n- Key collaborators:\n - `packages/coding-agent/src/dap/session.ts` — session lifecycle, breakpoint/state cache\n - `packages/coding-agent/src/dap/client.ts` — adapter process/socket transport, DAP message loop\n - `packages/coding-agent/src/dap/config.ts` — adapter resolution and auto-selection\n - `packages/coding-agent/src/dap/defaults.json` — built-in adapter definitions\n - `packages/coding-agent/src/dap/types.ts` — request/response/capability shapes\n - `packages/coding-agent/src/tools/tool-timeouts.ts` — per-tool timeout clamp\n - `packages/coding-agent/src/debug/index.ts` — interactive debug selector menu\n - `packages/coding-agent/src/debug/log-viewer.ts` — recent-log TUI viewer\n - `packages/coding-agent/src/debug/raw-sse.ts` — raw SSE TUI viewer\n - `packages/coding-agent/src/debug/raw-sse-buffer.ts` — bounded SSE capture buffer\n - `packages/coding-agent/src/debug/profiler.ts` — CPU/heap profiling helpers\n - `packages/coding-agent/src/debug/report-bundle.ts` — `.tar.gz` report bundling, log source, cache cleanup\n - `packages/coding-agent/src/debug/system-info.ts` — system snapshot collection and env redaction\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `action` | `\"launch\" \\| \"attach\" \\| \"set_breakpoint\" \\| \"remove_breakpoint\" \\| \"set_instruction_breakpoint\" \\| \"remove_instruction_breakpoint\" \\| \"data_breakpoint_info\" \\| \"set_data_breakpoint\" \\| \"remove_data_breakpoint\" \\| \"continue\" \\| \"step_over\" \\| \"step_in\" \\| \"step_out\" \\| \"pause\" \\| \"evaluate\" \\| \"stack_trace\" \\| \"threads\" \\| \"scopes\" \\| \"variables\" \\| \"disassemble\" \\| 
\"read_memory\" \\| \"write_memory\" \\| \"modules\" \\| \"loaded_sources\" \\| \"custom_request\" \\| \"output\" \\| \"terminate\" \\| \"sessions\"` | Yes | Dispatch key for the tool switch in `packages/coding-agent/src/tools/debug.ts`. |\n| `program` | `string` | No | Launch target path. Required for `launch`. Resolved relative to `cwd` if provided, otherwise session cwd. |\n| `args` | `string[]` | No | Program argv for `launch`. |\n| `adapter` | `string` | No | Explicit adapter name. Otherwise `selectLaunchAdapter()` / `selectAttachAdapter()` auto-pick from `packages/coding-agent/src/dap/config.ts`. |\n| `cwd` | `string` | No | Launch/attach working directory. Defaults to session cwd. |\n| `file` | `string` | No | Source file path for source breakpoints. |\n| `line` | `number` | No | Source line for source breakpoints. |\n| `function` | `string` | No | Function breakpoint name. Mutually exclusive with `file`+`line` in breakpoint actions. |\n| `name` | `string` | No | Data breakpoint info target name. Required for `data_breakpoint_info`. |\n| `condition` | `string` | No | Conditional expression for source/function/instruction/data breakpoints. |\n| `hit_condition` | `string` | No | Hit-count condition for instruction/data breakpoints. |\n| `expression` | `string` | No | Expression or raw debugger command. Required for `evaluate`. |\n| `context` | `string` | No | Evaluate context. Defaults to `\"repl\"`. Passed through as DAP evaluate context. |\n| `frame_id` | `number` | No | Frame selector for `evaluate`, `scopes`, `data_breakpoint_info`. `scopes` and `evaluate` default to the current stopped frame when omitted. |\n| `scope_id` | `number` | No | Variables reference from a scope. Accepted by `variables`; also used as a fallback variables reference for `data_breakpoint_info`. |\n| `variable_ref` | `number` | No | Variables reference for `variables`; preferred over `scope_id` when both are present. |\n| `pid` | `number` | No | Local process id for `attach`. 
`attach` requires `pid` or `port`. |\n| `port` | `number` | No | Remote attach port. If no adapter is forced, attach prefers `debugpy` when `port` is present. |\n| `host` | `string` | No | Remote attach host for `attach`. |\n| `levels` | `number` | No | Max stack frames for `stack_trace`. |\n| `memory_reference` | `string` | No | Memory reference/address for `disassemble`, `read_memory`, `write_memory`. `disassemble` also accepts it via `instruction_reference` fallback logic in `resolveDisassemblyReference()`. |\n| `instruction_reference` | `string` | No | Instruction breakpoint reference; required for instruction breakpoint actions. |\n| `instruction_count` | `number` | No | Required for `disassemble`. |\n| `instruction_offset` | `number` | No | Instruction offset for `disassemble`. |\n| `count` | `number` | No | Byte count for `read_memory`. Required there. |\n| `data` | `string` | No | Base64 payload for `write_memory`. Required there. |\n| `data_id` | `string` | No | Data breakpoint id. Required for `set_data_breakpoint` / `remove_data_breakpoint`. |\n| `access_type` | `\"read\" \\| \"write\" \\| \"readWrite\"` | No | Access filter for `set_data_breakpoint`. |\n| `command` | `string` | No | Custom DAP request command. Required for `custom_request`. |\n| `arguments` | `Record<string, unknown>` | No | Custom DAP request body for `custom_request`. |\n| `offset` | `number` | No | Offset for instruction breakpoints, disassembly, memory read, memory write. |\n| `resolve_symbols` | `boolean` | No | `disassemble` symbol-resolution flag. |\n| `allow_partial` | `boolean` | No | `write_memory` partial-write allowance. |\n| `start_module` | `number` | No | Modules pagination start index for `modules`. |\n| `module_count` | `number` | No | Modules pagination count for `modules`. |\n| `timeout` | `number` | No | Per-request timeout in seconds. Default `30`, clamped to `5..300`. 
|\n\n### Action-specific requirements\n- `launch`: `program`\n- `attach`: `pid` or `port`\n- `set_breakpoint` / `remove_breakpoint`: `function`, or `file` + `line`\n- `set_instruction_breakpoint` / `remove_instruction_breakpoint`: `instruction_reference`\n- `data_breakpoint_info`: `name`\n- `set_data_breakpoint` / `remove_data_breakpoint`: `data_id`\n- `evaluate`: `expression`\n- `variables`: `variable_ref` or `scope_id`\n- `disassemble`: capability `supportsDisassembleRequest`, plus `instruction_count`\n- `read_memory`: capability `supportsReadMemoryRequest`, plus `memory_reference` and `count`\n- `write_memory`: capability `supportsWriteMemoryRequest`, plus `memory_reference` and `data`\n- `modules`: capability `supportsModulesRequest`\n- `loaded_sources`: capability `supportsLoadedSourcesRequest`\n- `custom_request`: `command`\n\n### Interactive selector values\n`packages/coding-agent/src/debug/index.ts` also exposes a fixed UI-only selector with values `open-artifacts`, `performance`, `work`, `dump`, `memory`, `logs`, `system`, `raw-sse`, `transcript`, `clear-cache`. These are not model-callable through `debugSchema`; they are local TUI menu routes.\n\n## Outputs\nThe agent tool returns a standard `toolResult()` payload from `packages/coding-agent/src/tools/debug.ts`:\n- `content`: one text block. 
Every action renders human-readable text; there is no structured JSON block in `content`.\n- `details.action`: echoed action.\n- `details.success`: always initialized `true`; failures surface by throwing before a result is returned.\n- `details.snapshot`: present for actions that operate on or create a session, using `DapSessionSummary` from `packages/coding-agent/src/dap/types.ts`.\n- Action-specific `details` fields:\n - `launch` / `attach`: `adapter`\n - breakpoint actions: `breakpoints`, `functionBreakpoints`, `instructionBreakpoints`, `dataBreakpoints`\n - `data_breakpoint_info`: `dataBreakpointInfo`\n - `continue` / `step_*`: `state`, `timedOut`\n - `threads`: `threads`\n - `stack_trace`: `stackFrames`\n - `scopes`: `scopes`\n - `variables`: `variables`\n - `evaluate`: `evaluation`\n - `disassemble`: `disassembly`\n - `read_memory`: `memoryAddress`, `memoryData`, `unreadableBytes`\n - `write_memory`: `bytesWritten`\n - `modules`: `modules`\n - `loaded_sources`: `sources`\n - `custom_request`: `customBody`\n - `output`: `output`\n - `sessions`: `sessions`\n\nStreaming/UI behavior:\n- The tool renderer merges call and result (`mergeCallAndResult: true`) and renders inline.\n- `debug.ts` itself does not emit progress updates through `_onUpdate`; result delivery is single-shot.\n- The interactive selector is UI-driven instead of model-driven. It swaps TUI components, appends status lines to the chat pane, opens files in external viewers, or writes archives/temp files.\n\nSide-channel artifacts outside the model tool result:\n- `createReportBundle()` writes `omp-report-<timestamp>.tar.gz` under the reports dir and returns the filesystem path to the UI handler.\n- `#handleWorkReport()` writes `/tmp/work-profile-<Date.now()>.svg` before opening it.\n- `RawSseViewerComponent` and `DebugLogViewerComponent` can copy captured text to the clipboard.\n\n## Flow\n1. 
Tool registration is conditional: `DebugTool.createIf()` in `packages/coding-agent/src/tools/debug.ts` returns `null` unless `session.settings.get(\"debug.enabled\")` is true. `packages/coding-agent/src/tools/index.ts` wires the factory and rechecks the same setting in tool filtering.\n2. `DebugTool.execute()` clamps `params.timeout` through `clampTimeout(\"debug\", params.timeout)` and composes the caller `AbortSignal` with `AbortSignal.timeout(...)`.\n3. `launch` and `attach` resolve cwd/program paths, select an adapter in `packages/coding-agent/src/dap/config.ts`, then delegate to `dapSessionManager.launch()` / `.attach()`.\n4. `DapSessionManager.launch()` / `.attach()` enforce the single-session rule with `#ensureLaunchSlot()`, spawn the adapter through `DapClient.spawn()`, register listeners, send `initialize`, cache capabilities, start listening for an initial stop event before sending `launch`/`attach`, then complete the `initialized` → `configurationDone` handshake in `#completeConfigurationHandshake()`.\n5. `DapClient.spawn()` starts the adapter detached with `NON_INTERACTIVE_ENV`. Most adapters use stdio; socket-mode adapters (`dlv`) use `#spawnSocketUnix()` on Linux or `#spawnSocketClientAddr()` on macOS/other.\n6. `#registerSession()` in `packages/coding-agent/src/dap/session.ts` installs reverse-request handlers:\n - `runInTerminal`: spawns the requested debuggee command detached via `ptree.spawn()` and returns `{ processId }`\n - `startDebugging`: logs the child-session request and returns `{}`; it does not create nested sessions\n - events: `output`, `initialized`, `stopped`, `continued`, `exited`, `terminated` update cached session state\n7. Operational actions (`set_breakpoint`, `evaluate`, `threads`, `read_memory`, `custom_request`, and similar) call `dapSessionManager` methods. Most flow through `#sendRequestWithConfig()`, which first sends `configurationDone` when required, then sends the DAP request, then updates `lastUsedAt`.\n8. 
Breakpoint actions maintain local cached breakpoint sets in `DapSessionManager` and remap adapter responses back onto those cached records.\n9. `continue` and the three step actions clear cached stop state, subscribe for `stopped`/`terminated`/`exited` before sending the DAP request, then `#awaitStopOutcome()` either returns the new stopped location or reports that the program is still running after timeout.\n10. `pause` sends DAP `pause`, waits for a stopped event if needed, and reuses cached stop state if the program was already stopped.\n11. `stack_trace`, `scopes`, `variables`, and `evaluate` default to the current stopped thread/frame when the caller omits ids and cached state is available.\n12. `output` reads the in-memory output ring from `DapSessionManager.getOutput()`. `terminate` sends `terminate` when supported, always attempts `disconnect`, marks the session terminated, and disposes the client.\n13. `sessions` reads the manager’s current map and formats all summaries. Although the manager stores a map, only one active session can exist because new launch/attach calls are blocked until the active one is terminated or cleaned up.\n14. 
The interactive selector in `packages/coding-agent/src/debug/index.ts` builds a `SelectList` of fixed values and dispatches each to a handler:\n - `performance`: `startCpuProfile()`, wait for Enter/Escape, stop profiling, read a 30-second work profile with `getWorkProfile(30)`, then bundle via `createReportBundle()`\n - `work`: read `getWorkProfile(30)`, write a temp SVG, open it externally\n - `dump`: create a report bundle immediately\n - `memory`: force GC, call `Bun.generateHeapSnapshot(\"v8\")`, then bundle\n - `logs`: build a `DebugLogSource` and mount `DebugLogViewerComponent`\n - `raw-sse`: resolve a `RawSseDebugBuffer` from the session and mount `RawSseViewerComponent`\n - `system`: call `collectSystemInfo()` and render `formatSystemInfo()` into the chat pane\n - `open-artifacts`: open the current session artifact directory if it exists\n - `transcript`: delegates to `ctx.handleDebugTranscriptCommand()`\n - `clear-cache`: show confirmation, then remove artifact directories older than 30 days with `clearArtifactCache()`\n\n## Modes / Variants\n- **Availability gate**\n - Tool hidden when `debug.enabled` is false.\n- **Adapter selection**\n - `launch`: explicit `adapter` wins; otherwise `selectLaunchAdapter()` ranks available adapters by extension match, root-marker match, then native-debugger preference (`gdb`, `lldb-dap`) for extensionless binaries.\n - `attach`: explicit `adapter` wins; otherwise remote `port` prefers `debugpy`, then native debuggers, then first available adapter.\n- **Transport**\n - stdio adapters: direct `stdin`/`stdout` framing.\n - socket adapters: Unix domain socket on Linux; TCP callback on macOS/other.\n- **DAP agent-tool actions**\n - `launch` — spawn adapter, initialize session, maybe stop on entry; returns formatted session snapshot and `details.adapter`.\n - `attach` — connect to a live process or remote port; same output shape as `launch`.\n - `set_breakpoint` — source or function breakpoint add/update; returns the current 
breakpoint list for that target.\n - `remove_breakpoint` — source or function breakpoint removal; returns the remaining breakpoint list.\n - `set_instruction_breakpoint` / `remove_instruction_breakpoint` — require `supportsInstructionBreakpoints`; return current instruction breakpoint list.\n - `data_breakpoint_info` — require `supportsDataBreakpoints`; asks the adapter for a `dataId`, access types, and description for `name`.\n - `set_data_breakpoint` / `remove_data_breakpoint` — require `supportsDataBreakpoints`; return the cached data-breakpoint list.\n - `continue` / `step_over` / `step_in` / `step_out` — return text describing whether execution stopped, terminated, or kept running, plus `details.state` and `details.timedOut`.\n - `pause` — interrupts a running target and returns a stopped snapshot.\n - `evaluate` — adapter expression evaluation; defaults context to `repl`.\n - `stack_trace` — fetches frames for the resolved thread.\n - `threads` — fetches current threads.\n - `scopes` — frame scopes for an explicit `frame_id` or the current stopped frame.\n - `variables` — variables for `variable_ref` or `scope_id`.\n - `disassemble` — require `supportsDisassembleRequest`; disassembles around a memory reference.\n - `read_memory` — require `supportsReadMemoryRequest`; returns address, base64 data, unreadable-byte count.\n - `write_memory` — require `supportsWriteMemoryRequest`; writes base64 data and reports bytes written.\n - `modules` — require `supportsModulesRequest`; optional pagination via `start_module` / `module_count`.\n - `loaded_sources` — require `supportsLoadedSourcesRequest`; returns loaded source descriptors.\n - `custom_request` — sends any DAP request name with arbitrary arguments.\n - `output` — dumps captured stdout/stderr/console text from the session cache.\n - `terminate` — disconnects and disposes the active session; returns `No debug session to terminate.` when none exists.\n - `sessions` — lists all cached session summaries.\n- 
**Interactive selector routes (UI-only)**\n - `logs` — loads today’s log tail and optional older daily log files into `DebugLogViewerComponent`; supports copy, range selection, pid filtering, load-older.\n - `raw-sse` — live view over the session’s `RawSseDebugBuffer`; supports tail-follow, scrolling, copy-all.\n - `performance` — CPU profile + 30-second work profile + report bundle.\n - `memory` — heap snapshot + report bundle.\n - `dump` — report bundle without profiler artifacts.\n - `work` — standalone work-profile flamegraph export/open.\n - `system` — formatted OS/arch/CPU/memory/version/cwd/shell/terminal dump.\n - `open-artifacts` / `transcript` / `clear-cache` — artifact directory open, transcript export, artifact-cache pruning.\n\n## Side Effects\n- Filesystem\n - Resolves program/file/cwd paths against the session cwd.\n - Report creation writes `.tar.gz` bundles and may read the session JSONL, artifact files, subagent session JSONLs, and log files.\n - Work-profile export writes `/tmp/work-profile-<timestamp>.svg`.\n - Log source reads daily log files from the logs dir.\n - Artifact-cache cleanup removes session artifact directories older than the cutoff.\n - `resolveRawSseDebugBuffer()` may attach a non-enumerable `rawSseDebugBuffer` property to the owner object.\n- Network\n - Socket-mode adapters bind/connect local sockets.\n - Remote attach may connect through the adapter to a remote debug port.\n- Subprocesses / native bindings\n - Spawns debugger adapters (`gdb`, `lldb-dap`, `python -m debugpy.adapter`, `dlv`, and others from `defaults.json`) detached.\n - Reverse DAP `runInTerminal` requests spawn the debuggee detached via `ptree.spawn()`.\n - `getWorkProfile(30)` comes from `@oh-my-pi/pi-natives`.\n - CPU profiling uses `node:inspector/promises`; heap snapshots use `Bun.generateHeapSnapshot(\"v8\")`; raw/log viewers sanitize text via `@oh-my-pi/pi-natives`.\n - `openPath()` launches the OS default file/browser handler for artifact dirs and 
SVGs.\n - Log/raw-SSE viewers can call `copyToClipboard()`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - `DapSessionManager` keeps session summaries, breakpoints, threads, stack frames, stop location, output capture, capabilities, and last-used timestamps in memory.\n - Active-session id is global to the singleton `dapSessionManager`.\n - `RawSseDebugBuffer` stores recent SSE events per owner/session.\n - The tool is `exclusive`; concurrent debug tool calls are blocked by the scheduler.\n- User-visible prompts / interactive UI\n - Debug selector shows confirmation before cache deletion.\n - Performance profiling temporarily hijacks editor Enter/Escape handlers until profiling stops.\n - Log/raw-SSE viewers replace the editor pane with custom components.\n- Background work / cancellation\n - Every DAP request accepts an `AbortSignal`; timeouts and caller cancellation abort the active request, not the whole session lifetime.\n - `DapSessionManager` runs a background cleanup loop every 30 seconds.\n - Raw SSE viewers subscribe to buffer updates until closed.\n\n## Limits & Caps\n- Tool timeout clamp: `default=30`, `min=5`, `max=300` in `packages/coding-agent/src/tools/tool-timeouts.ts`.\n- Per-request DAP default timeout: `DEFAULT_REQUEST_TIMEOUT_MS = 30_000` in `packages/coding-agent/src/dap/client.ts`.\n- Single active session: enforced by `#ensureLaunchSlot()` in `packages/coding-agent/src/dap/session.ts`.\n- Idle session cleanup: `IDLE_TIMEOUT_MS = 10 * 60 * 1000`, checked every `CLEANUP_INTERVAL_MS = 30 * 1000`.\n- Adapter liveness heartbeat: `HEARTBEAT_INTERVAL_MS = 5 * 1000`.\n- Output capture cap: `MAX_OUTPUT_BYTES = 128 * 1024`; older text is trimmed in ~1 KiB slices and `outputTruncated` is recorded.\n- Initial stop capture timeout after launch/attach: `STOP_CAPTURE_TIMEOUT_MS = 5_000`.\n- Socket-mode adapter readiness timeout: `10_000` ms in `waitForCondition()` and TCP connect timeout logic in 
`packages/coding-agent/src/dap/client.ts`.\n- Raw SSE buffer caps in `packages/coding-agent/src/debug/raw-sse-buffer.ts`:\n - `MAX_RAW_SSE_EVENTS = 1_000`\n - `MAX_RAW_SSE_CHARS = 512_000`\n - `MAX_RAW_SSE_EVENT_CHARS = 64_000` per event, with `: omp-debug-truncated ...` marker appended on trim\n- Log viewer window in `packages/coding-agent/src/debug/log-viewer.ts`:\n - `INITIAL_LOG_CHUNK = 50`\n - `LOAD_OLDER_CHUNK = 50`\n- Report/log ingestion caps in `packages/coding-agent/src/debug/report-bundle.ts`:\n - `MAX_LOG_LINES = 5000` for interactive log reading\n - `MAX_LOG_BYTES = 2 * 1024 * 1024` tail-read ceiling\n - report bundles include only the last `1000` log lines\n - subagent session inclusion is capped at the most recent `10` JSONL files\n- Interactive profiling windows in `packages/coding-agent/src/debug/index.ts`: both performance and work reports request `getWorkProfile(30)`.\n- Artifact cache pruning default: `30` days in `clearArtifactCache()` and the selector confirmation text.\n\n## Errors\n- Parameter validation in `packages/coding-agent/src/tools/debug.ts` throws `ToolError` with explicit messages such as:\n - `program is required for launch`\n - `attach requires pid or port`\n - `set_breakpoint requires file+line or function`\n - `variables requires variable_ref or scope_id`\n - `memory_reference is required for read_memory`\n - `count is required for read_memory`\n - `data is required for write_memory`\n - `command is required for custom_request`\n- Adapter selection failure throws `No debugger adapter available. Installed adapters: ...`.\n- Capability-gated actions throw from `requireCapability(...)`, e.g. `Active adapter does not support memory reads.`\n- No-session and state errors come from `DapSessionManager`, e.g. `No active debug session. Launch or attach first.`, `No active stack frame. Run stack_trace first or supply frame_id.`, `Debugger reported no threads.`\n- Launching a second live session throws `Debug session <id> is still active. 
Terminate it before launching another.`\n- DAP transport/request failures surface as thrown errors from `DapClient`:\n - `DAP request <command> timed out after <ms>ms`\n - `DAP event <event> timed out after <ms>ms`\n - `DAP adapter <name> is not running`\n - `DAP adapter exited (code N): <stderr>` or `DAP adapter exited unexpectedly (code N)`\n - adapter response `message` when a DAP request fails\n- `continue` / `step_*` are intentionally non-fatal when the target stays running past the timeout: they return `details.timedOut = true` and `state: \"running\"` instead of throwing.\n- `terminate` suppresses adapter errors while sending `terminate`/`disconnect`; it still disposes the client and returns the last summary when possible.\n- Interactive selector handlers report UI errors instead of throwing:\n - profiler start/stop, report bundling, log reading, system-info collection, cache clearing, and artifact opening use `ctx.showError(...)` / `ctx.showWarning(...)`\n - empty logs and empty artifact caches are warnings/status messages, not failures\n - copy failures in log/raw-SSE viewers become status/error text in the UI\n- Report-bundle helpers are intentionally best-effort for many file reads: missing session files, missing artifact dirs, unreadable artifact files, missing log dirs, inaccessible cache dirs, and missing subagent files are skipped silently.\n- `collectSystemInfo()` is best-effort for CPU probing; failure there falls back to `Unknown CPU`.\n\n## Notes\n- `packages/coding-agent/src/prompts/tools/debug.md` tells the model only one active session is supported; that is not advisory, it is enforced in code.\n- `configurationDone` is sent automatically both during launch/attach handshake and lazily before later requests if the adapter required it and the initial handshake did not complete.\n- `startDebugging` reverse requests are acknowledged but not implemented; child debug sessions are not spawned.\n- `output` exposes the merged `output` event stream 
only; the tool does not distinguish stdout, stderr, and console categories.\n- Session summaries expose `needsConfigurationDone`; this is derived from adapter capabilities and whether `configurationDone` has been sent.\n- Source breakpoint file paths are normalized with `path.resolve()` before caching and sending to the adapter.\n- `evaluate` defaults to `repl`, so the tool can forward raw debugger commands when the adapter supports them.\n- `disassemble` resolves its target from `memory_reference` first, then `instruction_reference`; it throws if neither is present.\n- `RawSseDebugBuffer.recordEvent()` increments `totalEvents` before bounded retention. A snapshot can therefore show fewer retained records than total observed events.\n- Raw SSE buffer listener failures are swallowed so viewer bugs do not break capture.\n- `createDebugLogSource()` walks daily log files newest-first, but `loadOlderLogs()` reverses each requested slice before concatenation so older chunks prepend in chronological order.\n- `clearArtifactCache()` deletes directories by directory mtime, not per-file age.\n- `addDirectoryToArchive()` reads artifact files as text with `Bun.file(...).text()`. Binary artifact contents are not preserved byte-for-byte in the report bundle.\n- The tool renderer truncates displayed output for the TUI preview, but the underlying text result still contains the full returned string.\n",
75
- "tools/edit.md": "# edit\n\n> Applies source edits; default mode is the hashline patch language consumed from a single `input` string.\n\n## Source\n- Entry: `packages/coding-agent/src/edit/index.ts`\n- Model-facing prompt: `packages/hashline/src/prompt.md`\n- Key collaborators:\n - `packages/coding-agent/src/utils/edit-mode.ts` — selects active edit mode\n - `packages/hashline/src/grammar.lark` — hashline grammar\n - `packages/hashline/src/format.ts` — sigils and header constants (`¶`, `#`, `@@`, `+`, `&`, `,`)\n - `packages/hashline/src/input.ts` — parses `¶PATH#TAG` sections\n - `packages/hashline/src/tokenizer.ts` / `packages/hashline/src/parser.ts` — tokenizes and parses ops\n - `packages/hashline/src/apply.ts` — applies parsed edits to file text\n - `packages/hashline/src/mismatch.ts` — stale-anchor mismatch formatting\n - `packages/hashline/src/recovery.ts` — snapshot-based stale-anchor recovery\n - `packages/hashline/src/snapshots.ts` — mints and resolves per-path opaque snapshot tags\n\n## Inputs\n\n### Hashline mode (default)\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `input` | `string` | Yes | One or more file sections. Anchored sections start with `¶PATH#TAG`; hashless `¶PATH` is allowed only for new-file creation or BOF/EOF-only inserts. Optional `*** Begin Patch` / `*** End Patch` envelope is ignored if present. |\n\nPatch language inside `input`:\n\n- **File header**: `¶PATH#TAG` (or `¶PATH` for new-file / virtual-only hunks). `TAG` is three uppercase-hex chars minted by the session snapshot store.\n- **Hunk header**: bare `A B` selects original lines A..B. Two numbers are REQUIRED — single-line ranges are written `A A` (`5 5`), not `5`. The range separator is normally whitespace; the parser also silently accepts `A-B`, `A..B`, and `A…B` (unicode ellipsis). 
Virtual variants `BOF` and `EOF` target positions before line 1 / after the last line.\n- **Body rows** (one per line, immediately under the hunk header):\n - `+TEXT` — add the literal line `TEXT` verbatim, including all leading whitespace.\n - `+` alone — add one blank line.\n - `&A..B` — re-emit original file lines A..B. Use this to keep some of the lines you selected. `&A` is accepted as `&A..A`.\n- **Semantics**:\n - The new content of the selected range is just the body rows top-to-bottom.\n - **Empty body deletes the range entirely.**\n - `BOF` / `EOF` with empty body is a no-op (nothing to insert).\n\nAnchors come from `read`/`search` output. `read` emits a `¶PATH#TAG` header from the session snapshot store and lines as `LINE:TEXT`; copy the header into the edit section and copy only the line number into hunk headers.\n\n### Tolerated input shapes (lenient parsing)\n\nBecause models reproduce nearby shapes (`read` output, `apply_patch` envelopes, unified-diff hunks), the parser is liberal about a handful of harmless variants:\n\n- `A` (bare single number) — REJECTED. The parser throws `single-number hunk header \"A\" is no longer accepted`. Spell single-line ranges as `A A`.\n- `A-B`, `A..B`, `A…B` — accepted as `A B` (any of hyphen, double-dot, or unicode ellipsis works as a silent separator).\n- `&A` — accepted as `&A..A`.\n- Bare body rows with no `+`/`&` prefix are auto-prepended with `+` and a `BARE_BODY_AUTO_PIPED_WARNING` is appended, BUT only when every row in that block is uniformly bare. 
Mixed `+`/raw blocks still throw.\n- `+&A..B` rows (model mistakenly prefixed a repeat with `+`) are silently rerouted as `&A..B` repeats with `PLUS_PREFIXED_REPEAT_WARNING`.\n- Identical-range hunks in the same patch are coalesced last-wins with `REPLACE_PAIR_COALESCED_WARNING`.\n- An overlapping bare hunk followed by a concrete hunk is treated as a stale \"before then after\" pair; the bare hunk is dropped with `REPLACE_PAIR_COALESCED_OVERLAP_WARNING`.\n- `*** Begin Patch` / `*** End Patch` envelopes are silently consumed. `*** Abort` terminates parsing silently — ops parsed before the marker still apply, no warning surfaced.\n- `*** Update File:` / `*** Add File:` / `*** Delete File:` / `*** Move to:` apply_patch sentinels throw an `apply_patch sentinel … is not valid in hashline` error.\n- `@@`-bracketed hunk headers (whether the apply_patch `@@ context @@` form or the unified-diff `@@ -N,M +N,M @@` shape) are rejected with an explicit \"drop the `@@ ... @@` brackets\" message — hashline hunks are bare `A B` lines.\n\n## Outputs\n- Single-shot tool result; hashline mode does not use a `resolve` preview/apply handshake.\n- `content` contains one text block per call. 
For a successful single-file edit it is either:\n - `<path>:` plus a compact diff preview from `packages/hashline/src/diff-preview.ts`, or\n - `Updated <path>` / `Created <path>` when no compact preview text is emitted.\n- Parse, apply, or recovery warnings are appended as:\n\n```text\nWarnings:\n...\n```\n\n- `details` is `EditToolDetails` from `packages/coding-agent/src/edit/renderer.ts`:\n - `diff`: unified diff string\n - `firstChangedLine`: first changed post-edit line\n - `diagnostics`: LSP/format result if available\n - `op`: `\"create\"` or `\"update\"` for hashline mode\n - `meta`: output metadata\n - `perFileResults`: present for multi-section input\n- Multi-section input returns one aggregated result with combined text and per-file details.\n\n## Worked examples\n\nReference file (the exact shape `read` returns):\n\n```text\n¶a.ts#0A3\n1:const X = \"a\";\n2:const Y = X;\n3:\n4:console.log(X);\n5:console.log(Y);\n6:export { X, Y };\n```\n\nReplace line 1 with two lines:\n\n```text\n¶a.ts#0A3\n1\n+const X = \"b\";\n+export const Y = X;\n```\n\nInsert BELOW line 5 (keep line 5, add after):\n\n```text\n¶a.ts#0A3\n5\n&5\n+console.log(X + Y);\n```\n\nInsert ABOVE line 5 (add before, keep line 5):\n\n```text\n¶a.ts#0A3\n5\n+console.log(X + Y);\n&5\n```\n\nDelete lines 4..5 entirely:\n\n```text\n¶a.ts#0A3\n4 5\n```\n\nInsert at start and end of file:\n\n```text\n¶a.ts#0A3\nBOF\n+// header\nEOF\n+// trailer\n```\n\nMulti-file:\n\n```text\n¶src/a.ts#0A3\n4\n+const enabled = true;\n¶src/b.ts#1F7\n20\n```\n\n## Limits & Caps\n- File snapshot tags are exactly three uppercase-hex chars minted by the per-session snapshot store.\n- The visible mismatch report shows 2 lines of context on each side (`MISMATCH_CONTEXT`) in `packages/hashline/src/messages.ts`.\n- Stale-anchor recovery uses `fuzzFactor: 0` in `packages/hashline/src/recovery.ts`.\n- `HL_FILE_PREFIX` is `¶`, `HL_PAYLOAD_REPLACE` is `+`, `HL_PAYLOAD_REPEAT` is `&`, `HL_RANGE_SEP` is `..` (repeat-row bodies 
only), and `HL_FILE_HASH_SEP` is `#` (`packages/hashline/src/format.ts`). Hunk headers carry no sigil; the range is just two whitespace-separated line numbers.\n\n## Errors\n- Missing section header:\n - `input must begin with \"¶PATH#HASH\" on the first non-blank line for anchored edits; got: ...`\n- Missing tag for anchored edit:\n - `Missing hashline snapshot tag for anchored edit to <path>; use ¶<path>#tag from your latest read/search output.`\n- Stray payload line:\n - `line N: payload line has no preceding hunk header. Use an \\`A B\\` (or \\`BOF\\` / \\`EOF\\`) line above the body. Got \"...\".`\n- Raw body row with no `+` / `&` prefix in a mixed-prefix block:\n - `line N: payload row in a hashline hunk must start with + or &A..B. Got \"...\".`\n- Range out of order:\n - `line N: range A..B ends before it starts.`\n- Overlapping hunks on the same anchor:\n - `line N: anchor line X is already targeted by another hunk on line Y. Issue ONE hunk per range; payload is only the final desired content, never a before/after pair.`\n- apply_patch / unified-diff contamination:\n - `line N: apply_patch sentinel \"*** …\" is not valid in hashline. File sections start with \\`¶path#HASH\\` (no \\`Update File:\\` / \\`Add File:\\` keyword). Hunks are bare \\`A B\\` lines with \\`+TEXT\\` / \\`&A..B\\` body rows.`\n - `line N: unified-diff hunk header (\\`@@ -N,M +N,M @@\\`) is not valid in hashline. Hashline hunks are bare \\`A B\\` lines (or \\`BOF\\` / \\`EOF\\` keywords).`\n - `line N: \\`@@\\`-bracketed hunk header \"@@ …\" is not valid in hashline. Drop the \\`@@ ... @@\\` brackets and write the range directly: \\`5 7\\` (\\`BOF\\` / \\`EOF\\` for virtual positions).`\n - `line N: single-number hunk header \"N\" is no longer accepted. 
Spell single-line ranges as \\`N N\\` (two numbers); hashline hunks are bare \\`A B\\` lines (or \\`BOF\\` / \\`EOF\\`).`\n- Out-of-range anchor:\n - `Line N does not exist (file has M lines)`\n- Stale snapshot tag throws `MismatchError`. The error contains re-read guidance and nearby current file lines as `*LINE:TEXT` / ` LINE:TEXT`.\n- No-op edit:\n - `Edits to <path> parsed and applied cleanly, but produced no change: your body row(s) are byte-identical to the file at the targeted lines. The bug is somewhere else — re-read the file before issuing another edit. Do NOT widen the payload or add lines; verify the anchor first.`\n- Recovery failure is silent internally: if cache-based merge cannot prove a valid result, the mismatch error is surfaced unchanged.\n\n## Warnings\n- `Detected two identical-range hashline hunks; kept only the second hunk. …` (`REPLACE_PAIR_COALESCED_WARNING`)\n- `Detected an overlapping bare hashline hunk immediately followed by a concrete hunk; dropped the earlier bare hunk. …` (`REPLACE_PAIR_COALESCED_OVERLAP_WARNING`)\n- `Auto-prefixed bare body row(s) with +. Always start payload rows with +TEXT (literal) or &A..B (repeat) …` (`BARE_BODY_AUTO_PIPED_WARNING`)\n- `A body row started with `+&A..B`. `+` (literal text) and `&A..B` (repeat) are sibling row kinds …` (`PLUS_PREFIXED_REPEAT_WARNING`)\n- Recovery banners: `RECOVERY_EXTERNAL_WARNING`, `RECOVERY_SESSION_CHAIN_WARNING`, `RECOVERY_SESSION_REPLAY_WARNING` (`packages/hashline/src/messages.ts`).\n",
75
+ "tools/edit.md": "# edit\n\n> Applies source edits; default mode is the hashline patch language consumed from a single `input` string.\n\n## Source\n- Entry: `packages/coding-agent/src/edit/index.ts`\n- Model-facing prompt: `packages/hashline/src/prompt.md`\n- Key collaborators:\n - `packages/coding-agent/src/utils/edit-mode.ts` — selects active edit mode\n - `packages/hashline/src/grammar.lark` — hashline grammar\n - `packages/hashline/src/format.ts` — sigils and header constants (`¶`, `#`, `@@`, `+`, `&`, `,`)\n - `packages/hashline/src/input.ts` — parses `¶PATH#TAG` sections\n - `packages/hashline/src/tokenizer.ts` / `packages/hashline/src/parser.ts` — tokenizes and parses ops\n - `packages/hashline/src/apply.ts` — applies parsed edits to file text\n - `packages/hashline/src/mismatch.ts` — stale-anchor mismatch formatting (distinguishes recognized-but-drifted from never-recorded hashes)\n - `packages/hashline/src/recovery.ts` — snapshot-based stale-anchor recovery\n - `packages/hashline/src/snapshots.ts` — mints and resolves per-path opaque snapshot tags\n\n## Inputs\n\n### Hashline mode (default)\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `input` | `string` | Yes | One or more file sections. Anchored sections start with `¶PATH#TAG`; hashless `¶PATH` is allowed only for new-file creation or BOF/EOF-only inserts. Optional `*** Begin Patch` / `*** End Patch` envelope is ignored if present. |\n\nPatch language inside `input`:\n\n- **File header**: `¶PATH#TAG` (or `¶PATH` for new-file / virtual-only hunks). `TAG` is three uppercase-hex chars minted by the session snapshot store.\n- **Hunk header**: bare `A B` selects original lines A..B. Two numbers are REQUIRED — single-line ranges are written `A A` (`5 5`), not `5`. The range separator is normally whitespace; the parser also silently accepts `A-B`, `A..B`, and `A…B` (unicode ellipsis). 
Virtual variants `BOF` and `EOF` target positions before line 1 / after the last line.\n- **Body rows** (one per line, immediately under the hunk header):\n - `+TEXT` — add the literal line `TEXT` verbatim, including all leading whitespace.\n - `+` alone — add one blank line.\n - `&A..B` — re-emit original file lines A..B. Use this to keep some of the lines you selected. `&A` is accepted as `&A..A`.\n- **Semantics**:\n - The new content of the selected range is just the body rows top-to-bottom.\n - **Empty body deletes the range entirely.**\n - `BOF` / `EOF` with empty body is a no-op (nothing to insert).\n\nAnchors come from `read`/`search` output. `read` emits a `¶PATH#TAG` header from the session snapshot store and lines as `LINE:TEXT`; copy the header into the edit section and copy only the line number into hunk headers.\n\n### Tolerated input shapes (lenient parsing)\n\nBecause models reproduce nearby shapes (`read` output, `apply_patch` envelopes, unified-diff hunks), the parser is liberal about a handful of harmless variants:\n\n- `A` (bare single number) — REJECTED. The parser throws `single-number hunk header \"A\" is no longer accepted`. Spell single-line ranges as `A A`.\n- `A-B`, `A..B`, `A…B` — accepted as `A B` (any of hyphen, double-dot, or unicode ellipsis works as a silent separator).\n- `&A` — accepted as `&A..A`.\n- Bare body rows with no `+`/`&` prefix are auto-prepended with `+` and a `BARE_BODY_AUTO_PIPED_WARNING` is appended, BUT only when every row in that block is uniformly bare. 
Mixed `+`/raw blocks still throw.\n- `+&A..B` rows (model mistakenly prefixed a repeat with `+`) are silently rerouted as `&A..B` repeats with `PLUS_PREFIXED_REPEAT_WARNING`.\n- Identical-range hunks in the same patch are coalesced last-wins with `REPLACE_PAIR_COALESCED_WARNING`.\n- An overlapping bare hunk followed by a concrete hunk is treated as a stale \"before then after\" pair; the bare hunk is dropped with `REPLACE_PAIR_COALESCED_OVERLAP_WARNING`.\n- `*** Begin Patch` / `*** End Patch` envelopes are silently consumed. `*** Abort` terminates parsing silently — ops parsed before the marker still apply, no warning surfaced.\n- `*** Update File:` / `*** Add File:` / `*** Delete File:` / `*** Move to:` apply_patch sentinels throw an `apply_patch sentinel … is not valid in hashline` error.\n- `@@`-bracketed hunk headers (whether the apply_patch `@@ context @@` form or the unified-diff `@@ -N,M +N,M @@` shape) are rejected with an explicit \"drop the `@@ ... @@` brackets\" message — hashline hunks are bare `A B` lines.\n\n## Outputs\n- Single-shot tool result; hashline mode does not use a `resolve` preview/apply handshake.\n- `content` contains one text block per call. 
For a successful single-file edit it is either:\n - `<path>:` plus a compact diff preview from `packages/hashline/src/diff-preview.ts`, or\n - `Updated <path>` / `Created <path>` when no compact preview text is emitted.\n- Parse, apply, or recovery warnings are appended as:\n\n```text\nWarnings:\n...\n```\n\n- `details` is `EditToolDetails` from `packages/coding-agent/src/edit/renderer.ts`:\n - `diff`: unified diff string\n - `firstChangedLine`: first changed post-edit line\n - `diagnostics`: LSP/format result if available\n - `op`: `\"create\"` or `\"update\"` for hashline mode\n - `meta`: output metadata\n - `perFileResults`: present for multi-section input\n- Multi-section input returns one aggregated result with combined text and per-file details.\n\n## Worked examples\n\nReference file (the exact shape `read` returns):\n\n```text\n¶a.ts#0A3\n1:const X = \"a\";\n2:const Y = X;\n3:\n4:console.log(X);\n5:console.log(Y);\n6:export { X, Y };\n```\n\nReplace line 1 with two lines:\n\n```text\n¶a.ts#0A3\n1 1\n+const X = \"b\";\n+export const Y = X;\n```\n\nInsert BELOW line 5 (keep line 5, add after):\n\n```text\n¶a.ts#0A3\n5 5\n&5\n+console.log(X + Y);\n```\n\nInsert ABOVE line 5 (add before, keep line 5):\n\n```text\n¶a.ts#0A3\n5 5\n+console.log(X + Y);\n&5\n```\n\nDelete lines 4..5 entirely:\n\n```text\n¶a.ts#0A3\n4 5\n```\n\nInsert at start and end of file:\n\n```text\n¶a.ts#0A3\nBOF\n+// header\nEOF\n+// trailer\n```\n\nMulti-file:\n\n```text\n¶src/a.ts#0A3\n4 4\n+const enabled = true;\n¶src/b.ts#1F7\n20 20\n```\n\n## Limits & Caps\n- File snapshot tags are exactly three uppercase-hex chars minted by the per-session snapshot store.\n- The visible mismatch report shows 2 lines of context on each side (`MISMATCH_CONTEXT`) in `packages/hashline/src/messages.ts`.\n- Stale-anchor recovery uses `fuzzFactor: 0` in `packages/hashline/src/recovery.ts`.\n- `HL_FILE_PREFIX` is `¶`, `HL_PAYLOAD_REPLACE` is `+`, `HL_PAYLOAD_REPEAT` is `&`, `HL_RANGE_SEP` is `..` (repeat-row bodies 
only), and `HL_FILE_HASH_SEP` is `#` (`packages/hashline/src/format.ts`). Hunk headers carry no sigil; the range is just two whitespace-separated line numbers.\n\n## Errors\n- Missing section header:\n - `input must begin with \"¶PATH#HASH\" on the first non-blank line for anchored edits; got: ...`\n- Missing tag for anchored edit:\n - `Missing hashline snapshot tag for anchored edit to <path>; use ¶<path>#tag from your latest read/search output.`\n- Stray payload line:\n - `line N: payload line has no preceding hunk header. Use an \\`A B\\` (or \\`BOF\\` / \\`EOF\\`) line above the body. Got \"...\".`\n- Raw body row with no `+` / `&` prefix in a mixed-prefix block:\n - `line N: payload row in a hashline hunk must start with + or &A..B. Got \"...\".`\n- Range out of order:\n - `line N: range A..B ends before it starts.`\n- Overlapping hunks on the same anchor:\n - `line N: anchor line X is already targeted by another hunk on line Y. Issue ONE hunk per range; payload is only the final desired content, never a before/after pair.`\n- apply_patch / unified-diff contamination:\n - `line N: apply_patch sentinel \"*** …\" is not valid in hashline. File sections start with \\`¶path#HASH\\` (no \\`Update File:\\` / \\`Add File:\\` keyword). Hunks are bare \\`A B\\` lines with \\`+TEXT\\` / \\`&A..B\\` body rows.`\n - `line N: unified-diff hunk header (\\`@@ -N,M +N,M @@\\`) is not valid in hashline. Hashline hunks are bare \\`A B\\` lines (or \\`BOF\\` / \\`EOF\\` keywords).`\n - `line N: \\`@@\\`-bracketed hunk header \"@@ …\" is not valid in hashline. Drop the \\`@@ ... @@\\` brackets and write the range directly: \\`5 7\\` (\\`BOF\\` / \\`EOF\\` for virtual positions).`\n - `line N: single-number hunk header \"N\" is no longer accepted. 
Spell single-line ranges as \\`N N\\` (two numbers); hashline hunks are bare \\`A B\\` lines (or \\`BOF\\` / \\`EOF\\`).`\n- Out-of-range anchor:\n - `Line N does not exist (file has M lines)`\n- Stale snapshot tag: the `Patcher` first attempts snapshot-based recovery (3-way-merge of the model’s edits onto the current file via `packages/hashline/src/recovery.ts`, `fuzzFactor: 0`). When recovery cannot prove a valid result it throws `MismatchError`, which distinguishes two cases:\n - **Hash recognized but file content drifted** (an in-session edit advanced the hash, or an external write changed the file): \"file changed between read and edit\" / \"Section is bound to #X, but the current file hashes to #Y\". Copy the post-edit hash from the prior edit response, or re-read.\n - **Hash never recorded for this path** (likely fabricated or carried over from a prior session): \"hash #X is not from this session\". Re-read; never invent the tag.\n In both cases the error includes the current file hash plus 2 lines of context around each anchor (`*LINE:TEXT` / ` LINE:TEXT`).\n- No-op edit:\n - `Edits to <path> parsed and applied cleanly, but produced no change: your body row(s) are byte-identical to the file at the targeted lines. The bug is somewhere else — re-read the file before issuing another edit. Do NOT widen the payload or add lines; verify the anchor first.`\n- Recovery failure is silent internally: if cache-based merge cannot prove a valid result, the mismatch error is surfaced unchanged.\n\n## Warnings\n- `Detected two identical-range hashline hunks; kept only the second hunk. …` (`REPLACE_PAIR_COALESCED_WARNING`)\n- `Detected an overlapping bare hashline hunk immediately followed by a concrete hunk; dropped the earlier bare hunk. …` (`REPLACE_PAIR_COALESCED_OVERLAP_WARNING`)\n- `Auto-prefixed bare body row(s) with +. Always start payload rows with +TEXT (literal) or &A..B (repeat) …` (`BARE_BODY_AUTO_PIPED_WARNING`)\n- `A body row started with `+&A..B`. 
`+` (literal text) and `&A..B` (repeat) are sibling row kinds …` (`PLUS_PREFIXED_REPEAT_WARNING`)\n- Recovery banners: `RECOVERY_EXTERNAL_WARNING`, `RECOVERY_SESSION_CHAIN_WARNING`, `RECOVERY_SESSION_REPLAY_WARNING` (`packages/hashline/src/messages.ts`).\n",
76
76
  "tools/eval.md": "# eval\n\n> Execute Python or JavaScript code in persistent cell-based runtimes.\n\n> **Notice:** Do not shell out to `python -c`/`python -e`, `bun -e`, or `node -e` via the `bash` tool for ad-hoc code execution. Use this tool instead — it gives you persistent state across cells, structured `display()` output, image/JSON capture, and proper cancellation/timeout handling that one-shot `-e`/`-c` invocations cannot provide.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/eval.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/eval.md`\n- Key collaborators:\n - `packages/coding-agent/src/eval/backend.ts` — backend execution contract\n - `packages/coding-agent/src/eval/js/index.ts` — JS backend adapter\n - `packages/coding-agent/src/eval/js/executor.ts` — JS execution + output sink\n - `packages/coding-agent/src/eval/js/context-manager.ts` — persistent VM contexts, prelude, tool bridge\n - `packages/coding-agent/src/eval/js/prelude.txt` — JS global helpers\n - `packages/coding-agent/src/eval/py/index.ts` — Python backend adapter\n - `packages/coding-agent/src/eval/py/executor.ts` — kernel session retention, reset, cleanup\n - `packages/coding-agent/src/eval/py/kernel.ts` — Jupyter gateway/kernel protocol, display capture\n - `packages/coding-agent/src/eval/py/prelude.py` — Python helper functions and status events\n - `packages/coding-agent/src/session/streaming-output.ts` — truncation, artifacts, streamed chunks\n - `docs/python-repl.md` — Python kernel/gateway internals\n\n## Inputs\n\nTool parameters are a JSON object with a single `cells` field — an ordered array of cell objects. Each cell is a structured record; there is no `*** Cell` header parsing, no language sniffing, and no implicit single-cell fallback. 
Cells run in array order; state persists within each language across cells and across tool calls.\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `cells` | `EvalCellInput[]` | Yes | Cells executed in order. At least one cell is required (`.min(1)`). |\n\nEach `EvalCellInput` (from `evalCellSchema` in `packages/coding-agent/src/tools/eval.ts`):\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `language` | `\"py\" \\| \"js\"` | Yes | Backend selector. `\"py\"` maps to the IPython/Jupyter kernel (`python` backend); `\"js\"` maps to the persistent JavaScript VM. |\n| `code` | `string` | Yes | Cell body, verbatim. JSON-encoded — embed newlines, quotes, and indentation directly; no fences, no headers. |\n| `title` | `string` | No | Short label rendered in the transcript (e.g. `\"imports\"`, `\"load config\"`). |\n| `timeout` | `integer` | No | Per-cell timeout in seconds, clamped to `1..600`. Defaults to 30 when omitted. |\n| `reset` | `boolean` | No | Wipe this cell's language kernel before running. Reset is per-language: a `py` cell's reset does not touch the JS VM and vice versa. Defaults to `false`. 
|\n\nMinimal example matching the live schema:\n\n```json\n{\n \"cells\": [\n { \"language\": \"py\", \"title\": \"imports\", \"timeout\": 10, \"code\": \"import json\\nfrom pathlib import Path\" },\n { \"language\": \"py\", \"title\": \"load config\", \"code\": \"data = json.loads(read('package.json'))\\ndisplay(data)\" },\n { \"language\": \"js\", \"title\": \"summary\", \"reset\": true, \"code\": \"const data = JSON.parse(await read('package.json'));\\ndisplay(data);\\nreturn data.name;\" }\n ]\n}\n```\n\n## Outputs\n\nFinal result from `EvalTool.execute()` is single-shot, but `onUpdate` streams partial text and `details` while cells run.\n\nReturned shape:\n\n- `content`: one text block containing combined cell output, or `(no text output)` / `(no output)` when only rich outputs exist.\n- `details` (`EvalToolDetails` from `packages/coding-agent/src/eval/types.ts`):\n - `cells`: per-cell code, status (`pending`/`running`/`complete`/`error`), output, duration, exit code, status events, markdown flag\n - `language`: first backend used\n - `languages`: distinct backends used, in first-use order\n - `jsonOutputs`: structured values emitted via `display(...)`\n - `images`: image payloads emitted by Python rich display or JS `display({ type: \"image\", ... 
})`\n - `statusEvents`: aggregated helper/tool status events\n - `notice`: backend fallback notice (currently unused; reserved for future per-cell notices)\n - `meta`: truncation metadata\n - `isError`: set on cell failure or cancellation\n\nRenderer behavior in `packages/coding-agent/src/tools/eval.ts`:\n\n- call preview renders each cell's `code` with syntax highlighting based on its declared `language`\n- result view renders each cell separately, including status, duration, and output\n- markdown outputs are rendered with the Markdown component instead of plain text\n- `jsonOutputs` render as a tree, collapsed or expanded depending on UI state\n- timeout / truncation notices render as dim metadata lines\n- images are carried in `details.images`; generic tool UI image handling renders them outside the text block\n\nSide-channel artifacts:\n\n- `session.allocateOutputArtifact?.(\"eval\")` may allocate an `artifact://...` backing store for spilled output.\n- Truncated output metadata points at that artifact when available.\n\n## Flow\n\n1. `EvalTool.execute()` in `packages/coding-agent/src/tools/eval.ts` receives `params.cells` already validated by the Zod schema — no string parsing step.\n2. For each cell, `execute()` maps `cell.language` to an `EvalLanguage` (`\"py\"` → `\"python\"`, `\"js\"` → `\"js\"`) and calls `resolveBackend(session, language)`:\n - `python` is gated on `eval.py !== false` and `pythonBackend.isAvailable(session)`.\n - `js` is gated on `eval.js !== false`.\n - A disabled or unavailable requested backend throws `ToolError`; there is no auto-fallback or sniffing.\n3. The tool allocates an `OutputSink`, a `TailBuffer`, per-cell result objects, and a `sessionAbortController`. `session.trackEvalExecution?.(...)` can wrap the whole run for external cancellation tracking.\n4. It resolves the executor session id from `session.getEvalSessionId?.()`, falling back to `defaultEvalSessionId(session)`. 
Subagents inherit the parent's id so both sides share the same JS VM and Python kernel for each backend.\n5. Cells execute sequentially within one eval tool call. For each cell, `execute()`:\n - clamps `(cell.timeout ?? 30) * 1000` ms through `clampTimeout(\"eval\", ...)`\n - builds a combined abort signal from the tool signal, the timeout, and the session abort controller\n - marks the cell `running` and emits an update\n - calls the backend’s `execute()` with `cwd`, `sessionId`, `sessionFile`, `kernelOwnerId`, `deadlineMs`, `reset` (defaults to `false`), artifact info, and chunk callback\n6. JS cells dispatch through `packages/coding-agent/src/eval/js/index.ts` into `executeJs()`; Python cells dispatch through `packages/coding-agent/src/eval/py/index.ts` into `executePython()`.\n7. Backend text chunks stream into the shared `OutputSink`; rich outputs are accumulated separately as JSON, images, markdown markers, and status events.\n8. After each cell:\n - text output is trimmed and stored on that cell result\n - multi-cell runs prefix text with `[i/n]` and the optional title\n - cancellations return early with `isError: true` and a cell-specific abort message\n - non-zero exit codes return early with `isError: true` and a message naming the failed cell\n - later cells are skipped after the first error, but earlier cell state persists in the underlying runtime\n9. On success, the tool joins all cell outputs, synthesizes `(no text output)` or `(no output)` when needed, and attaches truncation metadata from `summarizeFinal()`.\n10. The renderer uses `details.cells`, `details.jsonOutputs`, and `details.statusEvents` to build notebook-style output. 
`mergeCallAndResult = true` and `inline = true`, so call and result render together in the transcript.\n\n## Modes / Variants\n\n### Backend selection\n\nBackend choice is **explicit per cell** — there is no auto-detection.\n\n- `language: \"py\"` → Python (IPython/Jupyter) backend\n- `language: \"js\"` → JavaScript VM backend\n\nIf the requested backend is disabled or unavailable, the tool throws `ToolError` for that cell. The caller chooses; the tool does not silently substitute.\n\n### JavaScript runtime\n\nImplemented in `packages/coding-agent/src/eval/js/context-manager.ts` and `packages/coding-agent/src/eval/js/prelude.txt`.\n\n- Persistent worker-backed VM sessions keyed by `js:${sessionId}`\n- `reset: true` calls `resetVmContext(sessionKey)` before the cell executes; reset is destructive for all live runs on that JS session\n- Top-level `await` and bare `return` are supported by wrapping code in an async IIFE when `wrapCode()` sees `await` or `return`\n- Top-level static `import ... from ...` and dynamic `import(...)` calls are routed through `rewriteImports()`, which sends them via `__omp_import__` so the specifier resolves against the session cwd\n- Module cache is busted for **local** imports between cells so edits to source files are picked up without restarting the runtime. `__omp_import__` deletes `require.cache[absPath]` before re-importing whenever the original specifier is a filesystem path: relative (`./x`, `../x`, `.`, `..`), POSIX-absolute (`/...`), home-prefixed (`~/...`), or Windows drive-letter (`C:\\...` / `C:/...`). Bare specifiers (`react`, `lodash/x`) and URL/scheme specifiers (`node:fs`, `file://...`, `https://...`) are left in cache so package identity stays stable across cells. 
The cache-bust only fires when the resolved target is an absolute path — unresolved bare-package fallbacks (`resolveImportSpecifier()` returning the original specifier) skip it.\n- The prelude installs globals:\n - `display`, `print`\n - `read`, `write`, `append`, `sort`, `uniq`, `counter`, `diff`, `tree`, `env`, `output`\n - `tool.<name>(args)` proxy for arbitrary session tool calls\n- JS helpers are async because they cross the VM/tool boundary\n- `display(value)` behavior:\n - plain objects/arrays become JSON outputs\n - `{ type: \"image\", data, mimeType }` becomes an image output\n - scalars become text\n- The VM exposes a restricted `process` subset plus `Buffer`, `fetch`, `Blob`, `File`, `Headers`, `Request`, `Response`, `fs`, `require`, and browser-style globals\n- Concurrent runs on the same VM are not queued end-to-end. Synchronous JS still runs on the single event loop; awaited regions can interleave with sibling runs.\n\n### Python runtime\n\nImplemented in `packages/coding-agent/src/eval/py/executor.ts`, `packages/coding-agent/src/eval/py/kernel.ts`, and `packages/coding-agent/src/eval/py/prelude.py`. 
See `docs/python-repl.md` for gateway and kernel details.\n\n- Default mode is retained `session` kernels keyed by `python:${sessionId}`\n- Optional `python.kernelMode = \"per-call\"` creates a fresh kernel for each cell and shuts it down afterward\n- `reset: true` disposes the retained kernel for that session before the cell runs; later Python cells in the same tool call reuse the fresh kernel\n- Startup path:\n - availability check\n - create/connect kernel\n - initialize cwd / env / `sys.path`\n - execute `PYTHON_PRELUDE`\n- Python cells run in the runner's persistent asyncio event loop, so top-level `await` works; the prompt warns not to use `asyncio.run(...)`\n- The Python prelude defines helpers with the same surface as JS where practical, including `tool.<name>(args)` through a per-run loopback bridge\n- Synchronous statement blocks run in the default executor with ContextVar state copied in; the GIL still serializes bytecode execution, but awaited regions can interleave with sibling cells\n- Kernel `display_data` / `execute_result` messages map to:\n - `application/x-omp-status` → status event\n - `image/png` → image output\n - `application/json` → JSON output\n - `text/markdown` → markdown output\n - `text/plain` → text output\n - `text/html` → HTML converted to markdown with `htmlToBasicMarkdown()`\n- Interactive stdin is rejected: `input_request` sends an empty reply, marks `stdinRequested`, and the executor returns exit code `1`\n\n### Multi-language call behavior\n\nA single tool call can mix Python and JS cells. 
Persistence is per language runtime:\n\n- `reset: true` on a Python cell does not touch JS state\n- `reset: true` on a JS cell does not touch Python state\n- each backend keeps its own retained session keyed from the same session-derived ID\n\n## Side Effects\n\n- Filesystem\n - JS/Python prelude helpers can read, write, append, diff, and traverse files under the session cwd or absolute paths.\n - Output may spill to an artifact file via `OutputSink`.\n- Network\n - Python backend speaks NDJSON to a local `python3` subprocess over stdin/stdout (no network).\n - JS runtime exposes `fetch` and `tool.<name>()`; those tools may perform additional network I/O.\n- Subprocesses / native bindings\n - Python availability check runs `<python> -c ...`.\n - Python backend spawns one `python -u runner.py` subprocess per kernel; cancellation sends `SIGINT`. Details in `docs/python-repl.md`.\n- Session state\n - `session.assertEvalExecutionAllowed?.()` can block execution.\n - `session.trackEvalExecution?.(...)` can register cancellable eval work.\n - `session.getSessionFile?.()`, `session.getEvalSessionId?.()`, and `session.getEvalKernelOwnerId?.()` influence VM/kernel reuse and artifact lookup.\n - JS VM contexts persist across eval calls until reset/disposal.\n - Python retained kernels persist until reset, owner cleanup, or process exit.\n- User-visible prompts / interactive UI\n - none; stdin requests are rejected programmatically\n- Background work / cancellation\n - Python retained kernels have heartbeat and idle cleanup timers.\n - Cancellation hard-kills/resets the shared executor for that backend: JS terminates the worker, Python sends SIGINT and may escalate to subprocess shutdown.\n\n## Limits & Caps\n\n- Per-cell timeout default: 30s (applied when `timeout` is omitted in `EvalTool.execute()`; clamped through `TOOL_TIMEOUTS.eval.default` in `packages/coding-agent/src/tools/tool-timeouts.ts`)\n- Schema-level `timeout` range: integer `1..600` seconds (enforced by Zod on 
the cell schema)\n- Timeout clamp at runtime: 1s minimum, 600s maximum (`TOOL_TIMEOUTS.eval` in `packages/coding-agent/src/tools/tool-timeouts.ts`)\n- Transcript code/output preview: 10 lines by default (`EVAL_DEFAULT_PREVIEW_LINES` in `packages/coding-agent/src/tools/eval.ts`)\n- Output truncation window: 50KB default (`DEFAULT_MAX_BYTES` in `packages/coding-agent/src/session/streaming-output.ts`)\n- Output line cap inside truncation helpers: 3000 lines (`DEFAULT_MAX_LINES` in `packages/coding-agent/src/session/streaming-output.ts`)\n- Streaming tail buffer for live updates: `DEFAULT_MAX_BYTES * 2` = 100KB (`packages/coding-agent/src/tools/eval.ts`)\n- Python retained kernel idle timeout: 5 minutes (`IDLE_TIMEOUT_MS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python retained kernel cap: 4 sessions (`MAX_KERNEL_SESSIONS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python retained kernel cleanup sweep: every 30s (`CLEANUP_INTERVAL_MS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python owner-cleanup shutdown wait: 2000ms (`OWNER_CLEANUP_KERNEL_SHUTDOWN_TIMEOUT_MS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python heartbeat interval: 5s (`ensureKernelHeartbeat()` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python external gateway availability check timeout: 5s (`AbortSignal.timeout(5000)` in `packages/coding-agent/src/eval/py/kernel.ts`)\n- Python auto-restart budget: one restart per retained session before hard failure (`restartCount > 1` in `packages/coding-agent/src/eval/py/executor.ts`)\n\n## Errors\n\n- Zod validation rejects malformed `cells` arrays before `execute()` runs (missing `language`/`code`, out-of-range `timeout`, empty `cells`).\n- Missing session without proxy executor throws `ToolError(\"Eval tool requires a session when not using proxy executor\")`.\n- Disabled/unavailable backends throw `ToolError` from `resolveBackend()`:\n - `eval.py = false` and a `py` cell is requested\n - `eval.js = 
false` and a `js` cell is requested\n - Python kernel unavailable and a `py` cell is requested\n- JS runtime exceptions are converted into text output plus `exitCode: 1`; cancellations return `cancelled: true` and may append `Command timed out`.\n- Python execution errors from the kernel become text output and `exitCode: 1`; later cells are skipped.\n- Python stdin requests are treated as errors with the message `Kernel requested stdin; interactive input is not supported.`\n- Cancellation is returned, not thrown, once backend execution has started. The tool formats it as a cell failure and sets `details.isError = true`.\n- If output truncates, the tool still succeeds; truncation is surfaced through `details.meta` and artifact-backed full output when available.\n\n## Shared executor trade-offs\n\n- Parent agents and subagents share eval state bidirectionally when a subagent inherits the parent's executor id. Mutations in either direction are visible to the other participant.\n- Async regions of concurrent runs can interleave. Synchronous JS still blocks the VM event loop; synchronous Python still contends on the GIL.\n- Cancelling one run is destructive to the shared backend executor. This is intentional: JS worker termination and Python SIGINT/subprocess shutdown are the only reliable way to interrupt arbitrary user code.\n- `reset: true` is destructive for every live run on that backend session id. New starts on that backend are rejected while reset is in flight.\n\n## Notes\n\n- Backend selection is now strictly explicit per cell: `language` must be `\"py\"` or `\"js\"`. The previous `*** Cell` header parser, the `eval.lark` constrained grammar, and the sniffer-based fallback have all been removed.\n- `EvalTool.customFormat` no longer exists. Tool calls flow through the standard JSON schema; there is no Lark-constrained sampling path.\n- `tool.<name>()` exists in both JS and Python. 
Python calls route through a per-run loopback bridge keyed by the current cell id.\n- JS helper paths reject protocol URIs (`://`) in `resolvePath()`; the JS prelude is filesystem-only unless the code calls `tool.read(...)` or another tool explicitly.\n- Python helper `output(...)` depends on `PI_SESSION_FILE`; it fails outside a session-backed run.\n- `display()` can produce text and structured outputs from the same value; the renderer prefers markdown over `text/plain` when both exist.\n- JS static imports are rewritten only at top level. Nested imports stay invalid and surface normal JS syntax/runtime errors.\n- `EvalTool` is `concurrency = \"exclusive\"` within one agent session, but parent and subagent sessions can run eval concurrently when they share an inherited executor id.\n- The tool description shown to the model is templated by backend availability (`getEvalToolDescription()`); if Python is unavailable, the prompt omits Python-specific instructions.\n",
77
77
  "tools/find.md": "# find\n\n> Find filesystem paths by glob; use `search` when you need content matches instead of path matches.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/find.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/find.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/path-utils.ts` — normalize inputs; split base path vs glob.\n - `packages/coding-agent/src/tools/list-limit.ts` — apply result-count caps.\n - `packages/coding-agent/src/session/streaming-output.ts` — truncate text output at byte cap.\n - `packages/coding-agent/src/tools/tool-result.ts` — build `content` and `details.meta`.\n - `packages/coding-agent/src/tools/output-meta.ts` — encode limit / truncation metadata.\n - `packages/coding-agent/src/tools/tool-errors.ts` — map user-facing tool errors.\n - `packages/coding-agent/src/tools/index.ts` — register the built-in local implementation.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `paths` | `string[]` | Yes | One or more globs, files, or directories. Empty strings are rejected. Multiple entries may be merged into one brace-union search when their base paths can be resolved together. |\n| `hidden` | `boolean` | No | Whether hidden files are included. Defaults to `true` (`hidden ?? true`). |\n| `limit` | `number` | No | Max returned paths. Defaults to `1000`. Must be a finite positive number; non-integers are floored. |\n\n## Outputs\nThe tool returns a single text block plus structured `details`.\n\n- Success text: newline-delimited paths, one per line, relative to the session cwd when possible; absolute when outside cwd. 
Exact file inputs return that file path as one line.\n- Empty result text: `No files found matching pattern`.\n- Multi-path partial miss: appends `Skipped missing paths: ...` after the result block, or after the empty-result line.\n- `details` may include:\n - `scopePath`: display form of the searched root or merged roots.\n - `fileCount`: number of paths returned after result limiting.\n - `files`: returned paths as an array.\n - `truncated`: whether result count or byte truncation occurred.\n - `resultLimitReached`: reached result limit.\n - `missingPaths`: skipped missing inputs in multi-path calls.\n - `truncation` / `meta.limits`: structured truncation and limit metadata for renderers.\n- Streaming: when the runtime supplies `onUpdate`, the local implementation emits incremental newline-delimited text snapshots during globbing, throttled to 200 ms.\n\n## Flow\n1. `FindTool.execute()` normalizes each `paths` entry with `normalizePathLikeInput()` and `/\\\\/g -> \"/\"` (`packages/coding-agent/src/tools/find.ts`). Empty normalized entries fail with `` `paths` must contain non-empty globs or paths ``.\n2. For multi-path local calls, `partitionExistingPaths(..., parseFindPattern)` (`packages/coding-agent/src/tools/path-utils.ts`) stats each base path. Missing entries are skipped; if all are missing, the tool throws `Path not found: ...`. Single missing paths still hard-fail.\n3. The tool tries `resolveExplicitFindPatterns()` to merge multiple inputs into one search rooted at a common base path. If that does not apply, it parses one input with `parseFindPattern()`.\n4. `parseFindPattern()` determines `(basePath, globPattern, hasGlob)`:\n - no glob chars (`*`, `?`, `[`, `{`) => search that path with implicit `**/*`.\n - glob in the first segment => search from `.` and, unless the pattern already starts with `**/`, prefix it with `**/`.\n - glob later in the path => split at the first glob-bearing segment.\n5. 
`resolveToCwd()` converts the base path to an absolute path under the session cwd. A resolved `/` is rejected with `Searching from root directory '/' is not allowed`.\n6. `limit` is defaulted to `DEFAULT_LIMIT` (`1000`) and validated as a positive finite number (non-integers are floored). `hidden` defaults to `true`. The tool also creates a 5 s timeout via `AbortSignal.timeout(GLOB_TIMEOUT_MS)`.\n7. Execution then branches:\n - **Custom operations branch**: if `FindToolOptions.operations.glob` exists, the tool checks existence with `operations.exists()`, short-circuits exact-file inputs via `operations.stat()` when available, then calls `operations.glob(globPattern, searchPath, { ignore: [\"**/node_modules/**\", \"**/.git/**\"], limit })`.\n - **Built-in local branch**: the tool stats `searchPath`. Exact-file inputs return immediately. Directory inputs call `natives.glob()` with `fileType: File`, `hidden`, `maxResults: limit`, `sortByMtime: true`, `gitignore: true`, and the combined abort signal.\n8. In the local branch, optional `onMatch` callbacks convert each match to a cwd-relative display path and emit throttled progress updates.\n9. After native glob returns, JS sorts `result.matches` by `mtime` descending (`(b.mtime ?? 0) - (a.mtime ?? 0)`) before formatting paths.\n10. `buildResult()` applies `applyListLimit()` to cap the array again at `limit`, joins paths with `\\n`, then runs `truncateHead()` with `maxLines: Number.MAX_SAFE_INTEGER`. In practice this leaves the 50 KB byte cap in place while disabling the default 3000-line cap.\n11. 
`toolResult()` packages text plus `details`, and records result-limit / truncation metadata for renderers.\n\n## Modes / Variants\n- **Exact file path**: if the parsed input has no glob and the resolved path stats as a file, output is that one path.\n- **Directory path**: if the parsed input has no glob and stats as a directory, the tool searches it with implicit `**/*`.\n- **Single glob path**: one input parsed by `parseFindPattern()`.\n- **Merged multi-path search**: multiple inputs resolved by `resolveExplicitFindPatterns()` into one brace-union glob rooted at a common base path.\n- **Partial multi-path search with missing inputs**: local multi-path calls skip missing base paths and surface them as `missingPaths` / `Skipped missing paths: ...`.\n- **Custom delegated search**: uses injected `FindOperations` instead of local fs + native glob.\n\n## Side Effects\n- Filesystem\n - Stats the resolved base path, and in local multi-path mode stats every candidate base path up front.\n - Does not write files.\n- Subprocesses / native bindings\n - Built-in local mode calls the native `@oh-my-pi/pi-natives` glob implementation.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Emits structured progress updates when `onUpdate` is provided.\n - Adds truncation / limit metadata to the tool result.\n- Background work / cancellation\n - Local globbing is cancellable through the caller abort signal plus an internal 5 s timeout.\n\n## Limits & Caps\n- Default result limit: `1000` (`DEFAULT_LIMIT` in `packages/coding-agent/src/tools/find.ts`).\n- Local glob timeout: `5000` ms (`GLOB_TIMEOUT_MS` in `packages/coding-agent/src/tools/find.ts`).\n- Output byte cap: `50 * 1024` bytes (`DEFAULT_MAX_BYTES` in `packages/coding-agent/src/session/streaming-output.ts`).\n- Default generic line cap in `truncateHead()` is `3000`, but `find` overrides `maxLines` to `Number.MAX_SAFE_INTEGER`, so byte size — not line count — is the practical output truncation cap.\n- 
Streaming update throttle: `200` ms between `onUpdate` emissions.\n- Sort order: most recent `mtime` first in the built-in local branch and promised in the prompt. The tool re-sorts in JS even though native glob receives `sortByMtime: true` so native code can still stop early at `maxResults`.\n\n## Errors\n- User-facing `ToolError`s from `FindTool.execute()` include:\n - `` `paths` must contain non-empty globs or paths ``\n - `Path not found: ...`\n - `Searching from root directory '/' is not allowed`\n - `Limit must be a positive number`\n - `Path is not a directory: ...`\n - `find timed out after 5s`\n- If the caller aborts, the local branch converts `AbortError` into `ToolAbortError`.\n- Non-`ENOENT` stat failures and other unexpected errors are rethrown.\n- Empty matches are not errors; they return the no-files text result.\n\n## Notes\n- Reach for `find` for filename / path discovery. Reach for `search` when the selection criterion is file contents or regex matches; `search` takes a `pattern` and returns anchored content matches, while `find` only returns matching paths (`packages/coding-agent/src/prompts/tools/find.md`, `packages/coding-agent/src/prompts/tools/search.md`).\n- Bare top-level globs are made recursive. `*.ts` is parsed as base `.` plus glob `**/*.ts`; `src/*.ts` stays rooted at `src` with a non-recursive `*.ts` segment; `src/**/*.ts` preserves explicit recursion.\n- `.gitignore` is always enabled in the built-in local branch (`gitignore: true`). There is no model-facing flag to disable it.\n- `hidden` defaults to `true`; hidden-file exclusion is opt-out, not opt-in.\n- Multi-path missing-input tolerance only applies in the built-in local branch. The custom-operations branch hard-fails the first missing `searchPath` it checks.\n- The custom `FindOperations.glob()` hook receives `ignore` and `limit`, but not the `hidden` flag or an explicit `.gitignore` toggle. 
A remote delegate must account for that itself if it wants parity with the local branch.\n- Built-in local globbing asks the native layer for `fileType: File`, so recursive directory searches yield files, not directories. Directory outputs are only possible through exact-path passthrough or custom delegates that return them.\n",
78
78
  "tools/github.md": "# github\n\n> Dispatch GitHub CLI operations for repositories, issues, pull requests, search, and Actions run watching.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/gh.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/github.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/gh-format.ts` — shorten commit SHAs for summaries.\n - `packages/coding-agent/src/tools/gh-renderer.ts` — TUI rendering, especially `run_watch` live/result views.\n - `packages/coding-agent/src/utils/git.ts` — `gh`/`git` process wrappers, repo locking, branch config writes.\n - `packages/utils/src/dirs.ts` — base directory for dedicated PR worktrees.\n - `packages/coding-agent/src/sdk.ts` — session artifact allocation hook.\n - `packages/coding-agent/src/session/artifacts.ts` — artifact filename format `<id>.<toolType>.log`.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `op` | `\"repo_view\" \\| \"pr_create\" \\| \"pr_checkout\" \\| \"pr_push\" \\| \"search_issues\" \\| \"search_prs\" \\| \"search_code\" \\| \"search_commits\" \\| \"search_repos\" \\| \"run_watch\"` | Yes | Dispatch selector. `GithubTool.execute()` switches only on this field. |\n| `repo` | `string` | No | `owner/repo` override. Ignored when the identifier argument is already a full GitHub URL. For `search_issues`/`search_prs`/`search_code`/`search_commits`, defaults to the current checkout's `owner/repo` when omitted (skipped when the query already contains a `repo:`/`org:`/`user:`/`owner:` qualifier or when current-repo resolution fails). Required in practice when `gh` cannot infer repo context from the current checkout. |\n| `branch` | `string` | No | Used by `repo_view`, `pr_push`, and `run_watch`. `run_watch` falls back to current git branch when `run` is omitted; `pr_push` falls back to current branch. |\n| `pr` | `string \\| string[]` | No | Used by `pr_checkout`. 
Each item may be a PR number, branch name, or GitHub PR URL. Array form enables batching. Omitted means current branch PR. |\n| `force` | `boolean` | No | Used only by `pr_checkout`. Defaults to `false`; allows resetting an existing `pr-<number>` local branch to the PR head commit. |\n| `forceWithLease` | `boolean` | No | Used only by `pr_push`; passed through to git push. |\n| `title` | `string` | No | Used only by `pr_create`. Required unless `fill` is `true`. |\n| `body` | `string` | No | Used only by `pr_create`. Mutually exclusive with `fill`. Empty/omitted body becomes `--body \"\"` to suppress the interactive editor. Non-empty body is written to a temp file and passed as `--body-file`. |\n| `base` | `string` | No | Used only by `pr_create`; passed as `--base`. |\n| `head` | `string` | No | Used only by `pr_create`; passed as `--head`. |\n| `draft` | `boolean` | No | Used only by `pr_create`. Defaults to `false`. |\n| `fill` | `boolean` | No | Used only by `pr_create`. Defaults to `false`. Mutually exclusive with `title` and `body`. |\n| `reviewer` | `string[]` | No | Used only by `pr_create`; each entry becomes `--reviewer`. |\n| `assignee` | `string[]` | No | Used only by `pr_create`; each entry becomes `--assignee`. |\n| `label` | `string[]` | No | Used only by `pr_create`; each entry becomes `--label`. |\n| `query` | `string` | No | Used by all `search_*` ops. Required there. |\n| `limit` | `number` | No | Used by all `search_*` ops. Defaults to `10`, floored, clamped to `50`, and must be `> 0`. |\n| `run` | `string` | No | Used only by `run_watch`. Must be a numeric run ID or full GitHub Actions run URL. |\n| `tail` | `number` | No | Used only by `run_watch`. Defaults to `15`, floored, clamped to `200`, and must be `> 0`. |\n\n## Outputs\nThe tool returns a single text result built by `buildTextResult()` in `packages/coding-agent/src/tools/gh.ts`.\n\n- `content`: one text block. 
Multi-item ops join sections with blank lines and `---` separators.\n- `sourceUrl`: set for single repo/PR/run results when a canonical URL is known.\n- `details`: optional structured metadata used by the TUI renderer.\n - Common fields: `artifactId`, `repo`, `branch`, `worktreePath`, `remote`, `remoteBranch`, `headSha`, `runId`, `runIds`, `status`, `conclusion`, `failedJobs`.\n - `pr_checkout` adds `checkouts: GhPrCheckoutSummary[]`.\n - `run_watch` adds `watch: GhRunWatchViewDetails`, which drives the custom live/result renderer in `packages/coding-agent/src/tools/gh-renderer.ts`.\n- Artifact trailer: when `artifactId` is present, the text body gets an appended line like `Full failed-job logs: artifact://<id>`.\n - `run_watch` allocates artifacts with `session.allocateOutputArtifact(\"github\")`; persistent sessions therefore save failed-log bodies as `<artifact-dir>/<id>.github.log`.\n\n`run_watch` is the only streaming op. It emits `onUpdate` snapshots while polling, then returns one final text result.\n\n## Flow\n1. `GithubTool.createIf()` exposes the tool only when `git.github.available()` finds `gh` on `PATH`.\n2. `GithubTool.execute()` wraps dispatch in `untilAborted()` and switches on `params.op`.\n3. Each op normalizes optional strings, arrays, booleans, and numeric caps locally in `packages/coding-agent/src/tools/gh.ts`.\n4. CLI execution goes through `git.github.run/json/text()` in `packages/coding-agent/src/utils/git.ts`:\n - spawns `gh ...` with `Bun.spawn()`;\n - trims stdout/stderr unless `trimOutput: false`;\n - maps common auth/repo-context failures into tool-facing `ToolError` messages;\n - `json()` rejects empty or invalid JSON.\n5. Read-style ops (`repo_view`, `search_*`) fetch JSON and format Markdown-like text summaries. Single-issue and single-PR views were moved out of the tool and now resolve through the `issue://` / `pr://` internal URL schemes, which share the same SQLite cache.\n6. PR diffs moved out of the tool. 
`pr://<N>/diff` lists changed files, `pr://<N>/diff/<i>` slices a single file, and `pr://<N>/diff/all` returns the full unified diff — see `docs/tools/read.md`. All three variants share one `gh pr diff` invocation through the `pr-diff` cache row.\n7. `pr_checkout` resolves PR metadata first, then enters `git.withRepoLock()` before any git mutation so parallel checkout calls for the same primary repo do not race on shared `.git` state.\n8. `pr_push` reads PR head metadata back from git branch config, derives a refspec, then pushes with `git.push()`.\n9. `pr_create` shells out once, then best-effort re-reads the created PR for a richer summary.\n10. `run_watch` chooses either run mode (`run` supplied) or commit mode (`run` omitted), polls GitHub Actions APIs every 3 seconds, emits streaming updates, and may save a full failed-log artifact before returning.\n11. Final text goes through `toolResult().text(...)`; if `session.allocateOutputArtifact()` returns a slot, failed-log text is persisted with `Bun.write()`.\n\n## Modes / Variants\n\n### `repo_view`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `branch` |\n| `gh` command | `gh repo view [<repo>] [--branch <branch>] --json <GH_REPO_FIELDS>` |\n| Batching | None |\n| Output | `# <owner/repo>` header, description, URL, default branch, requested branch, visibility, permission, primary language, stars, forks, archive/fork flags, updated timestamp, homepage, topics. `sourceUrl = data.url`. |\n\nIf `repo` is omitted, `gh` repository resolution is used.\n\nSingle-issue and single-PR reads live in the `issue://<N>` / `pr://<N>` URL schemes (see `docs/tools/read.md`). They share `~/.omp/cache/github-cache.db` (override via `OMP_GITHUB_CACHE_DB`) and the `github.cache.softTtlSec` / `github.cache.hardTtlSec` / `github.cache.enabled` settings. 
The cache retains rendered Markdown plus the raw JSON payload returned by `gh`, including private bodies, comments, reviews, and review comments when comments are enabled; rows are scoped by the local GitHub credential fingerprint. Root and repo-scoped reads (`issue://`, `pr://owner/repo`) issue a live `gh issue list` / `gh pr list` for browsing; query params `state`, `limit`, `author`, `label` pass through to `gh` (`issue://` accepts `state=open|closed|all`; `pr://` also accepts `merged`). PR diffs ride the same cache under `pr://<N>/diff[/…]`: the listing, full diff, and per-file slices all share one `pr-diff` row keyed by repo and PR number.\n\n### `pr_create`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` plus either `fill=true` or `title` |\n| Optional fields | `repo`, `title`, `body`, `base`, `head`, `draft`, `fill`, `reviewer[]`, `assignee[]`, `label[]` |\n| `gh` command | `gh pr create ...` with flags assembled from provided fields |\n| Batching | None |\n| Output | `# Created Pull Request ...` summary with URL, state, draft flag, base/head, author, created time, labels, optional body. `sourceUrl` is the created PR URL. |\n\nBranches:\n- `fill && (title || body !== undefined)` throws.\n- Non-empty `body` is written under a temp dir `gh-pr-body-*` in `os.tmpdir()`, passed as `--body-file`, then removed in `finally`.\n- After creation, the tool parses the returned URL and best-effort runs `gh pr view <number> --repo <repo> --json <GH_PR_FIELDS_NO_COMMENTS>`; failures there are swallowed.\n\n### `pr_checkout`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `pr`, `force` |\n| `gh` command | For each requested PR: `gh pr view [<pr>] [--repo <repo>] --json <GH_PR_CHECKOUT_FIELDS>`; cross-repo PRs may also call `gh repo view <headRepository> --json <GH_REPO_CLONE_FIELDS>`. |\n| Batching | Yes. 
`pr` may be `string[]`; each PR is resolved in parallel, but git mutations are serialized per primary repo by `git.withRepoLock()`. |\n| Output | Single PR: checkout/worktree summary plus `details.repo`, `details.branch`, `details.worktreePath`, `details.remote`, `details.remoteBranch`, `details.checkouts`. Batched: `# <n> Pull Request Worktrees (...)` plus one section per PR and aggregated `details.checkouts`. |\n\nWorktree and metadata behavior:\n- Local branch name is always `pr-<number>`.\n- Worktree path is `path.join(getWorktreesDir(), encodeRepoPathForFilesystem(primaryRepoRoot), localBranch)`, where `getWorktreesDir()` is `~/.omp/wt`; effective path is `~/.omp/wt/<encoded-primary-repo-root>/pr-<number>`.\n- Existing worktree detection is by branch ref `refs/heads/pr-<number>` from `git.worktree.list()`.\n- New worktree creation calls `git.worktree.add(repoRoot, finalWorktreePath, localBranch, { signal })` after verifying the path is neither already registered nor already present on disk.\n- For same-repo PRs, remote is `origin`. 
For cross-repo PRs, the tool resolves a clone URL for the head repo, reuses an existing remote with the same URL when possible, or creates `fork-<owner>` / `fork-<owner>-<n>`.\n- The branch push metadata is persisted with `git config` under the repository's shared `.git/config` as:\n - `branch.pr-<number>.remote`\n - `branch.pr-<number>.merge`\n - `branch.pr-<number>.pushRemote`\n - `branch.pr-<number>.ompPrHeadRef`\n - `branch.pr-<number>.ompPrUrl`\n - `branch.pr-<number>.ompPrIsCrossRepository`\n - `branch.pr-<number>.ompPrMaintainerCanModify`\n- If `refs/heads/pr-<number>` already exists at a different commit, checkout fails unless `force=true`, in which case `git branch --force` resets it to the fetched PR head.\n- If a matching worktree already exists, the tool reuses it and reports `reused: true`.\n\n### `pr_push`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `branch`, `forceWithLease` |\n| `gh` command | None. This path uses git, not `gh`. |\n| Batching | None |\n| Output | `# Pushed Pull Request Branch` summary with local branch, remote, remote branch, remote URL, PR URL, and force-with-lease flag. `sourceUrl = prUrl` when known. |\n\nPush target resolution reads the `branch.<name>.ompPrHeadRef`, `pushRemote`/`remote`, `ompPrUrl`, `ompPrMaintainerCanModify`, and `ompPrIsCrossRepository` git-config keys written by `pr_checkout`. If the current checked-out branch matches the target branch, the source ref is `HEAD`; otherwise it pushes `refs/heads/<branch>`. 
The refspec is `HEAD:refs/heads/<headRef>` or `refs/heads/<branch>:refs/heads/<headRef>`.\n\n### `search_issues`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `repo`, `limit` |\n| `gh` command | `gh api -X GET /search/issues -f q=\"<query> [repo:<repo>] is:issue\" -F per_page=<limit>` |\n| Batching | None |\n| Output | `# GitHub issues search`, echoed query, optional repo, result count, then one bullet per issue with repo/state/author/labels/timestamps/URL. |\n\n`repo` defaults to the current checkout's `owner/repo` via `resolveSearchRepoScope()` when omitted. The default is suppressed when the query already contains a leading `repo:`/`org:`/`user:`/`owner:` qualifier or when `gh repo view` fails to resolve the current checkout (e.g. when the checkout has no GitHub remote).\n\n### `search_prs`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `repo`, `limit` |\n| `gh` command | `gh api -X GET /search/issues -f q=\"<query> [repo:<repo>] is:pr\" -F per_page=<limit>` |\n| Batching | None |\n| Output | Same shape as `search_issues`, labeled as pull requests. |\n\n`repo` defaults to the current checkout's `owner/repo` as in `search_issues`.\n\n### `search_code`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `repo`, `limit` |\n| `gh` command | `gh api -X GET /search/code -f q=\"<query> [repo:<repo>]\" -F per_page=<limit> -H \"Accept: application/vnd.github.text-match+json\"` |\n| Batching | None |\n| Output | `# GitHub code search`, result count, then one bullet per match with path, repo, short commit SHA, URL, and first normalized text-match fragment line when present. 
|\n\n`repo` defaults to the current checkout's `owner/repo` as in `search_issues`.\n\n### `search_commits`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `repo`, `limit` |\n| `gh` command | `gh api -X GET /search/commits -f q=\"<query> [repo:<repo>]\" -F per_page=<limit>` |\n| Batching | None |\n| Output | `# GitHub commits search`, result count, then one bullet per commit: short SHA + first commit-message line, repo, author, date, URL. |\n\n`repo` defaults to the current checkout's `owner/repo` as in `search_issues`.\n\n### `search_repos`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `limit` |\n| `gh` command | `gh api -X GET /search/repositories -f q=\"<query>\" -F per_page=<limit>` |\n| Batching | None |\n| Output | `# GitHub repositories search`, result count, then one bullet per repo with first description line, language, stars, forks, open issues, visibility, archive/fork flags, updated time, URL. |\n\n`repo` is intentionally not used for this op.\n\n### `run_watch`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `branch`, `run`, `tail` |\n| `gh` command | Repo resolution: `gh repo view --json nameWithOwner -q .nameWithOwner` when `repo` and run URL repo are both absent. Single-run mode uses `gh api --method GET /repos/<repo>/actions/runs/<runId>` and `gh api --method GET /repos/<repo>/actions/runs/<runId>/jobs`. Commit mode uses `gh api --method GET /repos/<repo>/branches/<branch>`, `gh api --method GET /repos/<repo>/actions/runs`, `gh api --method GET /repos/<repo>/actions/runs/<runId>/jobs`, and `gh api /repos/<repo>/actions/jobs/<jobId>/logs` for failed jobs. |\n| Batching | Implicit batching only in commit mode: all workflow runs for one commit are tracked together. |\n| Output | Streaming watch snapshots via `onUpdate`, then a final text report. 
On failure, appends `Full failed-job logs: artifact://<id>` and sets `details.artifactId`. |\n\nWatch flow:\n- `run` parsing accepts either a decimal run ID or a full run URL. URL repo must match explicit `repo` when both are given.\n- Poll interval is fixed at 3 seconds (`RUN_WATCH_INTERVAL_DEFAULT`).\n- Failure grace period is fixed at 5 seconds (`RUN_WATCH_GRACE_DEFAULT`). When any failed job appears before completion, the tool emits a note, waits once, re-fetches state, then collects logs so concurrent failures are included.\n- Failed-job logs are fetched with `gh api /repos/<repo>/actions/jobs/<jobId>/logs` via `git.github.run()`, not `json()`. Non-zero exit leaves `available: false` instead of failing the whole watch.\n- Inline result includes only the last `tail` lines per failed job. The saved artifact contains full logs (`mode: \"full\"`).\n- In commit mode, success is intentionally double-checked: once all known runs are successful, the tool waits one more poll interval and succeeds only if the set of run IDs is unchanged. 
This avoids returning before late workflow runs appear for the same commit.\n- `details.watch` drives a specialized renderer in `packages/coding-agent/src/tools/gh-renderer.ts`; non-watch results fall back to generic text rendering.\n\n## Side Effects\n- Filesystem\n - `pr_create` may create a temp dir under `os.tmpdir()` named `gh-pr-body-*`, write `body.md`, then remove the dir in `finally`.\n - `pr_checkout` may create directories under `~/.omp/wt/<encoded-primary-repo-root>/` and add git worktrees there.\n - `run_watch` may write a session artifact with full failed-job logs.\n- Network\n - Every op shells out to `gh`, which then talks to GitHub APIs except `pr_push`.\n - `pr_push` uses git network transport to the configured remote.\n- Subprocesses / native bindings\n - All `gh` calls use `Bun.spawn([\"gh\", ...args])`.\n - `pr_checkout` and `pr_push` also invoke git helpers from `packages/coding-agent/src/utils/git.ts`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - `run_watch` consumes `session.allocateOutputArtifact()` when failed-job logs are persisted.\n - Returned `details` objects carry run/checkouts metadata for the renderer/UI.\n- User-visible prompts / interactive UI\n - `gh` interactive editor fallback is suppressed for `pr_create` by forcing either `--body-file` or `--body \"\"`.\n - `gh-renderer` provides compact headers for all ops and a custom live watch view for `run_watch`.\n- Background work / cancellation\n - `run_watch` loops until success/failure and uses `scheduler.wait()` between polls.\n - `GithubTool.execute()` is wrapped in `untilAborted()`; `git.github.run()` forwards the abort signal into `Bun.spawn()`.\n\n## Limits & Caps\n- Search result default: `10` (`SEARCH_LIMIT_DEFAULT` in `packages/coding-agent/src/tools/gh.ts`).\n- Search result max: `50` (`SEARCH_LIMIT_MAX`).\n- PR file preview inside the `pr://` view: first `50` files only (`FILE_PREVIEW_LIMIT` in `gh.ts`).\n- Run-watch poll interval: `3s` 
(`RUN_WATCH_INTERVAL_DEFAULT`).\n- Run-watch failure grace period: `5s` (`RUN_WATCH_GRACE_DEFAULT`).\n- Run-watch failed-log tail default: `15` lines (`RUN_WATCH_TAIL_DEFAULT`).\n- Run-watch failed-log tail max: `200` lines (`RUN_WATCH_TAIL_MAX`).\n- PR review comments page size: `100` (`REVIEW_COMMENTS_PAGE_SIZE`).\n- Actions jobs page size: `100` (`RUN_JOBS_PAGE_SIZE`).\n- Search and tail numeric inputs are floored with `Math.floor()`, clamped to the max, and rejected when non-finite or `<= 0`.\n- `pr_checkout` batch fan-out is unbounded in tool code; all requested PRs are launched with `Promise.all()`.\n\n## Errors\n- Tool creation is skipped entirely when `gh` is not installed.\n- `git.github.run()` throws `ToolError(\"GitHub CLI (gh) is not installed...\")` if `gh` is missing at execution time.\n- `git.github.text/json()` map common failures to model-facing messages:\n - not authenticated → `GitHub CLI is not authenticated. Run \\`gh auth login\\`.`\n - missing repo context without explicit `repo` → `GitHub repository context is unavailable. 
Pass \\`repo\\` explicitly or run the tool inside a GitHub checkout.`\n - otherwise stderr/stdout text, or fallback `GitHub CLI command failed: gh ...`\n- `json()` also throws on empty stdout or invalid JSON.\n- Local validation errors throw `ToolError`, including:\n - missing required per-op fields (`query`, `title unless fill=true`)\n - invalid numeric `limit` / `tail`\n - invalid `run` format\n - `fill` combined with `title` or `body`\n - missing git repo / branch / HEAD context for checkout, push, or watch\n - `pr_push` on a branch without `ompPrHeadRef` metadata\n - conflicting existing worktree path or branch without `force`\n- `run_watch` treats failed-job log fetches specially: missing log content does not fail the watch; it marks that log `available: false` and prints `Log tail unavailable.` / `Full log unavailable.`.\n- `pr_create` swallows only the post-create best-effort `gh pr view` refresh; the create step itself still fails normally.\n\n## Notes\n- `appendRepoFlag()` intentionally skips `--repo` when the identifier argument is already a full GitHub URL; that lets `gh` derive repo/number from the URL.\n- `normalizePrIdentifierList()` accepts `reviewer`, `assignee`, and `label` arrays too; the helper name is broader than its callers.\n- `pr_push` depends on `pr_checkout` having run first for that local branch; there is no alternate metadata source.\n- `pr_checkout` stores push metadata in branch config, not in the worktree directory. 
Reusing the same `pr-<number>` branch reuses those config keys.\n- Worktree write serialization is keyed by the primary repo root, not the current worktree path, because git worktrees share `.git/config`, `packed-refs`, commit-graph, and worktree metadata files.\n- `search_repos` is the only search op that never forwards `repo`; repository scoping must be expressed in the query itself.\n- `run_watch` success in commit mode means “all observed runs succeeded and no additional runs appeared one poll later”, not merely “latest poll looked green”.\n- The TUI renderer collapses failed log previews unless the result view is expanded; the underlying text result still contains the same tailed lines plus any artifact reference.\n",
@@ -94,7 +94,7 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
94
94
  "tools/task.md": "# task\n\n> Launch subagents for parallel, optionally isolated work.\n\n## Source\n- Entry: `packages/coding-agent/src/task/index.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/task.md`\n- Key collaborators:\n - `packages/coding-agent/src/task/types.ts` — dynamic schema, progress/result types, output caps.\n - `packages/coding-agent/src/task/discovery.ts` — discover project/user/plugin/bundled agents.\n - `packages/coding-agent/src/task/agents.ts` — bundled agent definitions and frontmatter parsing.\n - `packages/coding-agent/src/task/executor.ts` — create child sessions, run subagents, collect output.\n - `packages/coding-agent/src/task/parallel.ts` — concurrency-limited scheduling and async semaphore.\n - `packages/coding-agent/src/task/isolation-backend.ts` — isolation backend resolution and platform fallback.\n - `packages/coding-agent/src/task/worktree.ts` — worktree / FUSE / ProjFS setup, patch capture, branch merge.\n - `packages/coding-agent/src/task/output-manager.ts` — session-scoped `agent://` id allocation.\n - `packages/coding-agent/src/task/simple-mode.ts` — `default` / `schema-free` / `independent` field gating.\n - `packages/coding-agent/src/internal-urls/agent-protocol.ts` — resolve `agent://<id>` to saved subagent output.\n - `packages/coding-agent/src/tools/index.ts` — tool registration and recursion-depth gating.\n - `packages/coding-agent/src/sdk.ts` — child-session router/tool wiring and per-subagent `AgentOutputManager`.\n - `docs/task-agent-discovery.md` — deeper discovery and precedence notes.\n - `docs/handoff-generation-pipeline.md` — session artifact/handoff persistence patterns used by the wider session layer.\n\n## Inputs\n\n### Default mode (`task.simple = \"default\"`)\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `agent` | `string` | Yes | Exact agent name for every task item. Resolved at execution time through `discoverAgents(...)`. 
|\n| `tasks` | `Array<{ id: string; description: string; assignment: string }>` | Yes | Batch of small, self-contained task items. `id` max length 48 in schema; duplicate ids are rejected case-insensitively at runtime. |\n| `context` | `string` | No | Shared background prepended to every subagent system prompt. Trimmed before use. |\n| `schema` | `string` | No | JSON-encoded JTD schema. Overrides agent/session output schema when this mode allows task-level schemas. |\n| `isolated` | `boolean` | No | Only present when the tool is created with isolation enabled. Requests isolated execution for the whole batch. |\n\n`tasks[].description` is UI-only. `tasks[].assignment` is the actual per-task instruction.\n\n### Schema-free mode (`task.simple = \"schema-free\"`)\n\nSame as default, except `schema` is rejected by `validateTaskModeParams(...)` in `packages/coding-agent/src/task/index.ts`.\n\n### Independent mode (`task.simple = \"independent\"`)\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `agent` | `string` | Yes | Exact agent name. |\n| `tasks` | `Array<{ id: string; description: string; assignment: string }>` | Yes | Same item shape, but each `assignment` must carry all required background because shared `context` is disabled. |\n| `isolated` | `boolean` | No | Same conditional field as above. 
|\n\nIn this mode both `context` and `schema` are rejected.\n\n## Outputs\nThe tool returns one text block plus `details: TaskToolDetails`.\n\n`details` fields:\n- `projectAgentsDir: string | null` — nearest discovered project `agents/` dir.\n- `results: SingleResult[]` — one entry per task in input order for synchronous execution; empty for async-launch responses.\n- `totalDurationMs: number`\n- `usage?: Usage` — sum of per-subagent assistant-message usage.\n- `outputPaths?: string[]` — written `.md` artifact paths for completed subagent outputs.\n- `progress?: AgentProgress[]` — live or final per-task progress snapshots.\n- `async?: { state: \"running\" | \"completed\" | \"failed\"; jobId: string; type: \"task\" }` — present for background execution updates/results.\n\n`SingleResult` includes:\n- identity: `index`, `id`, `agent`, `agentSource`, `description`, optional `assignment`\n- status: `exitCode`, optional `error`, optional `aborted`, optional `abortReason`\n- output: `output`, `stderr`, `truncated`, `durationMs`, `tokens`\n- artifact metadata: `outputPath?`, `patchPath?`, `branchName?`, `nestedPatches?`, `outputMeta?`\n- extracted tool data: `extractedToolData?` from registered subprocess tool handlers such as `yield` and `report_finding`\n\nArtifacts and side channels:\n- Every subagent with an artifacts dir writes `<id>.md`; `agent://<id>` resolves to that file.\n- If the output file is JSON, `agent://<id>/<path>` and `agent://<id>?q=<query>` perform JSON extraction in `packages/coding-agent/src/internal-urls/agent-protocol.ts`.\n- When the parent session persists artifacts, each subagent also gets `<id>.jsonl` session history.\n- Isolated patch mode writes `<id>.patch` per successful task before merge.\n- Async mode returns immediately after job registration, then emits `onUpdate(...)` progress snapshots and later hands completion to the session async-job pipeline.\n\n## Flow\n1. 
`TaskTool.create(...)` in `packages/coding-agent/src/task/index.ts` calls `discoverAgents(session.cwd)` once to build the dynamic prompt description from current agents and `task.simple` capabilities.\n2. `execute(...)` validates mode-gated fields with `validateTaskModeParams(...)`.\n3. It decides async vs sync:\n - sync when `async.enabled` is false\n - sync when the selected cached agent has `blocking === true`\n - sync when `tasks.length === 0`\n - otherwise async job scheduling\n4. Async path:\n - allocate unique output ids with `AgentOutputManager.allocateBatch(...)`\n - create one async job per task through `session.asyncJobManager.register(...)`\n - limit concurrent job bodies with `Semaphore(task.maxConcurrency)` from `packages/coding-agent/src/task/parallel.ts`\n - each job body calls `#executeSync(...)` with a one-task batch and the preallocated id\n - `onUpdate(...)` emits aggregate `progress` snapshots and `details.async`\n5. Sync path (`#executeSync(...)`) rediscovers agents from disk via `discoverAgents(...)`, so runtime resolution can differ from the earlier prompt description.\n6. It resolves the requested agent with `getAgent(...)`, rejects unknown or disabled agents, and enforces parent spawn policy plus `PI_BLOCKED_AGENT` self-recursion prevention.\n7. It derives the effective output schema in priority order: task call `schema` (if allowed) → agent frontmatter `output` → inherited parent session schema.\n8. It validates task ids: missing ids and case-insensitive duplicates are immediate errors.\n9. If `isolated` was requested, it requires a git repo (`getRepoRoot(...)` / `captureBaseline(...)`) and resolves the actual backend through `resolveIsolationBackendForTaskExecution(...)`.\n10. It chooses an artifacts dir from the parent session when available, otherwise a temp dir, and writes `context.md` there when `session.getCompactContext?.()` returns content.\n11. 
It allocates unique ids again if the caller did not preallocate them, then builds `tasksWithUniqueIds`.\n12. For each task, it seeds an `AgentProgress` entry and runs `runTask(...)` through `mapWithConcurrencyLimit(...)` using `task.maxConcurrency`.\n13. Non-isolated `runTask(...)` calls `runSubprocess(...)` directly with parent cwd.\n14. Isolated `runTask(...)`:\n - creates an isolation workspace (`ensureWorktree(...)`, `ensureFuseOverlay(...)`, or `ensureProjfsOverlay(...)`)\n - applies the captured baseline for worktrees\n - runs `runSubprocess(...)` inside that workspace\n - on success, either commits to a per-task branch (`mergeMode === \"branch\"`) or captures a patch with `captureDeltaPatch(...)`\n - always cleans up the isolation workspace/backend\n15. `runSubprocess(...)` in `packages/coding-agent/src/task/executor.ts` creates a child agent session with:\n - isolated settings snapshot via `Settings.isolated(...)`, forcing `async.enabled = false` and `bash.autoBackground.enabled = false`\n - child `agentId` / `parentTaskPrefix` equal to the allocated task id\n - child internal URL router and `AgentOutputManager` from `packages/coding-agent/src/sdk.ts`\n - the shared `context`, optional `context.md` reference, optional isolation worktree path, output schema, and IRC peer roster in the system prompt template\n16. Child tool availability is derived from the agent definition plus runtime guards:\n - explicit `agent.tools` if provided\n - auto-add `task` when the agent has `spawns` and recursion depth allows it\n - remove `task` at or past `task.maxRecursionDepth`\n - expand `exec` to `eval` and `bash`\n - strip parent-owned `todo_write` after session creation\n17. `runSubprocess(...)` subscribes to child agent events, coalesces progress updates every 150 ms, forwards lifecycle/progress events on the parent event bus, and extracts tool data through `subprocessToolRegistry`.\n18. The child must finish through the hidden `yield` tool. 
If it does not, `runSubprocess(...)` sends up to 3 reminder prompts; the last reminder forces `toolChoice = yield` when supported.\n19. Finalization uses `finalizeSubprocessOutput(...)` to reconcile raw assistant text, `yield` payloads, structured schemas, `report_finding` data, and abort states. Output is truncated with `MAX_OUTPUT_BYTES` / `MAX_OUTPUT_LINES` before returning to the parent, but the full raw output is still written to `<id>.md`.\n20. After all sync tasks finish, `#executeSync(...)` aggregates usage, collects artifact paths, and if isolation was used merges results back:\n - branch mode: cherry-pick per-task branches with `mergeTaskBranches(...)`, then delete merged branches with `cleanupTaskBranches(...)`\n - patch mode: combine non-empty patch artifacts, dry-check with `git.patch.canApplyText(...)`, then apply or leave manual artifacts\n - nested repo patches are applied separately with `applyNestedPatches(...)`\n21. The final text summary is rendered from `packages/coding-agent/src/prompts/tools/task-summary.md` and includes `agent://<id>` handles for outputs that exist.\n\n## Modes / Variants\n- Execution mode\n - Sync inline execution — default path.\n - Async background execution — one async job per task item when `async.enabled` is on and the chosen agent is not marked `blocking`.\n- Simple mode\n - `default` — accepts shared `context` and per-call `schema`.\n - `schema-free` — accepts `context`, rejects `schema`.\n - `independent` — rejects `context` and `schema`; each assignment stands alone.\n- Isolation backend\n - `none` — no isolation.\n - `worktree` — detached git worktree plus baseline replay.\n - `fuse-overlay` — Unix FUSE overlay mount.\n - `fuse-projfs` — Windows ProjFS overlay.\n- Isolation merge strategy\n - Patch mode — capture/apply root patches, keep patch artifacts when application fails.\n - Branch mode — commit each task onto `omp/task/<id>` branch, cherry-pick into parent, preserve failed branches for manual resolution.\n- 
Agent source\n - Project custom agents — nearest project config/plugin agent directories, first by source-family precedence.\n - User custom agents — user config/plugin agent directories after project dirs of the same source family.\n - Bundled agents — appended last from `packages/coding-agent/src/task/agents.ts`.\n- Bundled agent types\n - `explore` — read-only scout with structured handoff output.\n - `plan` — architecture/planning agent; may spawn `explore`.\n - `designer` — UI/UX specialist.\n - `reviewer` — review agent with `report_finding` extraction.\n - `task` — general-purpose worker with full capabilities.\n - `quick_task` — low-reasoning mechanical worker using the same task prompt body.\n - `librarian` — source-grounded external API/library researcher.\n\n## Side Effects\n- Filesystem\n - Writes `context.md`, `<id>.jsonl`, and `<id>.md` under the session artifacts dir or a temp task dir.\n - In isolated patch mode writes `<id>.patch` artifacts.\n - Creates/removes worktrees or overlay mount directories.\n - In branch mode creates temporary worktrees and task branches.\n- Network\n - Child sessions may use whichever networked tools/models their active tool set permits.\n - MCP proxy tools can call existing parent MCP connections with a 60_000 ms timeout.\n- Subprocesses / native bindings\n - `fuse-overlayfs` and `fusermount`/`fusermount3` for FUSE isolation.\n - ProjFS native bindings via `@oh-my-pi/pi-natives` on Windows.\n - Git operations for baseline capture, patch apply, worktrees, branches, stash, cherry-pick, commits.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Creates child `AgentSession` instances with isolated settings snapshots.\n - Registers async jobs in `session.asyncJobManager` for background task mode.\n - Emits `task:subagent:event`, `task:subagent:progress`, and `task:subagent:lifecycle` on the parent event bus.\n - Allocates session-scoped output ids through `AgentOutputManager` so `agent://` remains 
unique across invocations and resumes.\n - Shares the parent `local://` root with subagents by passing `localProtocolOptions` through `createAgentSession(...)`.\n- User-visible prompts / interactive UI\n - Async mode streams aggregate progress updates.\n - Missing-`yield` recovery sends up to three internal reminder prompts to the child session.\n - Final summaries include `<system-notification>` blocks for isolation fallbacks or merge failures.\n- Background work / cancellation\n - Parent abort stops scheduling new work, aborts active child sessions, and marks unscheduled tasks as skipped.\n - Async jobs keep their own cancellation via `AsyncJobManager`.\n\n## Limits & Caps\n- Per-subagent output truncation: `MAX_OUTPUT_BYTES = 500_000` and `MAX_OUTPUT_LINES = 5000` in `packages/coding-agent/src/task/types.ts`. Full raw output is still written to `<id>.md` before truncation is returned to the caller.\n- Progress coalescing in child execution: `PROGRESS_COALESCE_MS = 150` in `packages/coding-agent/src/task/executor.ts`.\n- Recent output tail for progress: `RECENT_OUTPUT_TAIL_BYTES = 8 * 1024` and `recentOutput` keeps the last 8 non-empty lines in `packages/coding-agent/src/task/executor.ts`.\n- Missing-`yield` reminder retries: `MAX_YIELD_RETRIES = 3` in `packages/coding-agent/src/task/executor.ts`.\n- MCP proxy timeout: `MCP_CALL_TIMEOUT_MS = 60_000` in `packages/coding-agent/src/task/executor.ts`.\n- Task id schema cap: `tasks[].id` `maxLength: 48` in `packages/coding-agent/src/task/types.ts`.\n- Prompt text says ids should be `≤32` chars, but the runtime schema allows 48; this mismatch is real.\n- Async/full sync parallelism both use `task.maxConcurrency` from settings:\n - sync path: `mapWithConcurrencyLimit(...)`\n - async path: `Semaphore(...)` around job bodies\n- Recursion depth gate: `task.maxRecursionDepth` from settings; `packages/coding-agent/src/tools/index.ts` hides the `task` tool at or beyond the limit, and `runSubprocess(...)` also strips child 
`task` access at max depth.\n- Final inline summary preview per task uses `fullOutputThreshold = 5000` chars in `packages/coding-agent/src/task/index.ts`; longer outputs are summarized while `agent://<id>` points to the full artifact.\n\n## Errors\n- Most validation failures are returned as normal tool text with empty `results`, not thrown:\n - invalid simple-mode fields\n - unknown/disabled agent\n - missing tasks\n - missing/duplicate task ids\n - spawn-policy denial\n - requesting `isolated` while isolation mode is `none`\n- Isolated execution without a git repo returns `Isolated task execution requires a git repository. ...`.\n- Backend resolution can return a hard error (`ProjFS isolation initialization failed...`) or a non-fatal warning with fallback to `worktree`.\n- `mapWithConcurrencyLimit(...)` fails fast on non-abort worker exceptions; already completed results are preserved only in the thrown path’s local state, not surfaced unless the caller catches and converts them.\n- Child-session failures surface as `SingleResult.exitCode = 1` with `stderr`/`error` populated.\n- If the child omits `yield`, `finalizeSubprocessOutput(...)` injects warnings such as `SYSTEM WARNING: Subagent exited without calling yield tool after 3 reminders.`\n- Async scheduling failures are accumulated per task; if no jobs start, the tool returns `Failed to start background task jobs: ...`.\n- `agent://<id>` resolution errors are model-visible when another tool reads them: no session, no artifacts dir, missing id, conflicting extraction syntax, or invalid JSON for extraction.\n\n## Notes\n- Agent discovery precedence is first-wins by exact name: project dirs before user dirs within a source family, plugin agent dirs after config dirs, bundled agents last. See `packages/coding-agent/src/task/discovery.ts` and `docs/task-agent-discovery.md`.\n- `TaskTool.create(...)` caches discovered agents only for description rendering and the async blocking-agent decision. 
`#executeSync(...)` rediscovers agents each call.\n- Custom agent frontmatter can override bundled agents by name. Bundled definitions are embedded at build time in `packages/coding-agent/src/task/agents.ts`.\n- Child sessions do not inherit conversation history automatically. The only built-in carry-over is shared `context`, optional `context.md`, workspace tree/skills/context files, and shared `local://` root.\n- `Settings.isolated(...)` gives each child a session-isolated settings snapshot; tool enablement is recomputed inside the child session rather than sharing mutable parent tool state.\n- When the parent passes `mcpManager`, child sessions disable standalone MCP discovery and instead get proxy tools that reuse the parent connections.\n- Plan mode mutates an `effectiveAgent` with a read-only tool subset and plan-mode prompt text, but `runSubprocess(...)` is still invoked with `agent` rather than `effectiveAgent`. Model/thinking/schema overrides use the effective agent; prompt/tool/spawn restrictions do not fully flow through this call path.\n- Branch-mode merge temporarily stashes the parent repo before cherry-picking task branches. A stash-pop conflict is treated as merge failure and leaves recovery state behind.\n- Patch-mode only applies combined root patches if every successful task produced a patch and `git.patch.canApplyText(...)` succeeds.\n- Nested git repos are handled separately from the root repo. They are copied into isolated worktrees, diffed independently, and merged later with `applyNestedPatches(...)` because parent git cannot track their file-level changes.\n- `agent://` ids are numeric-prefixed (`0-Task`, `1-Task`, nested like `0-Parent.0-Child`) by `AgentOutputManager`; this is what prevents artifact collisions across repeated or nested task invocations.\n",
95
95
  "tools/todo_write.md": "# todo_write\n\n> Applies ordered mutations to the session todo list and returns a text summary plus the full phase/task state.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/todo-write.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/todo-write.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/index.ts` — registers tool, exposes session hooks, gates availability.\n - `packages/coding-agent/src/modes/controllers/event-controller.ts` — updates the visible todo UI on tool completion.\n - `packages/coding-agent/src/session/agent-session.ts` — stores cached phases, auto-clears done/dropped tasks, emits failure reminders.\n - `packages/coding-agent/src/modes/controllers/todo-command-controller.ts` — `/todo` command path, custom-entry persistence, transcript reminder injection.\n - `packages/coding-agent/src/tools/render-utils.ts` — collapsed-preview cap for renderer trees.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `ops` | `TodoOpEntry[]` | Yes | Ordered operations to apply. `minItems: 1`.\n\n### `TodoOpEntry`\n\n| Op | Required fields | Optional fields | Effect |\n| --- | --- | --- | --- |\n| `init` | `list` | None of the other fields are used | Replaces the entire list with `list`; every new task starts `pending` before normalization. |\n| `start` | `task` | None | Marks one task `in_progress`; any other `in_progress` task is demoted to `pending`. |\n| `done` | `task` or `phase` or neither | None | Marks the target task, phase, or all tasks `completed`. |\n| `drop` | `task` or `phase` or neither | None | Marks the target task, phase, or all tasks `abandoned`. |\n| `rm` | `task` or `phase` or neither | None | Removes the target task, clears the phase's task list, or clears all task lists. |\n| `append` | `phase`, `items` | None | Appends new `pending` tasks to a phase; creates the phase if missing. 
|\n| `note` | `task`, `text` | None | Appends one trimmed note string to the task's `notes` array. |\n\n### Fields used inside ops\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `op` | `\"init\" \\| \"start\" \\| \"done\" \\| \"rm\" \\| \"drop\" \\| \"append\" \\| \"note\"` | Yes | Operation discriminator. |\n| `list` | `{ phase: string; items: string[] }[]` | For `init` | Full replacement payload. Each `items` array has `minItems: 1`. |\n| `task` | `string` | For `start`; for task-targeted `done`/`drop`/`rm`/`note` | Exact task content match. |\n| `phase` | `string` | For `append`; for phase-targeted `done`/`drop`/`rm` | Exact phase name match, except `append` lazily creates a missing phase. |\n| `items` | `string[]` | For `append` | Tasks to append. `minItems: 1`. |\n| `text` | `string` | For `note` | Note text; trailing whitespace is stripped before storing. Empty-after-trim is rejected. |\n\n## Outputs\nThe tool returns a single-shot `AgentToolResult`:\n\n- `content`: one text part containing the summary from `formatSummary(...)`.\n - Empty final state with no errors: `Todo list cleared.`\n - Non-empty final state: remaining-item list, current phase progress, then a per-phase tree.\n - If the active `in_progress` task has notes, the summary includes the note bodies inline.\n - If any op produced validation/runtime errors, the summary starts with `Errors: ...` but still returns the mutated state.\n- `details`:\n - `phases: TodoPhase[]`\n - `storage: \"session\" | \"memory\"`\n\n`TodoPhase` / `TodoItem` state model:\n\n- `TodoPhase`: `{ name: string, tasks: TodoItem[] }`\n- `TodoItem`: `{ content: string, status: \"pending\" | \"in_progress\" | \"completed\" | \"abandoned\", notes?: string[] }`\n\nThe TUI renderer (`todoWriteToolRenderer`) merges call and result into one transcript block, renders phases as a tree, shows note counts as superscripts, and renders the note bodies only for the current `in_progress` task. 
Collapsed transcript previews cap tree items at `PREVIEW_LIMITS.COLLAPSED_ITEMS` (`8`).\n\n## Flow\n1. `TodoWriteTool.execute(...)` clones the current cached phases from `session.getTodoPhases?.() ?? []` (`packages/coding-agent/src/tools/todo-write.ts`).\n2. `applyParams(...)` walks `params.ops` in order and applies each entry with `applyEntry(...)`.\n3. Each op mutates the working phase array:\n - `initPhases(...)` rebuilds the list from scratch.\n - `start` resolves a task by exact `content`, demotes every other `in_progress` task to `pending`, then marks the target `in_progress`.\n - `done` / `drop` use `getTaskTargets(...)` to target one task, one phase, or every task.\n - `rm` removes one task, clears one phase's `tasks`, or clears all phases' task arrays.\n - `appendItems(...)` resolves or creates the target phase and pushes new `pending` tasks unless the same task content already exists anywhere.\n - `note` trims trailing whitespace, rejects empty text, and appends the note to `task.notes`.\n4. Missing task/phase references are recorded in an `errors` array by `resolveTaskOrError(...)` / `resolvePhaseOrError(...)`; execution continues through the rest of the batch.\n5. After the full batch, `normalizeInProgressTask(...)` enforces the single-active-task invariant:\n - if multiple tasks are `in_progress`, only the first stays active and the rest become `pending`;\n - if none are `in_progress`, the first `pending` task in phase/task order is auto-promoted to `in_progress`.\n6. `execute(...)` stores the normalized phases with `session.setTodoPhases?.(...)` and reports `storage` as `\"session\"` when `session.getSessionFile()` exists, else `\"memory\"`.\n7. The agent runtime also watches `todo_write` tool results in `packages/coding-agent/src/session/agent-session.ts`; successful results refresh cached todos, failed results inject a hidden next-turn reminder telling the model that todo progress is not visible until it retries.\n8. 
The event controller updates the visible todo UI from `result.details.phases` on success, or shows a warning on error (`packages/coding-agent/src/modes/controllers/event-controller.ts`).\n\n## Modes / Variants\n### State transitions\n\n| Current status | `start` | `done` | `drop` | `rm` | `append` | `note` |\n| --- | --- | --- | --- | --- | --- | --- |\n| `pending` | `in_progress` on target | `completed` | `abandoned` | Removed | New tasks enter as `pending` | No status change |\n| `in_progress` | Target stays `in_progress`; non-target active tasks become `pending` | `completed` | `abandoned` | Removed | No status change | No status change |\n| `completed` | Can be set back to `in_progress` if targeted | Stays `completed` | Becomes `abandoned` if targeted | Removed | No status change | No status change |\n| `abandoned` | Can be set back to `in_progress` if targeted | Becomes `completed` if targeted | Stays `abandoned` | Removed | No status change | No status change |\n\nNormalization then re-applies the single-active-task rule after the full op batch.\n\n### Op targeting rules\n- `done`, `drop`, `rm`:\n - `task` set: affect one exact-content task.\n - else `phase` set: affect every task in that exact-name phase.\n - else: affect every task in every phase.\n- `append` is the only op that creates a missing phase.\n- `note` only targets a single task.\n- `init` discards previous phases entirely.\n\n### Markdown round-trip helpers\nThe same file also exposes non-tool helpers used by `/todo`:\n- `phasesToMarkdown(...)` serializes phases as headings plus checklist items (`[ ]`, `[/]`, `[x]`, `[-]`) with blockquote note bodies.\n- `markdownToPhases(...)` parses that format, defaults orphan tasks into a `Todos` phase, accepts `>` as an `in_progress` marker and `~` as `abandoned`, and runs the same normalization step.\n\n## Side Effects\n- Filesystem\n - None in the tool itself.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Mutates the session 
todo cache through `setTodoPhases`.\n - `storage` reports whether the session has a backing session file, but the tool does not append a custom session entry itself.\n - Successful tool-result messages carry `details.phases`; `getLatestTodoPhasesFromEntries(...)` can reconstruct state later from those transcript entries.\n - Failed `todo_write` results cause `agent-session` to enqueue a hidden next-turn reminder (`customType: \"todo-write-error-reminder\"`).\n- User-visible prompts / interactive UI\n - Transcript block is rendered by `todoWriteToolRenderer` and merged with the call line.\n - `event-controller` updates the visible todo panel from successful results.\n - On error, `event-controller` shows `Todo update failed...`; the visible panel may stay stale until a later successful call.\n- Background work / cancellation\n - `AgentSession.setTodoPhases(...)` schedules auto-clear timers for `completed` / `abandoned` tasks via `tasks.todoClearDelay`.\n\n## Limits & Caps\n- `ops` array: `minItems: 1` (`todoWriteSchema`).\n- `init.list[*].items`: `minItems: 1`.\n- `append.items`: `minItems: 1`.\n- Renderer collapsed preview: `PREVIEW_LIMITS.COLLAPSED_ITEMS = 8` (`packages/coding-agent/src/tools/render-utils.ts`).\n- Auto-clear delay: `tasks.todoClearDelay` default `60` seconds; `< 0` disables auto-clear, `0` clears on the next microtask (`packages/coding-agent/src/session/agent-session.ts`).\n- Tool execution mode: `concurrency = \"exclusive\"`, `strict = true`, `loadMode = \"discoverable\"`.\n\n## Errors\n- The tool does not throw for ordinary bad op payloads; it accumulates human-readable strings in `errors` and still returns success with the mutated state.\n- Error strings come from the helpers in `packages/coding-agent/src/tools/todo-write.ts`, including:\n - `Missing list for init operation`\n - `Missing task content`\n - `Task \"...\" not found` with an extra empty-list hint when applicable\n - `Missing phase name`\n - `Phase \"...\" not found`\n - `Missing 
phase name for append operation`\n - `Missing items for append operation`\n - `Task \"...\" already exists`\n - `Missing text for note operation`\n- Because ops are processed in order, earlier errors do not roll back later ops.\n- Runtime-level tool failure is handled outside the tool body: `agent-session` injects a hidden reminder and the event controller warns the user that visible progress may be stale.\n- Idempotency is op-specific:\n - `init` is a full replacement; replaying the same payload yields the same state.\n - `start`, `done`, and `drop` are effectively idempotent on an existing target state, but `start` also demotes any other active task.\n - `rm` is not idempotent for targeted removals: the second call errors because the task or phase is gone.\n - `append` is not idempotent: duplicate task content is rejected with `Task \"...\" already exists`.\n - `note` is append-only and never idempotent; replaying it adds another note entry.\n\n## Notes\n- Task lookup is exact string equality inside the tool. The model-facing prompt says task content and phase names are identifiers and should stay unique; `append` enforces task uniqueness globally, but `init` does not validate duplicate task or phase names.\n- `findTaskByContent(...)` returns the first matching task across phases. Duplicate task contents make later targeted ops ambiguous.\n- `normalizeInProgressTask(...)` runs after the whole batch, not after each op. 
A single call can intentionally build an intermediate invalid state and rely on final normalization.\n- `storage: \"session\"` means the session has a session-file backing; it does not mean this tool wrote a durable custom entry.\n- Reload persistence differs by path:\n - plain `todo_write` calls survive in transcript tool-result details;\n - `/todo` command edits additionally append `customType: \"user_todo_edit\"` entries and inject a visible-to-model `<system-reminder>` developer message describing the manual edit.\n- On session resume, `AgentSession.#syncTodoPhasesFromBranch()` strips `completed` and `abandoned` tasks before restoring the cached list. The `/todo` command works around that by reading the latest transcript/custom-entry state so historical done/dropped tasks still appear to the user.\n- Tool availability is gated by `todo.enabled`, and the registry excludes it when `includeYield` is enabled (`packages/coding-agent/src/tools/index.ts`).\n- Subagents do not inherit `todo_write`; `packages/coding-agent/src/task/executor.ts` filters it out as a parent-owned tool.\n",
96
96
  "tools/web_search.md": "# web_search\n\n> Run one web query through the first available search provider and return LLM-formatted answer, source URLs, and optional citations.\n\n## Source\n- Entry: `packages/coding-agent/src/web/search/index.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/web-search.md`\n- Key collaborators:\n - `packages/coding-agent/src/web/search/provider.ts` — lazy provider registry; availability chain.\n - `packages/coding-agent/src/web/search/types.ts` — unified `SearchResponse` / `SearchProviderError` types.\n - `packages/coding-agent/src/web/search/render.ts` — TUI renderer details type.\n - `packages/coding-agent/src/web/search/providers/base.ts` — provider interface and shared params contract.\n - `packages/coding-agent/src/web/search/providers/utils.ts` — credential lookup; source normalization.\n - `packages/coding-agent/src/web/search/providers/anthropic.ts` — Claude web-search provider.\n - `packages/coding-agent/src/web/search/providers/brave.ts` — Brave Search API adapter.\n - `packages/coding-agent/src/web/search/providers/codex.ts` — OpenAI Codex SSE adapter.\n - `packages/coding-agent/src/web/search/providers/exa.ts` — Exa API or MCP adapter.\n - `packages/coding-agent/src/web/search/providers/gemini.ts` — Gemini grounding SSE adapter.\n - `packages/coding-agent/src/web/search/providers/jina.ts` — Jina Reader search adapter.\n - `packages/coding-agent/src/web/search/providers/kagi.ts` — Kagi provider wrapper.\n - `packages/coding-agent/src/web/search/providers/kimi.ts` — Kimi search adapter.\n - `packages/coding-agent/src/web/search/providers/parallel.ts` — Parallel provider wrapper.\n - `packages/coding-agent/src/web/search/providers/perplexity.ts` — Perplexity API / OAuth adapter.\n - `packages/coding-agent/src/web/search/providers/searxng.ts` — self-hosted SearXNG adapter.\n - `packages/coding-agent/src/web/search/providers/synthetic.ts` — Synthetic search adapter.\n - 
`packages/coding-agent/src/web/search/providers/tavily.ts` — Tavily search adapter.\n - `packages/coding-agent/src/web/search/providers/zai.ts` — Z.AI remote MCP adapter.\n - `packages/coding-agent/src/web/parallel.ts` — Parallel search/extract HTTP client.\n - `packages/coding-agent/src/web/kagi.ts` — Kagi HTTP client.\n - `packages/coding-agent/src/tools/index.ts` — built-in tool registration and enable flag.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `query` | `string` | Yes | Search query. `executeSearch()` rewrites any `2020`-`2029` substring to the current year before dispatch. |\n| `recency` | `\"day\" \\| \"week\" \\| \"month\" \\| \"year\"` | No | Time filter. Only providers that implement it use it. Prompt text says Brave and Perplexity; code also maps it for Tavily and SearXNG. |\n| `limit` | `number` | No | Max results to return. Usually becomes the provider request's result-count parameter when `num_search_results` is absent. |\n| `max_tokens` | `number` | No | Passed through as `maxOutputTokens` / `max_tokens` only by Anthropic, Gemini, and Perplexity API-key mode. Ignored by the other providers. |\n| `temperature` | `number` | No | Passed through only by Anthropic, Gemini, and Perplexity API-key mode. Ignored by the other providers. |\n| `num_search_results` | `number` | No | Requested upstream search breadth. For most providers this is the same count used for returned sources. Perplexity is the only adapter that keeps it distinct from `limit`. 
|\n\n## Outputs\nThe tool returns a single text content block plus structured `details`.\n\n- `content`: `[{ type: \"text\", text: string }]`\n- `details`: `SearchRenderDetails` from `packages/coding-agent/src/web/search/render.ts`\n - `response: SearchResponse`\n - `error?: string`\n\n`text` is produced by `formatForLLM()` in `packages/coding-agent/src/web/search/index.ts`:\n\n- If `response.answer` exists, it is emitted first.\n- If sources exist, a `## Sources` section follows with a source count, then one entry per source:\n - `[n] <title> (<formatted age or published date>)`\n - ` <url>`\n - optional snippet line truncated to 240 chars.\n- If citations exist, a `## Citations` section follows with URL/title plus optional cited text truncated to 240 chars.\n- If related questions exist, a `## Related` bullet list follows.\n- If search queries exist, a `Search queries: <n>` section follows, capped to the first 3 queries and 120 chars each.\n\nFailure output is not thrown at the tool boundary when at least one provider was attempted. Instead the tool returns:\n\n- `content[0].text = \"Error: ...\"`\n- `details.response.provider = <last attempted provider> | \"none\"`\n- `details.error = ...`\n\nStreaming: none. `WebSearchTool.execute()` does not forward its `_signal` argument into `executeSearch()`, so provider cancellation is only available to internal callers that place `signal` inside `SearchQueryParams`.\n\n## Flow\n1. `WebSearchTool.execute()` in `packages/coding-agent/src/web/search/index.ts` delegates directly to `executeSearch()`.\n2. `executeSearch()` chooses a provider list:\n - if `params.provider` is set and not `\"auto\"`, it loads that provider with `getSearchProvider()`; if `isAvailable()` returns true, the list is `[that provider]`, otherwise it falls back to `resolveProviderChain(\"auto\")`.\n - otherwise it calls `resolveProviderChain()` with the module-global preferred provider from `packages/coding-agent/src/web/search/provider.ts`.\n3. 
`resolveProviderChain()` lazily loads each provider module on demand, checks `isAvailable()`, and returns only available providers. If a preferred provider is set, it is tried first, then the static `SEARCH_PROVIDER_ORDER` excluding that provider.\n4. If no providers are available, `executeSearch()` returns `Error: No web search provider configured.` with `details.response.provider = \"none\"`.\n5. For each provider in order, `executeSearch()` calls `provider.search()` with:\n - `query` after year-rewrite,\n - `limit`, `recency`, `temperature`, `maxOutputTokens`, `numSearchResults`,\n - `systemPrompt` from `packages/coding-agent/src/prompts/tools/web-search.md`.\n6. On the first successful `SearchResponse`, `formatForLLM()` renders answer/sources/citations/related/search-queries into one text block and returns it with `details.response`.\n7. If a provider throws, `executeSearch()` records the error and tries the next provider. There is no provider-level parallel fan-out; fallback is sequential.\n8. After all candidates fail, `formatProviderError()` normalizes the last error:\n - Anthropic `404` becomes `Anthropic web search returned 404 (model or endpoint not found).`\n - `401`/`403` become `<Provider> authorization failed ...` except Z.AI, which preserves its raw message.\n - other `SearchProviderError`s surface `error.message`.\n9. If more than one provider was attempted, the final message is `All web search providers failed (<labels>). Last error: <message>`; otherwise it is just the normalized last error.\n\n## Modes / Variants\n- **Provider selection**\n - **Forced provider**: internal callers may pass `provider`; unavailable forced providers fall back to the auto chain instead of hard-failing (`packages/coding-agent/src/web/search/index.ts`). This field is not in the model-facing schema.\n - **Preferred provider**: `setPreferredSearchProvider()` sets a module-global default used by `resolveProviderChain()`. 
`packages/coding-agent/src/sdk.ts` and `packages/coding-agent/src/modes/controllers/selector-controller.ts` wire this from settings.\n - **Auto chain order**: `tavily`, `perplexity`, `brave`, `jina`, `kimi`, `anthropic`, `gemini`, `codex`, `zai`, `exa`, `parallel`, `kagi`, `synthetic`, `searxng` (`SEARCH_PROVIDER_ORDER` in `packages/coding-agent/src/web/search/provider.ts`).\n- **Provider adapters**\n - **Tavily** — `packages/coding-agent/src/web/search/providers/tavily.ts`\n - Availability: API key from env or `agent.db` via `findCredential()`.\n - Querying: POST `https://api.tavily.com/search`.\n - `recency` maps to Tavily `time_range`; code explicitly keeps `topic` at default general scope instead of narrowing to news.\n - `limit` / `num_search_results`: adapter uses `params.numSearchResults ?? params.limit`, clamped to `5..20` with default `5`.\n - Output: `answer`, `sources`, `requestId`, `authMode: \"api_key\"`.\n - **Perplexity** — `packages/coding-agent/src/web/search/providers/perplexity.ts`\n - Availability: auth precedence is `PERPLEXITY_COOKIES` -> OAuth token in `agent.db` -> `PERPLEXITY_API_KEY` / `PPLX_API_KEY`.\n - OAuth/cookie mode: POSTs to `https://www.perplexity.ai/rest/sse/perplexity_ask`, consumes SSE, merges partial events, extracts answer and source URLs, sets `authMode: \"oauth\"`.\n - API-key mode: POSTs to `https://api.perplexity.ai/chat/completions` with `model: \"sonar-pro\"`, `search_mode: \"web\"`, `num_search_results`, optional `search_recency_filter`, `max_tokens`, `temperature`.\n - `num_search_results` controls upstream API breadth only in API-key mode. 
`limit` is preserved separately as `num_results` and slices returned `sources` after parsing in both auth modes.\n - Output may include `answer`, `sources`, `citations`, `usage`, `model`, `requestId`, `authMode`.\n - **Brave** — `packages/coding-agent/src/web/search/providers/brave.ts`\n - Availability: `BRAVE_API_KEY` only.\n - Querying: GET `https://api.search.brave.com/res/v1/web/search` with `count`, `extra_snippets=true`, and `freshness=pd|pw|pm|py` for `recency`.\n - `limit` / `num_search_results`: `params.numSearchResults ?? params.limit`, clamped to `1..20`, default `10`.\n - Output: `sources`, `requestId`.\n - **Jina** — `packages/coding-agent/src/web/search/providers/jina.ts`\n - Availability: `JINA_API_KEY` only.\n - Querying: GET-like fetch to `https://s.jina.ai/<encoded query>` with bearer auth.\n - Ignores `recency`, `max_tokens`, and `temperature`.\n - `limit` / `num_search_results`: adapter slices sources to `params.numSearchResults ?? params.limit` when provided; otherwise returns all payload items.\n - Output: `sources` only.\n - **Kimi** — `packages/coding-agent/src/web/search/providers/kimi.ts`\n - Availability: `MOONSHOT_SEARCH_API_KEY`, `KIMI_SEARCH_API_KEY`, `MOONSHOT_API_KEY`, or `agent.db` credentials for `moonshot` / `kimi-code`.\n - Querying: POST to `MOONSHOT_SEARCH_BASE_URL` / `KIMI_SEARCH_BASE_URL` / default `https://api.kimi.com/coding/v1/search` with `text_query`, `limit`, `enable_page_crawling`, `timeout_seconds: 30`.\n - `limit` / `num_search_results`: `params.numSearchResults ?? 
params.limit`, clamped to `1..20`, default `10`.\n - Output: `sources`, `requestId`.\n - **Anthropic** — `packages/coding-agent/src/web/search/providers/anthropic.ts`\n - Availability: `findAnthropicAuth()` from `@oh-my-pi/pi-ai`.\n - Querying: Claude Messages API with web-search tool enabled.\n - `max_tokens` and `temperature` pass through.\n - `limit` and `num_search_results` are collapsed together before dispatch: `num_results = params.numSearchResults ?? params.limit`.\n - Output may include `answer`, `sources`, `citations`, `searchQueries`, `usage.searchRequests`, `model`, `requestId`.\n - **Gemini** — `packages/coding-agent/src/web/search/providers/gemini.ts`\n - Availability: OAuth credentials in `agent.db` for `google-gemini-cli` or `google-antigravity`.\n - Querying: SSE `streamGenerateContent` call with Google Search grounding enabled. Antigravity auth tries two fallback endpoints and retries `401/403/400 invalid auth` once after token refresh; `429/5xx` retry with exponential backoff and server-provided retry delay, capped by a `5 * 60 * 1000` ms rate-limit budget.\n - `max_tokens` and `temperature` pass through as `generationConfig.maxOutputTokens` / `generationConfig.temperature`.\n - `limit` and `num_search_results` are collapsed together before dispatch.\n - Output may include `answer`, `sources`, `citations`, `searchQueries`, `usage`, `model`.\n - **Codex** — `packages/coding-agent/src/web/search/providers/codex.ts`\n - Availability: non-expired OAuth credential for `openai-codex` in `agent.db`.\n - Querying: SSE POST to `https://chatgpt.com/backend-api/codex/responses` with `tool_choice: { type: \"web_search\" }` and `search_context_size: \"high\"` by default.\n - Ignores `recency`, `max_tokens`, and `temperature` in this tool path.\n - `limit` and `num_search_results` are collapsed together before dispatch.\n - Output may include `answer`, `sources`, `usage`, `model`, `requestId`. 
If the streamed response has no `url_citation` annotations, the adapter falls back to scraping markdown links and bare URLs from the answer text.\n - **Z.AI** — `packages/coding-agent/src/web/search/providers/zai.ts`\n - Availability: env or `agent.db` credential for `zai`.\n - Querying: JSON-RPC `tools/call` against `https://api.z.ai/api/mcp/web_search_prime/mcp` for remote MCP tool `web_search_prime`.\n - Fallback chain inside the provider: tries `{query,count}`, then `{search_query,count}`, then `{search_query, search_engine:\"search-prime\", count}` when earlier attempts fail with argument-shape errors.\n - `limit` and `num_search_results` are collapsed together before dispatch.\n - Output may include parsed free-text `answer`, `sources`, `requestId`.\n - **Exa** — `packages/coding-agent/src/web/search/providers/exa.ts`\n - Availability: always true unless settings explicitly disable `exa.enabled` or `exa.enableSearch`; the adapter can use public MCP even without `EXA_API_KEY`.\n - Querying: with `EXA_API_KEY`, POST `https://api.exa.ai/search`; otherwise call MCP tool `web_search_exa`.\n - `limit` and `num_search_results` are collapsed together before dispatch.\n - Output: synthesized `answer` from up to 3 result summaries, `sources`, `requestId`.\n - **Parallel** — `packages/coding-agent/src/web/search/providers/parallel.ts`, `packages/coding-agent/src/web/parallel.ts`\n - Availability: env or `agent.db` credential for `parallel`.\n - Querying: POST `https://api.parallel.ai/v1beta/search` with `objective=query`, `search_queries=[query]`, `mode:\"fast\"`, `max_chars_per_result: 10000`, beta header `search-extract-2025-10-10`.\n - There is no provider fan-out here despite the name; the current adapter always sends a one-element `search_queries` array.\n - `limit` and `num_search_results` are collapsed together before dispatch, clamped to `1..40`, default `10`.\n - Output: `sources`, `requestId`.\n - **Kagi** — 
`packages/coding-agent/src/web/search/providers/kagi.ts`, `packages/coding-agent/src/web/kagi.ts`\n - Availability: env or `agent.db` credential for `kagi`.\n - Querying: GET `https://kagi.com/api/v0/search?q=<query>&limit=<n>` with `Authorization: Bot <key>`.\n - `limit` and `num_search_results` are collapsed together before dispatch, clamped to `1..40`, default `10`.\n - Output: `sources`, `relatedQuestions`, `requestId`.\n - **Synthetic** — `packages/coding-agent/src/web/search/providers/synthetic.ts`\n - Availability: env or `agent.db` credential for `synthetic`.\n - Querying: POST `https://api.synthetic.new/v2/search` with `{ query }`.\n - Ignores `recency`, `max_tokens`, and `temperature`.\n - `limit` and `num_search_results` are collapsed together before dispatch.\n - Output: `sources` only.\n - **SearXNG** — `packages/coding-agent/src/web/search/providers/searxng.ts`\n - Availability: endpoint from `searxng.endpoint` setting or `SEARXNG_ENDPOINT` env.\n - Querying: GET `<endpoint>/search?format=json&q=...`; optional settings add `categories` and `language`.\n - Auth precedence: Basic auth (`searxng.basicUsername` / `searxng.basicPassword` or env equivalents) over bearer token (`searxng.token` / `SEARXNG_TOKEN`). 
Basic credentials are validated for RFC 7617 restrictions.\n - `recency` maps to `time_range`; `week` is downgraded to `month` because SearXNG does not support week.\n - `limit` and `num_search_results` are collapsed together before dispatch, clamped to `1..20`, default `10`.\n - Output: `sources`, `relatedQuestions` from `suggestions`.\n\n## Side Effects\n- Network\n - Calls one or more external search providers over HTTPS until one succeeds or all fail.\n - Provider-specific transports include JSON POST, JSON GET, SSE streaming (Perplexity OAuth/API, Gemini, Codex), and JSON-RPC over HTTP (Z.AI).\n- Subprocesses / native bindings\n - None.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Uses a module-global provider-instance cache in `packages/coding-agent/src/web/search/provider.ts`.\n - Uses a module-global preferred-provider setting in the same file.\n - `packages/coding-agent/src/tools/index.ts` gates tool availability behind `session.settings.get(\"web_search.enabled\")`.\n- Background work / cancellation\n - Many provider adapters accept `AbortSignal`, but `WebSearchTool.execute()` does not pass its `_signal` into `executeSearch()`. 
Internal callers can still use cancellation by calling `runSearchQuery()` / `executeSearch()` with `signal` embedded in params.\n\n## Limits & Caps\n- Provider auto-order length: 14 providers (`SEARCH_PROVIDER_ORDER` in `packages/coding-agent/src/web/search/provider.ts`).\n- `formatForLLM()` truncates source snippets and citation text to 240 chars (`packages/coding-agent/src/web/search/index.ts`).\n- `formatForLLM()` emits at most 3 search queries, each truncated to 120 chars (`packages/coding-agent/src/web/search/index.ts`).\n- Brave result count: default `10`, max `20` (`DEFAULT_NUM_RESULTS`, `MAX_NUM_RESULTS` in `packages/coding-agent/src/web/search/providers/brave.ts`).\n- Tavily result count: default `5`, max `20` (`packages/coding-agent/src/web/search/providers/tavily.ts`).\n- Kimi result count: default `10`, max `20`; request timeout field fixed to `30` seconds (`packages/coding-agent/src/web/search/providers/kimi.ts`).\n- Parallel result count: default `10`, max `40`; per-result excerpt cap `10_000` chars (`packages/coding-agent/src/web/search/providers/parallel.ts`, `packages/coding-agent/src/web/parallel.ts`).\n- Kagi result count: default `10`, max `40` (`packages/coding-agent/src/web/search/providers/kagi.ts`).\n- SearXNG result count: default `10`, max `20` (`packages/coding-agent/src/web/search/providers/searxng.ts`).\n- Perplexity API-key mode defaults: `max_tokens = 8192`, `temperature = 0.2`, `num_search_results = 10` (`packages/coding-agent/src/web/search/providers/perplexity.ts`).\n- Anthropic defaults: model `claude-haiku-4-5`, `DEFAULT_MAX_TOKENS = 4096` when the provider omits `max_tokens` (`packages/coding-agent/src/web/search/providers/anthropic.ts`).\n- Gemini retries: up to `3` retries per endpoint, base delay `1000` ms, rate-limit delay budget `5 * 60 * 1000` ms (`packages/coding-agent/src/web/search/providers/gemini.ts`).\n\n## Errors\n- Tool-level no-provider case returns a normal tool result with `Error: No web search provider 
configured.`; it does not throw.\n- Tool-level all-failed case also returns a normal tool result with `Error: ...`; failures are summarized from the last attempted provider.\n- Provider adapters usually throw `SearchProviderError(provider, message, status)` for HTTP or protocol failures.\n- Availability probes intentionally swallow lookup errors and report `false` in many providers via `isApiKeyAvailable()`.\n- Per-provider notable failures:\n - Anthropic: missing credentials throw a plain `Error`; a `404` is remapped to a special final message by `formatProviderError()`.\n - Perplexity: missing auth throws a plain `Error`; OAuth stream `error_code` events become `SearchProviderError(\"perplexity\", ...)`.\n - Gemini: auth refresh, endpoint fallback, and retry logic are internal; final exhausted failures surface as `SearchProviderError(\"gemini\", ...)`.\n - Codex and Gemini both fail if the HTTP response has no body after a `200`.\n - Z.AI treats malformed SSE/JSON-RPC payloads as provider errors and retries only argument-shape failures across request variants.\n - SearXNG `findAuth()` can throw configuration errors before any HTTP call if Basic auth fields are incomplete or invalid.\n\n## Notes\n- The model-facing schema does not expose `provider`, but internal callers can force one through `SearchQueryParams`.\n- `resolveProviderChain()` lazily imports provider modules and caches singleton instances. Just asking for labels via `getSearchProviderLabel()` does not trigger those imports.\n- Most providers treat `limit` and `num_search_results` as the same number because adapters pass `params.numSearchResults ?? params.limit`. 
Perplexity is the only implementation that preserves both concepts.\n- The prompt says `recency` is for Brave and Perplexity, but code also implements it for Tavily and SearXNG.\n- The year rewrite in `executeSearch()` is blunt: any `2020`-`2029` substring is replaced with the current year.\n- `packages/coding-agent/src/config/settings-schema.ts` exposes provider preferences for `auto`, `exa`, `brave`, `jina`, `kimi`, `perplexity`, `anthropic`, `zai`, `tavily`, `kagi`, `synthetic`, `parallel`, and `searxng`. Gemini and Codex are in the registry and auto chain but not in that settings enum.\n- Exa availability is optimistic. Unless settings disable it, the provider stays in the chain even without an API key because it can fall back to MCP.\n",
97
- "tools/write.md": "# write\n\n> Create or overwrite a file, archive entry, or SQLite row.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/write.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/write.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/archive-reader.ts` — parse `archive.ext:entry` selectors.\n - `packages/coding-agent/src/tools/sqlite-reader.ts` — detect SQLite paths and perform row insert/update/delete.\n - `packages/coding-agent/src/lsp/index.ts` — format-on-write and diagnostics writethrough.\n - `packages/coding-agent/src/tools/auto-generated-guard.ts` — block overwriting generated files.\n - `packages/coding-agent/src/tools/fs-cache-invalidation.ts` — invalidate shared FS scan caches after writes.\n - `packages/coding-agent/src/tools/plan-mode-guard.ts` — resolve paths and enforce plan-mode write policy.\n\n## Inputs\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `path` | `string` | Yes | Target path. Plain file path writes a filesystem file. `archive.ext:inner/path` writes an archive entry for `.tar`, `.tar.gz`, `.tgz`, or `.zip`. `db.sqlite:table` inserts a row. `db.sqlite:table:key` updates or deletes a row. |\n| `content` | `string` | Yes | Full replacement file content, archive entry content, or SQLite row payload. SQLite non-delete writes must parse as a JSON5 object. Empty or whitespace-only content deletes a SQLite row when `path` includes a row key. 
|\n\nWorked examples:\n\n```text\npath: \"src/generated/config.json\"\ncontent: \"{\\n \\\"enabled\\\": true\\n}\\n\"\n```\n\n```text\npath: \"fixtures/archive.zip:templates/email.txt\"\ncontent: \"hello\\n\"\n```\n\n```text\npath: \"data/app.sqlite:users:42\"\ncontent: \"{name: 'Ada', active: true}\"\n```\n\n## Outputs\nSingle-shot result.\n\n- Success always returns a text block.\n - Plain file write: `Successfully wrote <bytes> bytes to <relative-path>`.\n - Archive write: `Successfully wrote <bytes> bytes to <relative-archive-path>:<entry-path>`.\n - SQLite write: one of `Inserted row into <table>`, `Updated row '<key>' in <table>`, `No row updated ...`, `Deleted row ...`, `No row deleted ...`.\n- If hashline prefixes were copied from `read` output and stripped first, the first text block gets an extra note.\n- Plain file writes may also return `details.diagnostics` plus `details.meta.diagnostics` when LSP diagnostics-on-write is enabled.\n- SQLite writes use `toolResult(...).sourcePath(...)`, so `details.meta.sourcePath` points at the database file.\n- Archive writes return empty `details`.\n\n## Flow\n1. `WriteTool.execute()` in `packages/coding-agent/src/tools/write.ts` strips pasted `¶PATH#HASH` headers and `LINE:` hashline prefixes from `content` when the session is in hashline display mode.\n2. It calls `#resolveArchiveWritePath()` first. That uses `parseArchivePathCandidates()` from `packages/coding-agent/src/tools/archive-reader.ts`, checks candidate archive files on disk, and falls back to the longest matching archive suffix even when the archive file does not exist yet.\n3. Archive writes call `enforcePlanModeWrite(..., { op: exists ? 
\"update\" : \"create\" })`, then `#writeArchiveEntry()`.\n - The parent directory of the archive file is created with `fs.mkdir(..., { recursive: true })`.\n - `.zip` archives are read with `fflate.unzipSync()`, the target entry is replaced in an in-memory map, and the archive is rewritten with `fflate.zipSync()` + `Bun.write()`.\n - `.tar`, `.tar.gz`, and `.tgz` archives are read with `Bun.Archive`, existing entries are copied into an object map, the target entry is replaced, and `Bun.Archive.write()` rewrites the archive.\n - `invalidateFsScanAfterWrite()` runs on the archive file path.\n4. If the path is not treated as an archive, `execute()` calls `#resolveSqliteWritePath()`. That uses `parseSqlitePathCandidates()` and `isSqliteFile()` from `packages/coding-agent/src/tools/sqlite-reader.ts`. Existing non-SQLite files suppress the SQLite path interpretation.\n5. SQLite writes call `enforcePlanModeWrite(..., { op: \"update\" })`, then `#writeSqliteRow()`.\n - The database must already exist; missing DBs throw `SQLite database '<path>' not found`.\n - The tool opens `new Database(..., { create: false, strict: true })` and sets `PRAGMA busy_timeout = 3000`.\n - Whitespace-only `content` with a row key deletes a row.\n - Non-empty `content` is parsed with `Bun.JSON5.parse()`, must be a JSON object, and is routed to insert/update helpers from `packages/coding-agent/src/tools/sqlite-reader.ts`.\n - `invalidateFsScanAfterWrite()` runs on the DB path and the connection is closed in `finally`.\n6. Otherwise the tool treats `path` as a plain filesystem file.\n - `enforcePlanModeWrite(..., { op: \"create\" })` runs before path resolution.\n - Existing files are checked by `assertEditableFile()` to block overwriting detected generated files.\n - The session’s writethrough callback writes content. 
With LSP enabled and `lsp.formatOnWrite` / `lsp.diagnosticsOnWrite` settings on, `createLspWritethrough()` may format content, sync it through LSP servers, save it, and collect diagnostics. Otherwise `writethroughNoop()` writes directly with `Bun.write()` or `file.write()`.\n - `invalidateFsScanAfterWrite()` runs on the file path.\n7. The tool returns a text result and optional diagnostics metadata.\n\n## Modes / Variants\n### Plain file path\n- Target is any path that does not resolve as an archive selector and does not resolve as an existing-or-new SQLite selector.\n- Existing files are overwritten.\n- `write.ts` does not call `fs.mkdir()` on this path; parent-directory creation is only implemented in the archive branch.\n\nExample:\n\n```text\npath: \"tmp/output.txt\"\ncontent: \"hello\\n\"\n```\n\n### Archive entry write\n- Selector syntax: `archive.ext:inner/path`.\n- Supported archive suffixes come from `parseArchivePathCandidates()`: `.tar`, `.tar.gz`, `.tgz`, `.zip`.\n- The inner path is normalized to `/`, strips empty and `.` segments, rejects `..`, and rejects directory targets ending in `/`.\n- Rewrites the whole archive file after replacing one entry.\n- Creates the parent directory for the archive file if needed.\n\nExample:\n\n```text\npath: \"build/assets.tar.gz:css/app.css\"\ncontent: \"body { color: black; }\\n\"\n```\n\n### SQLite table insert\n- Selector syntax: `db.sqlite:table`.\n- `content` must parse as a JSON5 object.\n- Empty object is allowed and becomes `INSERT INTO <table> DEFAULT VALUES`.\n- Query parameters are rejected for SQLite writes.\n\nExample:\n\n```text\npath: \"data/app.db:users\"\ncontent: \"{name: 'Ada', active: true}\"\n```\n\n### SQLite row update / delete\n- Selector syntax: `db.sqlite:table:key`.\n- Non-empty `content` updates the row.\n- Empty or whitespace-only `content` deletes the row.\n- Row lookup uses the single-column primary key if present; otherwise it falls back to `rowid`. 
Composite primary keys and `WITHOUT ROWID` tables are rejected for key-based writes.\n\nExample update:\n\n```text\npath: \"data/app.sqlite:users:42\"\ncontent: \"{email: 'ada@example.com'}\"\n```\n\nExample delete:\n\n```text\npath: \"data/app.sqlite:users:42\"\ncontent: \"\"\n```\n\n## Side Effects\n- Filesystem\n - Creates or overwrites plain files.\n - Rewrites entire archive files when writing an archive entry.\n - Creates parent directories for archive files only.\n - Mutates existing SQLite databases; never creates a new SQLite DB.\n- Subprocesses / native bindings\n - Uses Bun SQLite bindings via `bun:sqlite`.\n - Uses Bun archive APIs and lazily imports `fflate` for ZIP reads/writes.\n - May talk to configured LSP servers through `packages/coding-agent/src/lsp/index.ts`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Invalidates shared filesystem scan cache entries through `invalidateFsScanAfterWrite()`.\n - Enforces plan-mode write restrictions before mutating the target.\n- Background work / cancellation\n - Marks the tool `nonAbortable = true` and `concurrency = \"exclusive\"` in `WriteTool`.\n - LSP writethrough can schedule deferred diagnostics fetches after a timeout, but plain `write.ts` only consumes the immediate return value.\n\n## Limits & Caps\n- `WriteTool` itself exposes no byte cap beyond storing `content` in memory and, for archives, rebuilding the archive in memory.\n- Generated-file detection reads at most `CHECK_BYTE_COUNT = 1024` bytes and `HEADER_LINE_LIMIT = 40` header lines from an existing file in `packages/coding-agent/src/tools/auto-generated-guard.ts`.\n- SQLite writes set `PRAGMA busy_timeout = 3000`.\n- LSP writethrough uses a `5_000` ms operation timeout in `runLspWritethrough()` and may schedule a deferred diagnostics fetch with `AbortSignal.timeout(25_000)` in `scheduleDeferredDiagnosticsFetch()`.\n\n## Errors\n- Invalid archive subpaths throw `ToolError` with messages such as:\n - `Archive write 
path must target a file inside the archive`\n - `Archive write path must target a file, not a directory`\n - `Archive path cannot contain '..'`\n- SQLite path parsing throws on unsupported forms:\n - `SQLite write paths do not support query parameters`\n - `SQLite write path must target a table`\n - `SQLite row writes require a non-empty row key`\n- Missing SQLite DBs surface as `SQLite database '<path>' not found`.\n- SQLite content errors are model-visible `ToolError`s, including invalid JSON5, non-object payloads, unknown columns, non-scalar values, empty update objects, composite primary keys, and `WITHOUT ROWID` tables.\n- Existing plain files may be rejected by `assertEditableFile()` when they look generated.\n- Archive read/write failures and unexpected SQLite exceptions are wrapped in `ToolError(error.message)`.\n- If no LSP server matches or LSP formatting/diagnostics times out, file writes still fall back to writing content; diagnostics may be omitted.\n\n## Notes\n- Archive path detection runs before SQLite detection. A path that matches an archive selector is never treated as SQLite.\n- SQLite detection declines when an existing file with a `.sqlite` / `.db` suffix is present but does not have SQLite magic bytes; then the path falls back to a plain file write.\n- ZIP entry content is encoded with `new TextEncoder().encode(content)` in `#writeArchiveEntry()`. Non-ZIP archive writes pass the string directly to `Bun.Archive.write()`.\n- The prompt forbids two common anti-patterns: using `write` for routine edits that should use `edit`, and creating `*.md` / `README` files unless explicitly requested. It also forbids emojis unless requested.\n- Plain file writes report byte count using `cleanContent.length`, which is UTF-16 code units in JS, not an on-disk byte measurement.\n- `stripWriteContent()` only removes hashline prefixes when the session’s file display mode has `hashLines` enabled; otherwise content is written unchanged.\n",
97
+ "tools/write.md": "# write\n\n> Create or overwrite a file, archive entry, or SQLite row.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/write.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/write.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/archive-reader.ts` — parse `archive.ext:entry` selectors.\n - `packages/coding-agent/src/tools/sqlite-reader.ts` — detect SQLite paths and perform row insert/update/delete.\n - `packages/coding-agent/src/lsp/index.ts` — format-on-write and diagnostics writethrough.\n - `packages/coding-agent/src/tools/auto-generated-guard.ts` — block overwriting generated files.\n - `packages/coding-agent/src/tools/fs-cache-invalidation.ts` — invalidate shared FS scan caches after writes.\n - `packages/coding-agent/src/tools/plan-mode-guard.ts` — resolve paths and enforce plan-mode write policy.\n\n## Inputs\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `path` | `string` | Yes | Target path. Plain file path writes a filesystem file. `archive.ext:inner/path` writes an archive entry for `.tar`, `.tar.gz`, `.tgz`, or `.zip`. `db.sqlite:table` inserts a row. `db.sqlite:table:key` updates or deletes a row. |\n| `content` | `string` | Yes | Full replacement file content, archive entry content, or SQLite row payload. SQLite non-delete writes must parse as a JSON5 object. Empty or whitespace-only content deletes a SQLite row when `path` includes a row key. 
|\n\nWorked examples:\n\n```text\npath: \"src/generated/config.json\"\ncontent: \"{\\n \\\"enabled\\\": true\\n}\\n\"\n```\n\n```text\npath: \"fixtures/archive.zip:templates/email.txt\"\ncontent: \"hello\\n\"\n```\n\n```text\npath: \"data/app.sqlite:users:42\"\ncontent: \"{name: 'Ada', active: true}\"\n```\n\n## Outputs\nSingle-shot result.\n\n- Success always returns a text block.\n - Plain file write: `Successfully wrote <bytes> bytes to <relative-path>`.\n - Archive write: `Successfully wrote <bytes> bytes to <relative-archive-path>:<entry-path>`.\n - SQLite write: one of `Inserted row into <table>`, `Updated row '<key>' in <table>`, `No row updated ...`, `Deleted row ...`, `No row deleted ...`.\n- If hashline prefixes were copied from `read` output and stripped first, the first text block gets an extra note.\n- In hashline display mode, plain file writes (including ACP bridge writes) and conflict resolutions prepend a fresh `¶<relative-path>#TAG` header so the next `edit` has a current snapshot tag without an extra `read`. Bulk conflict resolutions append a `Snapshots:` block listing one header per successfully written file.\n- Plain file writes may also return `details.diagnostics` plus `details.meta.diagnostics` when LSP diagnostics-on-write is enabled.\n- SQLite writes use `toolResult(...).sourcePath(...)`, so `details.meta.sourcePath` points at the database file.\n- Archive writes return empty `details`.\n\n## Flow\n1. `WriteTool.execute()` in `packages/coding-agent/src/tools/write.ts` strips pasted `¶PATH#HASH` headers and `LINE:` hashline prefixes from `content` when the session is in hashline display mode.\n2. It calls `#resolveArchiveWritePath()` first. That uses `parseArchivePathCandidates()` from `packages/coding-agent/src/tools/archive-reader.ts`, checks candidate archive files on disk, and falls back to the longest matching archive suffix even when the archive file does not exist yet.\n3. Archive writes call `enforcePlanModeWrite(..., { op: exists ? 
\"update\" : \"create\" })`, then `#writeArchiveEntry()`.\n - The parent directory of the archive file is created with `fs.mkdir(..., { recursive: true })`.\n - `.zip` archives are read with `fflate.unzipSync()`, the target entry is replaced in an in-memory map, and the archive is rewritten with `fflate.zipSync()` + `Bun.write()`.\n - `.tar`, `.tar.gz`, and `.tgz` archives are read with `Bun.Archive`, existing entries are copied into an object map, the target entry is replaced, and `Bun.Archive.write()` rewrites the archive.\n - `invalidateFsScanAfterWrite()` runs on the archive file path.\n4. If the path is not treated as an archive, `execute()` calls `#resolveSqliteWritePath()`. That uses `parseSqlitePathCandidates()` and `isSqliteFile()` from `packages/coding-agent/src/tools/sqlite-reader.ts`. Existing non-SQLite files suppress the SQLite path interpretation.\n5. SQLite writes call `enforcePlanModeWrite(..., { op: \"update\" })`, then `#writeSqliteRow()`.\n - The database must already exist; missing DBs throw `SQLite database '<path>' not found`.\n - The tool opens `new Database(..., { create: false, strict: true })` and sets `PRAGMA busy_timeout = 3000`.\n - Whitespace-only `content` with a row key deletes a row.\n - Non-empty `content` is parsed with `Bun.JSON5.parse()`, must be a JSON object, and is routed to insert/update helpers from `packages/coding-agent/src/tools/sqlite-reader.ts`.\n - `invalidateFsScanAfterWrite()` runs on the DB path and the connection is closed in `finally`.\n6. Otherwise the tool treats `path` as a plain filesystem file.\n - `enforcePlanModeWrite(..., { op: \"create\" })` runs before path resolution.\n - Existing files are checked by `assertEditableFile()` to block overwriting detected generated files.\n - The session’s writethrough callback writes content. 
With LSP enabled and `lsp.formatOnWrite` / `lsp.diagnosticsOnWrite` settings on, `createLspWritethrough()` may format content, sync it through LSP servers, save it, and collect diagnostics. Otherwise `writethroughNoop()` writes directly with `Bun.write()` or `file.write()`.\n - `invalidateFsScanAfterWrite()` runs on the file path.\n7. The tool returns a text result and optional diagnostics metadata.\n\n## Modes / Variants\n### Plain file path\n- Target is any path that does not resolve as an archive selector and does not resolve as an existing-or-new SQLite selector.\n- Existing files are overwritten.\n- `write.ts` does not call `fs.mkdir()` on this path; parent-directory creation is only implemented in the archive branch.\n\nExample:\n\n```text\npath: \"tmp/output.txt\"\ncontent: \"hello\\n\"\n```\n\n### Archive entry write\n- Selector syntax: `archive.ext:inner/path`.\n- Supported archive suffixes come from `parseArchivePathCandidates()`: `.tar`, `.tar.gz`, `.tgz`, `.zip`.\n- The inner path is normalized to `/`, strips empty and `.` segments, rejects `..`, and rejects directory targets ending in `/`.\n- Rewrites the whole archive file after replacing one entry.\n- Creates the parent directory for the archive file if needed.\n\nExample:\n\n```text\npath: \"build/assets.tar.gz:css/app.css\"\ncontent: \"body { color: black; }\\n\"\n```\n\n### SQLite table insert\n- Selector syntax: `db.sqlite:table`.\n- `content` must parse as a JSON5 object.\n- Empty object is allowed and becomes `INSERT INTO <table> DEFAULT VALUES`.\n- Query parameters are rejected for SQLite writes.\n\nExample:\n\n```text\npath: \"data/app.db:users\"\ncontent: \"{name: 'Ada', active: true}\"\n```\n\n### SQLite row update / delete\n- Selector syntax: `db.sqlite:table:key`.\n- Non-empty `content` updates the row.\n- Empty or whitespace-only `content` deletes the row.\n- Row lookup uses the single-column primary key if present; otherwise it falls back to `rowid`. 
Composite primary keys and `WITHOUT ROWID` tables are rejected for key-based writes.\n\nExample update:\n\n```text\npath: \"data/app.sqlite:users:42\"\ncontent: \"{email: 'ada@example.com'}\"\n```\n\nExample delete:\n\n```text\npath: \"data/app.sqlite:users:42\"\ncontent: \"\"\n```\n\n## Side Effects\n- Filesystem\n - Creates or overwrites plain files.\n - Rewrites entire archive files when writing an archive entry.\n - Creates parent directories for archive files only.\n - Mutates existing SQLite databases; never creates a new SQLite DB.\n- Subprocesses / native bindings\n - Uses Bun SQLite bindings via `bun:sqlite`.\n - Uses Bun archive APIs and lazily imports `fflate` for ZIP reads/writes.\n - May talk to configured LSP servers through `packages/coding-agent/src/lsp/index.ts`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Invalidates shared filesystem scan cache entries through `invalidateFsScanAfterWrite()`.\n - Enforces plan-mode write restrictions before mutating the target.\n- Background work / cancellation\n - Marks the tool `nonAbortable = true` and `concurrency = \"exclusive\"` in `WriteTool`.\n - LSP writethrough can schedule deferred diagnostics fetches after a timeout, but plain `write.ts` only consumes the immediate return value.\n\n## Limits & Caps\n- `WriteTool` itself exposes no byte cap beyond storing `content` in memory and, for archives, rebuilding the archive in memory.\n- Generated-file detection reads at most `CHECK_BYTE_COUNT = 1024` bytes and `HEADER_LINE_LIMIT = 40` header lines from an existing file in `packages/coding-agent/src/tools/auto-generated-guard.ts`.\n- SQLite writes set `PRAGMA busy_timeout = 3000`.\n- LSP writethrough uses a `5_000` ms operation timeout in `runLspWritethrough()` and may schedule a deferred diagnostics fetch with `AbortSignal.timeout(25_000)` in `scheduleDeferredDiagnosticsFetch()`.\n\n## Errors\n- Invalid archive subpaths throw `ToolError` with messages such as:\n - `Archive write 
path must target a file inside the archive`\n - `Archive write path must target a file, not a directory`\n - `Archive path cannot contain '..'`\n- SQLite path parsing throws on unsupported forms:\n - `SQLite write paths do not support query parameters`\n - `SQLite write path must target a table`\n - `SQLite row writes require a non-empty row key`\n- Missing SQLite DBs surface as `SQLite database '<path>' not found`.\n- SQLite content errors are model-visible `ToolError`s, including invalid JSON5, non-object payloads, unknown columns, non-scalar values, empty update objects, composite primary keys, and `WITHOUT ROWID` tables.\n- Existing plain files may be rejected by `assertEditableFile()` when they look generated.\n- Archive read/write failures and unexpected SQLite exceptions are wrapped in `ToolError(error.message)`.\n- If no LSP server matches or LSP formatting/diagnostics times out, file writes still fall back to writing content; diagnostics may be omitted.\n\n## Notes\n- Archive path detection runs before SQLite detection. A path that matches an archive selector is never treated as SQLite.\n- SQLite detection declines when an existing file with a `.sqlite` / `.db` suffix is present but does not have SQLite magic bytes; then the path falls back to a plain file write.\n- ZIP entry content is encoded with `new TextEncoder().encode(content)` in `#writeArchiveEntry()`. Non-ZIP archive writes pass the string directly to `Bun.Archive.write()`.\n- The prompt forbids two common anti-patterns: using `write` for routine edits that should use `edit`, and creating `*.md` / `README` files unless explicitly requested. It also forbids emojis unless requested.\n- Plain file writes report byte count using `cleanContent.length`, which is UTF-16 code units in JS, not an on-disk byte measurement.\n- `stripWriteContent()` only removes hashline prefixes when the session’s file display mode has `hashLines` enabled; otherwise content is written unchanged.\n",
98
98
  "tree.md": "# `/tree` Command Reference\n\n`/tree` opens the interactive **Session Tree** navigator. It lets you jump to any entry in the current session file and continue from that point.\n\nThis is an in-file leaf move, not a new session export.\n\n## What `/tree` does\n\n- Builds a tree from current session entries (`SessionManager.getTree()`)\n- Opens `TreeSelectorComponent` with keyboard navigation, filters, and search\n- On selection, calls `AgentSession.navigateTree(targetId, { summarize, customInstructions })`\n- Rebuilds visible chat from the new leaf path\n- Optionally prefills editor text when selecting a user/custom message\n\nPrimary implementation:\n\n- `src/modes/controllers/input-controller.ts` (`/tree`, keybinding wiring, double-escape behavior)\n- `src/modes/controllers/selector-controller.ts` (tree UI launch + summary prompt flow)\n- `src/modes/components/tree-selector.ts` (navigation, filters, search, labels, rendering)\n- `src/session/agent-session.ts` (`navigateTree` leaf switching + optional summary)\n- `src/session/session-manager.ts` (`getTree`, `branch`, `branchWithSummary`, `resetLeaf`, label persistence)\n\n## How to open it\n\nAny of the following opens the same selector:\n\n- `/tree`\n- configured keybinding action `tree`\n- double-escape on empty editor when `doubleEscapeAction = \"tree\"` (default)\n- `/branch` when `doubleEscapeAction = \"tree\"` (routes to tree selector instead of user-only branch picker)\n\n## Tree UI model\n\nThe tree is rendered from session entry parent pointers (`id` / `parentId`).\n\n- Children are sorted by timestamp ascending (older first, newer lower)\n- Active branch (path from root to current leaf) is marked with a bullet\n- Labels (if present) render as `[label]` before node text\n- If multiple roots exist (orphaned/broken parent chains), they are shown under a virtual branching root\n\n```text\nExample tree view (active path marked with •):\n\n├─ user: \"Start task\"\n│ └─ assistant: \"Plan\"\n│ ├─ • 
user: \"Try approach A\"\n│ │ └─ • assistant: \"A result\"\n│ │ └─ • [milestone] user: \"Continue A\"\n│ └─ user: \"Try approach B\"\n│ └─ assistant: \"B result\"\n```\n\nThe selector recenters around current selection and shows up to:\n\n- `max(5, floor(terminalHeight / 2))` rows\n\n## Keybindings inside tree selector\n\n- `Up` / `Down`: move selection (wraps)\n- `Left` / `Right`: page up / page down\n- `Enter`: select node\n- `Esc`: clear search if active; otherwise close selector\n- `Ctrl+C`: close selector\n- `Type`: append to search query\n- `Backspace`: delete search character\n- `Shift+L`: edit/clear label on selected entry\n- `Ctrl+O`: cycle filter forward\n- `Shift+Ctrl+O`: cycle filter backward\n- `Alt+D/T/U/L/A`: jump directly to specific filter mode\n\n## Filters and search semantics\n\nFilter modes (`TreeList`):\n\n1. `default`\n2. `no-tools`\n3. `user-only`\n4. `labeled-only`\n5. `all`\n\n### `default`\n\nShows most conversational nodes, but hides bookkeeping entry types:\n\n- `label`\n- `custom`\n- `model_change`\n- `thinking_level_change`\n\n### `no-tools`\n\nSame as `default`, plus hides `toolResult` messages.\n\n### `user-only`\n\nOnly `message` entries where role is `user`.\n\n### `labeled-only`\n\nOnly entries that currently resolve to a label.\n\n### `all`\n\nEverything in the session tree, including bookkeeping/custom entries.\n\n### Tool-only assistant node behavior\n\nAssistant messages that contain **only tool calls** (no text) are hidden by default in all filtered views unless:\n\n- message is error/aborted (`stopReason` not `stop`/`toolUse`), or\n- it is the current leaf (always kept visible)\n\n### Search behavior\n\n- Query is tokenized by spaces\n- Matching is case-insensitive\n- All tokens must match (AND semantics)\n- Searchable text includes label, role, and type-specific content (message text, branch summary text, custom type, tool command snippets, etc.)\n\n## Selection outcomes (important)\n\n`navigateTree` computes new leaf 
behavior from selected entry type:\n\n### Selecting `user` message\n\n- New leaf becomes selected entry’s `parentId`\n- If parent is `null` (root user message), leaf resets to root (`resetLeaf()`)\n- Selected message text is copied to editor for editing/resubmit\n\n### Selecting `custom_message`\n\n- Same leaf rule as user messages (`parentId`)\n- Text content is extracted and copied to editor\n\n### Selecting non-user node (assistant/tool/summary/compaction/custom bookkeeping/etc.)\n\n- New leaf becomes selected node id\n- Editor is not prefilled\n\n### Selecting current leaf\n\n- No-op; selector closes with “Already at this point”\n\n```text\nSelection decision (simplified):\n\nselected node\n │\n ├─ is current leaf? ── yes ──> close selector (no-op)\n │\n ├─ is user/custom_message? ── yes ──> leaf := parentId (or resetLeaf for root)\n │ + prefill editor text\n │\n └─ otherwise ──> leaf := selected node id\n + no editor prefill\n```\n\n## Summary-on-switch flow\n\nSummary prompt is controlled by `branchSummary.enabled` (default: `false`).\n\nWhen enabled, after picking a node the UI asks:\n\n- `No summary`\n- `Summarize`\n- `Summarize with custom prompt`\n\nFlow details:\n\n- Escape in summary prompt reopens tree selector\n- Custom prompt cancellation returns to summary choice loop\n- During summarization, UI shows loader and binds `Esc` to `abortBranchSummary()`\n- If summarization aborts, tree selector reopens and no move is applied\n\n`navigateTree` internals:\n\n- Collects abandoned-branch entries from old leaf to common ancestor\n- Emits `session_before_tree` (extensions can cancel or inject summary)\n- Uses default summarizer only if requested and needed\n- Applies move with:\n - `branchWithSummary(...)` when summary exists\n - `branch(newLeafId)` for non-root move without summary\n - `resetLeaf()` for root move without summary\n- Replaces agent conversation with rebuilt session context\n- Emits `session_tree`\n\nNote: if user requests summary but there is 
nothing to summarize, navigation proceeds without creating a summary entry.\n\n## Labels\n\nLabel edits in tree UI call `appendLabelChange(targetId, label)`.\n\n- non-empty label sets/updates resolved label\n- empty label clears it\n- labels are stored as append-only `label` entries\n- tree nodes display resolved label state, not raw label-entry history\n\n## `/tree` vs adjacent operations\n\n| Operation | Scope | Result |\n| --------- | ------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| `/tree` | Current session file | Moves leaf to selected point (same file) |\n| `/branch` | Usually current session file -> new session file | By default branches from selected **user** message into a new session file; if `doubleEscapeAction = \"tree\"`, `/branch` opens tree navigation UI instead |\n| `/fork` | Whole current session | Duplicates session into a new persisted session file |\n| `/resume` | Session list | Switches to another session file |\n\nKey distinction: `/tree` is a navigation/repositioning tool inside one session file. `/branch`, `/fork`, and `/resume` all change session-file context.\n\n## Operator workflows\n\n### Re-run from an earlier user prompt without losing current branch\n\n1. `/tree`\n2. search/select earlier user message\n3. choose `No summary` (or summarize if needed)\n4. edit prefilled text in editor\n5. submit\n\nEffect: new branch grows from selected point within same session file.\n\n### Leave current branch with context breadcrumb\n\n1. enable `branchSummary.enabled`\n2. `/tree` and select target node\n3. choose `Summarize` (or custom prompt)\n\nEffect: a `branch_summary` entry is appended at the target position before continuing.\n\n### Investigate hidden bookkeeping entries\n\n1. `/tree`\n2. press `Alt+A` (all)\n3. 
search for `model`, `thinking`, `custom`, or labels\n\nEffect: inspect full internal timeline, not just conversational nodes.\n\n### Bookmark pivot points for later jumps\n\n1. `/tree`\n2. move to entry\n3. `Shift+L` and set label\n4. later use `Alt+L` (`labeled-only`) to jump quickly\n\nEffect: fast navigation among durable branch landmarks.\n",
99
99
  "ttsr-injection-lifecycle.md": "# TTSR Injection Lifecycle\n\nThis document covers the current Time Traveling Stream Rules (TTSR) runtime path from rule discovery to stream interruption, retry injection, extension notifications, and session-state handling.\n\n## Implementation files\n\n- [`../src/sdk.ts`](../packages/coding-agent/src/sdk.ts)\n- [`../src/export/ttsr.ts`](../packages/coding-agent/src/export/ttsr.ts)\n- [`../src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts)\n- [`../src/session/session-manager.ts`](../packages/coding-agent/src/session/session-manager.ts)\n- [`../src/prompts/system/ttsr-interrupt.md`](../packages/coding-agent/src/prompts/system/ttsr-interrupt.md)\n- [`../src/capability/index.ts`](../packages/coding-agent/src/capability/index.ts)\n- [`../src/extensibility/extensions/types.ts`](../packages/coding-agent/src/extensibility/extensions/types.ts)\n- [`../src/extensibility/hooks/types.ts`](../packages/coding-agent/src/extensibility/hooks/types.ts)\n- [`../src/extensibility/custom-tools/types.ts`](../packages/coding-agent/src/extensibility/custom-tools/types.ts)\n- [`../src/modes/controllers/event-controller.ts`](../packages/coding-agent/src/modes/controllers/event-controller.ts)\n\n## 1. Discovery feed and rule registration\n\nAt session creation, `createAgentSession()` loads discovered rules and constructs a `TtsrManager`:\n\n```ts\nconst ttsrSettings = settings.getGroup(\"ttsr\");\nconst ttsrManager = new TtsrManager(ttsrSettings);\nconst rulesResult = await loadCapability<Rule>(ruleCapability.id, { cwd });\nfor (const rule of rulesResult.items) {\n if (rule.condition?.length && ttsrManager.addRule(rule)) continue;\n // non-TTSR rules continue through normal rule handling\n}\n```\n\n### Pre-registration dedupe behavior\n\n`loadCapability(\"rules\")` deduplicates by `rule.name` with first-wins semantics (higher provider priority first). 
Shadowed duplicates are removed before TTSR registration.\n\n### `TtsrManager.addRule()` behavior\n\nRegistration is skipped when:\n\n- `rule.condition` is absent or all condition regexes fail to compile\n- a rule with the same `rule.name` was already registered in this manager\n- the rule scope excludes all monitored streams\n\nInvalid regex conditions and unreachable scopes are logged as warnings and ignored; session startup continues.\n\n### Setting caveat\n\n`TtsrSettings.enabled` is loaded into the manager but is not currently checked in runtime gating. If TTSR rules exist, matching still runs.\n\n## 2. Streaming monitor lifecycle\n\nTTSR detection runs inside `AgentSession.#handleAgentEvent`.\n\n### Turn start\n\nOn `turn_start`, the stream buffer is reset:\n\n- `ttsrManager.resetBuffer()`\n\n### During stream (`message_update`)\n\nWhen assistant updates arrive and rules exist:\n\n- monitor `text_delta`, `thinking_delta`, and `toolcall_delta`\n- append delta into a source/tool scoped manager buffer\n- call `checkDelta(delta, matchContext)`\n\n`checkDelta()` iterates registered rules and returns all matching rules that pass scope, global-path, condition, and repeat policy checks.\n\n## 3. Trigger decision and immediate abort path\n\nWhen one or more rules match and at least one matched rule allows interruption:\n\n1. Matched rules are deduplicated into `#pendingTtsrInjections`.\n2. `#ttsrAbortPending = true` and a TTSR resume gate is created.\n3. `agent.abort()` is called immediately.\n4. `ttsr_triggered` event is emitted asynchronously (fire-and-forget).\n5. retry work is scheduled via the post-prompt task scheduler with a 50ms delay.\n\nAbort is not blocked on extension callbacks.\n\n## 4. Retry scheduling, context mode, and reminder injection\n\nAfter the 50ms timeout:\n\n1. `#ttsrAbortPending = false`\n2. read `ttsrManager.getSettings().contextMode`\n3. 
if `contextMode === \"discard\"`, drop the targeted partial assistant output with `agent.replaceMessages(...slice(0, targetAssistantIndex))`\n4. build injection content from pending rules using `ttsr-interrupt.md` template\n5. append and persist a hidden `custom_message`/runtime custom message with `customType: \"ttsr-injection\"` and `details.rules`\n6. mark those rule names injected, persist a `ttsr_injection` entry, and call `agent.continue()` to retry generation\n\nTemplate payload is:\n\n```xml\n<system-interrupt reason=\"rule_violation\" rule=\"{{name}}\" path=\"{{path}}\">\n...\n{{content}}\n</system-interrupt>\n```\n\nPending injections are cleared after content generation.\n\n### `contextMode` behavior on partial output\n\n- `discard`: partial/aborted assistant message is removed before retry.\n- `keep`: partial assistant output remains in conversation state; reminder is appended after it.\n\n### Non-interrupting matches\n\nNon-interrupting matches split by `matchContext.source`:\n\n- **`source === \"tool\"` (tool-source match).** The rule is bucketed into `#perToolTtsrInjections`, keyed by the matched tool call's `id`. There is **no** deferred follow-up turn and the stream is not aborted. When the tool actually produces a result, the `afterToolCall` hook prepends a rendered `ttsr-tool-reminder.md` block to `ctx.result.content` (a single `text` block inserted ahead of the tool's own content), and persists a `ttsr_injection` entry with the consumed rule names. 
The template payload is:\n\n ```xml\n <system-reminder reason=\"rule_violation\" rule=\"{{name}}\" path=\"{{path}}\">\n ...\n {{content}}\n </system-reminder>\n ```\n\n- **`source === \"text\"` / `\"thinking\"` (prose-source match).** Behavior is unchanged: the rule is queued in `#pendingTtsrInjections` and, after a successful non-error, non-aborted assistant message, `AgentSession` injects the hidden `ttsr-injection` custom message as a follow-up and schedules continuation.\n\nWithin a single matching batch, each rule is attached to exactly one sibling tool call — if multiple sibling tool calls would satisfy the same rule, deduplication picks one and the others are left untouched. Multiple distinct rules can still fold onto the same tool call.\n\n#### Implications for tool authors and transcript readers\n\n- The tool's own `toolResult` content is preserved verbatim; the reminder is **prepended** as an additional leading text block. Renderers that assume `content[0]` is the tool's primary output must scan past any block whose text begins with `<system-reminder reason=\"rule_violation\"` (or filter on the wrapper tag) to find the real payload.\n- The reminder is in-band on the tool result, not a separate `custom_message`/`ttsr-injection` entry. Transcript readers looking for non-interrupting TTSR activity on tool-source rules MUST inspect tool results (and the persisted `ttsr_injection` entry list), not just synthetic injection entries.\n- A single tool result may carry reminders for several rules concatenated with a blank line between rendered templates.\n- If the assistant message ends with `stopReason === \"aborted\"` or `\"error\"` before the matched tools run, the pending per-tool buckets are cleared — those rules are **not** persisted as injected and remain eligible to re-trigger on a future turn (subject to repeat policy).\n\n## 5. 
Repeat policy and gap logic\n\n`TtsrManager` tracks `#messageCount` and per-rule `lastInjectedAt`.\n\n### `repeatMode: \"once\"`\n\nA rule can trigger only once: after it has an injection record, it does not trigger again.\n\n### `repeatMode: \"after-gap\"`\n\nA rule can re-trigger only when:\n\n- `messageCount - lastInjectedAt >= repeatGap`\n\n`messageCount` increments on `turn_end`, so the gap is measured in completed turns, not stream chunks.\n\n## 6. Event emission and extension/hook surfaces\n\n### Session event\n\n`AgentSessionEvent` includes:\n\n```ts\n{ type: \"ttsr_triggered\"; rules: Rule[] }\n```\n\n### Extension runner\n\n`#emitSessionEvent()` routes the event to:\n\n- extension listeners (`ExtensionRunner.emit({ type: \"ttsr_triggered\", rules })`)\n- local session subscribers\n\n### Hook and custom-tool typing\n\n- extension API exposes `on(\"ttsr_triggered\", ...)`\n- hook API exposes `on(\"ttsr_triggered\", ...)`\n- custom tools receive `onSession({ reason: \"ttsr_triggered\", rules })`\n\n### Interactive-mode rendering difference\n\nInteractive mode uses `session.isTtsrAbortPending` to suppress showing the aborted assistant stop reason as a visible failure during TTSR interruption, and renders a `TtsrNotificationComponent` when the event arrives.\n\n## 7. 
Persistence and resume state (current implementation)\n\n`SessionManager` persists injected-rule state:\n\n- entry type: `ttsr_injection`\n- append API: `appendTtsrInjection(ruleNames)`\n- query API: `getInjectedTtsrRules()`\n- context reconstruction includes `SessionContext.injectedTtsrRules`\n\n`TtsrManager` supports restoration via `restoreInjected(ruleNames)`.\n\n### Current wiring status\n\nIn the current runtime path:\n\n- interrupted injections append a hidden `custom_message` with `customType: \"ttsr-injection\"` and append a `ttsr_injection` entry via `appendTtsrInjection(...)`\n- deferred non-interrupting prose-source injections are marked/persisted when their queued custom message reaches `message_end`\n- non-interrupting tool-source injections are marked at match time and persisted via `appendTtsrInjection(...)` from the `afterToolCall` hook when the matched tool's result is produced\n- `createAgentSession()` restores `existingSession.injectedTtsrRules` into `ttsrManager`\n\nNet effect: injected-rule suppression is persisted/restored across session reload/resume for the current branch path.\n\n## 8. Race boundaries and ordering guarantees\n\n### Abort vs retry callback\n\n- abort is synchronous from TTSR handler perspective (`agent.abort()` called immediately)\n- retry is deferred by timer (`50ms`)\n- extension notification is asynchronous and intentionally not awaited before abort/retry scheduling\n\n### Multiple matches in same stream window\n\n`checkDelta()` returns all currently matching eligible rules for that scoped buffer. Pending injections are deduplicated by rule name before injection.\n\n### Between abort and continue\n\nDuring the timer window, state can change (user interruption, mode actions, additional events). The retry call is best-effort: `agent.continue().catch(() => {})` swallows follow-up errors.\n\n## 9. 
Edge cases summary\n\n- Invalid `condition` regex: skipped with warning; other conditions/rules continue.\n- Duplicate rule names at capability layer: lower-priority duplicates are shadowed before registration.\n- Duplicate names at manager layer: second registration is ignored.\n- `contextMode: \"keep\"`: partial violating output can remain in context before reminder retry.\n- `interruptMode: \"never\"`: prose-source matches queue a deferred hidden injection after a successful assistant message; tool-source matches fold an in-band `<system-reminder>` into the matched tool call's `toolResult` content via the `afterToolCall` hook (no mid-stream abort, no separate follow-up turn).\n- Tool-source non-interrupting buckets are cleared when the parent assistant message ends with `stopReason === \"aborted\"` or `\"error\"`, so rules whose target tool never produced a result remain eligible to re-trigger.\n- Repeat-after-gap depends on turn count increments at `turn_end`; mid-turn chunks do not advance gap counters.\n",
100
100
  "tui-runtime-internals.md": "# TUI runtime internals\n\nThis document maps the non-theme runtime path from terminal input to rendered output in interactive mode. It focuses on behavior in `packages/tui` and its integration from `packages/coding-agent` controllers.\n\n## Runtime layers and ownership\n\n- **`packages/tui` engine**: terminal lifecycle, stdin normalization, focus routing, render scheduling, differential painting, overlay composition, hardware cursor placement.\n- **`packages/coding-agent` interactive mode**: builds component tree, binds editor callbacks and keymaps, reacts to agent/session events, and translates domain state (streaming, tool execution, retries, plan mode) into UI components.\n\nBoundary rule: the TUI engine is message-agnostic. It only knows `Component.render(width)`, `handleInput(data)`, focus, and overlays. Agent semantics stay in interactive controllers.\n\n## Implementation files\n\n- [`../src/modes/interactive-mode.ts`](../packages/coding-agent/src/modes/interactive-mode.ts)\n- [`../src/modes/controllers/event-controller.ts`](../packages/coding-agent/src/modes/controllers/event-controller.ts)\n- [`../src/modes/controllers/input-controller.ts`](../packages/coding-agent/src/modes/controllers/input-controller.ts)\n- [`../src/modes/components/custom-editor.ts`](../packages/coding-agent/src/modes/components/custom-editor.ts)\n- [`../../tui/src/tui.ts`](../packages/tui/src/tui.ts)\n- [`../../tui/src/terminal.ts`](../packages/tui/src/terminal.ts)\n- [`../../tui/src/editor-component.ts`](../packages/tui/src/editor-component.ts)\n- [`../../tui/src/stdin-buffer.ts`](../packages/tui/src/stdin-buffer.ts)\n- [`../../tui/src/components/loader.ts`](../packages/tui/src/components/loader.ts)\n\n## Boot and component tree assembly\n\n`InteractiveMode` constructs `TUI(new ProcessTerminal(), settings.get(\"showHardwareCursor\"))`, applies `settings.get(\"clearOnShrink\")`, and creates persistent containers:\n\n- `chatContainer`\n- 
`pendingMessagesContainer`\n- `statusContainer`\n- `todoContainer`\n- `btwContainer`\n- `statusLine`\n- `hookWidgetContainerAbove`\n- `editorContainer` (holds `CustomEditor`)\n- `hookWidgetContainerBelow`\n\n`init()` wires the tree in that order, focuses the editor, registers input handlers via `InputController`, subscribes terminal appearance changes into theme auto-detection, starts TUI, and requests a forced render.\nA forced render (`requestRender(true)`) resets previous-line caches and cursor bookkeeping before repainting.\n\n## Terminal lifecycle and stdin normalization\n\n`ProcessTerminal.start()`:\n\n1. Enables raw mode and bracketed paste.\n2. Attaches resize handler.\n3. Creates a `StdinBuffer` to split partial escape chunks into complete sequences.\n4. Queries Kitty keyboard protocol support (`CSI ? u`), then enables protocol flags if supported; otherwise enables modifyOtherKeys fallback after a short timeout.\n5. Queries OSC 11 background color and enables Mode 2031 appearance notifications for dark/light theme detection.\n6. On Windows, attempts VT input enablement via `kernel32` mode flags.\n\n`StdinBuffer` behavior:\n\n- Buffers fragmented escape sequences (CSI/OSC/DCS/APC/SS3).\n- Emits `data` only when a sequence is complete or timeout-flushed.\n- Detects bracketed paste and emits a `paste` event with raw pasted text.\n\nThis prevents partial escape chunks from being misinterpreted as normal keypresses.\n\n## Input routing and focus model\n\nInput path:\n\n`stdin -> ProcessTerminal -> StdinBuffer -> TUI.#handleInput -> focusedComponent.handleInput`\n\nRouting details:\n\n1. TUI runs registered input listeners first (`addInputListener`), allowing consume/transform behavior.\n2. TUI handles global debug shortcut (`shift+ctrl+d`) before component dispatch.\n3. If focused component belongs to an overlay that is now hidden/invisible, TUI reassigns focus to next visible overlay or saved pre-overlay focus.\n4. 
Key release events are filtered unless focused component sets `wantsKeyRelease = true`.\n5. After dispatch, TUI schedules render.\n\n`setFocus()` also toggles `Focusable.focused`, which controls whether components emit `CURSOR_MARKER` for hardware cursor placement.\n\n## Key handling split: editor vs controller\n\n`CustomEditor` intercepts high-priority combos first (escape, ctrl-c/d/z, ctrl-v, ctrl-p variants, ctrl-t, alt-up, extension custom keys) and delegates the rest to base `Editor` behavior (text editing, history, autocomplete, cursor movement).\n\n`InputController.setupKeyHandlers()` then binds editor callbacks to mode actions:\n\n- cancellation / mode exits on `Escape`\n- shutdown on double `Ctrl+C` or empty-editor `Ctrl+D`\n- suspend/resume on `Ctrl+Z`\n- slash-command and selector hotkeys\n- follow-up/dequeue toggles and expansion toggles\n\nThis keeps key parsing/editor mechanics in `packages/tui` and mode semantics in coding-agent controllers.\n\n## Render loop and diffing strategy\n\n`TUI.requestRender()` is debounced to one render per tick using `process.nextTick`. Multiple state changes in the same turn coalesce.\n\n`#doRender()` pipeline:\n\n1. Render root component tree to `newLines`.\n2. Composite visible overlays (if any).\n3. Extract and strip `CURSOR_MARKER` from visible viewport lines.\n4. Append segment reset suffixes for non-image lines.\n5. Choose full repaint vs differential patch:\n - first frame\n - width change\n - shrink with `clearOnShrink` enabled and no overlays\n - edits above previous viewport\n6. For differential updates, patch only changed line range and clear stale trailing lines when needed.\n7. Reposition hardware cursor for IME support.\n\nRender writes use synchronized output mode (`CSI ? 
2026 h/l`) to reduce flicker/tearing.\n\n## Render safety constraints\n\nCritical safety checks in `TUI`:\n\n- Non-image rendered lines are expected to fit terminal width; the differential path truncates overwide lines as a last-resort guard and can write debug diagnostics when redraw debugging is enabled.\n- Overlay compositing includes defensive truncation and post-composite width guarding.\n- Width changes force full redraw because wrapping semantics change.\n- Cursor position is clamped before movement.\n\nThese constraints are runtime guards plus component conventions; renderers should still return width-safe lines rather than rely on truncation.\n\n## Resize handling\n\nResize events are event-driven from `ProcessTerminal` to `TUI.requestRender()`.\n\nEffects:\n\n- Width changes trigger full redraw.\n- Height changes trigger full redraw except in Termux and terminal multiplexers, where the renderer avoids scrollback-hostile full replays.\n- Viewport/top tracking (`#previousViewportTop`, `#maxLinesRendered`) avoids invalid relative cursor math when content or terminal size changes.\n- Overlay visibility can depend on terminal dimensions (`OverlayOptions.visible`); focus is corrected when overlays become non-visible after resize.\n\n## Streaming and incremental UI updates\n\n`EventController` subscribes to `AgentSessionEvent` and updates UI incrementally:\n\n- `agent_start`: starts loader in `statusContainer`.\n- `message_start` assistant: creates `streamingComponent` and mounts it.\n- `message_update`: updates streaming assistant content; creates/updates tool execution components as tool calls appear.\n- `tool_execution_update/end`: updates tool result components and completion state.\n- `message_end`: finalizes assistant stream, handles aborted/error annotations, marks pending tool args complete on normal stop.\n- `agent_end`: stops loaders, clears transient stream state, flushes deferred model switch, issues completion notification if 
backgrounded.\n\nRead-tool grouping is intentionally stateful (`#lastReadGroup`) to coalesce consecutive read tool calls into one visual block until a non-read break occurs.\n\n## Status and loader orchestration\n\nStatus lane ownership:\n\n- `statusContainer` holds transient loaders (`loadingAnimation`, `autoCompactionLoader`, `retryLoader`).\n- `statusLine` renders persistent status/hooks/plan indicators and drives editor top border updates.\n\nLoader behavior:\n\n- `Loader` updates every 80ms via interval and requests render each frame.\n- Escape handlers are temporarily overridden during auto-compaction and auto-retry to cancel those operations.\n- On end/cancel paths, controllers restore prior escape handlers and stop/clear loader components.\n\n## Mode transitions and backgrounding\n\n### Bash/Python input modes\n\nInput text prefixes toggle editor border mode flags:\n\n- `!` -> bash mode\n- `$` (non-template literal prefix) -> python mode\n\nWhen no execution is active, Escape exits the input mode by clearing editor text and restoring border color; when execution is active, Escape aborts the running task instead.\n\n### Plan mode\n\n`InteractiveMode` tracks plan mode flags, status-line state, active tools, and model switching. Enter/exit updates session mode entries and status/UI state, including deferred model switch if streaming is active.\n\n### Suspend/resume (`Ctrl+Z`)\n\n`InputController.handleCtrlZ()`:\n\n1. Registers one-shot `SIGCONT` handler to restart TUI and force render.\n2. Stops TUI before suspend.\n3. 
Sends `SIGTSTP` to process group.\n\n### Background mode (`/background` or `/bg`)\n\n`handleBackgroundCommand()`:\n\n- Rejects when idle.\n- Switches tool UI context to non-interactive (`hasUI=false`) so interactive UI tools fail fast.\n- Stops loaders/status line and unsubscribes foreground event handler.\n- Subscribes background event handler (primarily waits for `agent_end`).\n- Stops TUI and sends `SIGTSTP` (POSIX job control path).\n\nOn `agent_end` in background with no queued work, controller sends completion notification and shuts down.\n\n## Cancellation paths\n\nPrimary cancellation inputs:\n\n- `Escape` during active stream loader: restores queued messages to editor and aborts agent.\n- `Escape` during bash/python execution: aborts running command.\n- `Escape` during auto-compaction/retry: invokes dedicated abort methods through temporary escape handlers.\n- `Ctrl+C` single press: clear editor; double press within 500ms: shutdown.\n\nCancellation is state-conditional; same key can mean abort, mode-exit, selector trigger, or no-op depending on runtime state.\n\n## Event-driven vs throttled behavior\n\nEvent-driven updates:\n\n- Agent session events (`EventController`)\n- Key input callbacks (`InputController`)\n- terminal resize callback\n- terminal appearance callbacks, SIGWINCH theme reevaluation, and git branch watchers in `InteractiveMode`\n\nThrottled/debounced paths:\n\n- TUI rendering is tick-debounced (`requestRender` coalescing).\n- Loader animation is fixed-interval (80ms), each frame requesting render.\n- Editor autocomplete updates (inside `Editor`) use debounce timers, reducing recompute churn during typing.\n\nThe runtime therefore mixes event-driven state transitions with bounded render cadence to keep interactivity responsive without repaint storms.\n",
@@ -3,7 +3,7 @@ import type * as fsNode from "node:fs";
3
3
  import * as fs from "node:fs/promises";
4
4
  import * as path from "node:path";
5
5
  import type { AgentMessage } from "@oh-my-pi/pi-agent-core";
6
- import { completeSimple, Effort, type Model } from "@oh-my-pi/pi-ai";
6
+ import { clampThinkingLevelForModel, completeSimple, Effort, type Model } from "@oh-my-pi/pi-ai";
7
7
  import { getAgentDbPath, getMemoriesDir, logger, parseJsonlLenient, prompt } from "@oh-my-pi/pi-utils";
8
8
  import type { ModelRegistry } from "../config/model-registry";
9
9
  import { resolveModelRoleValue } from "../config/model-resolver";
@@ -612,7 +612,7 @@ async function runStage1Job(options: {
612
612
  apiKey,
613
613
  metadata: options.metadata,
614
614
  maxTokens: Math.max(1024, Math.min(4096, Math.floor(modelMaxTokens * 0.2))),
615
- reasoning: Effort.Low,
615
+ reasoning: clampThinkingLevelForModel(model, Effort.Low),
616
616
  },
617
617
  );
618
618
 
@@ -744,7 +744,12 @@ async function runConsolidationModel(options: {
744
744
  {
745
745
  messages: [{ role: "user", content: [{ type: "text", text: input }], timestamp: Date.now() }],
746
746
  },
747
- { apiKey, metadata: options.metadata, maxTokens: 8192, reasoning: Effort.Medium },
747
+ {
748
+ apiKey,
749
+ metadata: options.metadata,
750
+ maxTokens: 8192,
751
+ reasoning: clampThinkingLevelForModel(model, Effort.Medium),
752
+ },
748
753
  );
749
754
  if (response.stopReason === "error") {
750
755
  throw new Error(response.errorMessage || "phase2 model error");
@@ -1,5 +1,6 @@
1
1
  import { Editor, type KeyId, matchesKey, parseKittySequence } from "@oh-my-pi/pi-tui";
2
2
  import type { AppKeybinding } from "../../config/keybindings";
3
+ import { highlightUltrathink } from "../ultrathink";
3
4
 
4
5
  type ConfigurableEditorAction = Extract<
5
6
  AppKeybinding,
@@ -44,6 +45,8 @@ const DEFAULT_ACTION_KEYS: Record<ConfigurableEditorAction, KeyId[]> = {
44
45
  * Custom editor that handles configurable app-level shortcuts for coding-agent.
45
46
  */
46
47
  export class CustomEditor extends Editor {
48
+ /** Rainbow-highlight the "ultrathink" keyword as the user types it. */
49
+ decorateText = highlightUltrathink;
47
50
  onEscape?: () => void;
48
51
  shouldBypassAutocompleteOnEscape?: () => boolean;
49
52
  onClear?: () => void;