@oh-my-pi/pi-coding-agent 14.9.9 → 15.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128) hide show
  1. package/CHANGELOG.md +82 -0
  2. package/package.json +7 -7
  3. package/scripts/format-prompts.ts +1 -1
  4. package/src/cli/args.ts +2 -2
  5. package/src/cli.ts +1 -0
  6. package/src/commands/acp.ts +24 -0
  7. package/src/commands/launch.ts +6 -4
  8. package/src/commit/agentic/prompts/system.md +1 -1
  9. package/src/config/model-resolver.ts +30 -0
  10. package/src/config/settings-schema.ts +31 -0
  11. package/src/edit/index.ts +22 -1
  12. package/src/edit/modes/patch.ts +10 -0
  13. package/src/edit/modes/replace.ts +3 -0
  14. package/src/edit/renderer.ts +10 -0
  15. package/src/eval/js/context-manager.ts +1 -1
  16. package/src/eval/js/shared/rewrite-imports.ts +120 -48
  17. package/src/eval/js/shared/runtime.ts +31 -4
  18. package/src/eval/js/tool-bridge.ts +43 -21
  19. package/src/extensibility/extensions/runner.ts +54 -1
  20. package/src/extensibility/extensions/types.ts +11 -0
  21. package/src/extensibility/skills.ts +33 -1
  22. package/src/internal-urls/docs-index.generated.ts +6 -6
  23. package/src/internal-urls/index.ts +1 -0
  24. package/src/internal-urls/issue-pr-protocol.ts +577 -0
  25. package/src/internal-urls/router.ts +6 -3
  26. package/src/internal-urls/types.ts +22 -1
  27. package/src/main.ts +13 -9
  28. package/src/modes/acp/acp-agent.ts +361 -54
  29. package/src/modes/acp/acp-client-bridge.ts +152 -0
  30. package/src/modes/acp/acp-event-mapper.ts +180 -15
  31. package/src/modes/acp/terminal-auth.ts +37 -0
  32. package/src/modes/components/read-tool-group.ts +29 -1
  33. package/src/modes/controllers/command-controller.ts +14 -6
  34. package/src/modes/controllers/event-controller.ts +24 -11
  35. package/src/modes/controllers/extension-ui-controller.ts +8 -2
  36. package/src/modes/controllers/input-controller.ts +72 -39
  37. package/src/modes/interactive-mode.ts +71 -7
  38. package/src/modes/rpc/rpc-mode.ts +17 -2
  39. package/src/modes/types.ts +6 -2
  40. package/src/modes/utils/ui-helpers.ts +15 -3
  41. package/src/prompts/agents/designer.md +5 -5
  42. package/src/prompts/agents/explore.md +7 -7
  43. package/src/prompts/agents/init.md +9 -9
  44. package/src/prompts/agents/librarian.md +14 -14
  45. package/src/prompts/agents/plan.md +4 -4
  46. package/src/prompts/agents/reviewer.md +5 -5
  47. package/src/prompts/agents/task.md +10 -10
  48. package/src/prompts/commands/orchestrate.md +2 -2
  49. package/src/prompts/compaction/branch-summary.md +3 -3
  50. package/src/prompts/compaction/compaction-short-summary.md +7 -7
  51. package/src/prompts/compaction/compaction-summary-context.md +1 -1
  52. package/src/prompts/compaction/compaction-summary.md +5 -5
  53. package/src/prompts/compaction/compaction-turn-prefix.md +3 -3
  54. package/src/prompts/compaction/compaction-update-summary.md +11 -11
  55. package/src/prompts/memories/consolidation.md +2 -2
  56. package/src/prompts/memories/read-path.md +1 -1
  57. package/src/prompts/memories/stage_one_input.md +1 -1
  58. package/src/prompts/memories/stage_one_system.md +5 -5
  59. package/src/prompts/review-request.md +4 -4
  60. package/src/prompts/system/agent-creation-architect.md +17 -17
  61. package/src/prompts/system/agent-creation-user.md +2 -2
  62. package/src/prompts/system/commit-message-system.md +2 -2
  63. package/src/prompts/system/custom-system-prompt.md +2 -2
  64. package/src/prompts/system/eager-todo.md +6 -6
  65. package/src/prompts/system/handoff-document.md +1 -1
  66. package/src/prompts/system/plan-mode-active.md +22 -21
  67. package/src/prompts/system/plan-mode-approved.md +4 -4
  68. package/src/prompts/system/plan-mode-compact-instructions.md +16 -0
  69. package/src/prompts/system/plan-mode-reference.md +2 -2
  70. package/src/prompts/system/plan-mode-subagent.md +8 -8
  71. package/src/prompts/system/plan-mode-tool-decision-reminder.md +2 -2
  72. package/src/prompts/system/project-prompt.md +4 -4
  73. package/src/prompts/system/subagent-system-prompt.md +7 -7
  74. package/src/prompts/system/subagent-yield-reminder.md +4 -4
  75. package/src/prompts/system/system-prompt.md +72 -71
  76. package/src/prompts/system/ttsr-interrupt.md +1 -1
  77. package/src/prompts/tools/apply-patch.md +1 -1
  78. package/src/prompts/tools/ast-edit.md +3 -3
  79. package/src/prompts/tools/ast-grep.md +3 -3
  80. package/src/prompts/tools/browser.md +3 -3
  81. package/src/prompts/tools/checkpoint.md +3 -3
  82. package/src/prompts/tools/exit-plan-mode.md +2 -2
  83. package/src/prompts/tools/find.md +3 -3
  84. package/src/prompts/tools/github.md +2 -5
  85. package/src/prompts/tools/hashline.md +6 -6
  86. package/src/prompts/tools/image-gen.md +3 -3
  87. package/src/prompts/tools/irc.md +1 -1
  88. package/src/prompts/tools/lsp.md +2 -2
  89. package/src/prompts/tools/patch.md +6 -6
  90. package/src/prompts/tools/read.md +7 -7
  91. package/src/prompts/tools/replace.md +5 -5
  92. package/src/prompts/tools/retain.md +1 -1
  93. package/src/prompts/tools/rewind.md +2 -2
  94. package/src/prompts/tools/search.md +2 -2
  95. package/src/prompts/tools/ssh.md +2 -2
  96. package/src/prompts/tools/task.md +12 -6
  97. package/src/prompts/tools/web-search.md +2 -2
  98. package/src/prompts/tools/write.md +3 -3
  99. package/src/sdk.ts +69 -12
  100. package/src/session/agent-session.ts +231 -22
  101. package/src/session/client-bridge.ts +81 -0
  102. package/src/session/compaction/errors.ts +31 -0
  103. package/src/session/compaction/index.ts +1 -0
  104. package/src/slash-commands/acp-builtins.ts +46 -0
  105. package/src/slash-commands/builtin-registry.ts +699 -116
  106. package/src/slash-commands/helpers/context-report.ts +39 -0
  107. package/src/slash-commands/helpers/format.ts +23 -0
  108. package/src/slash-commands/helpers/marketplace-manager.ts +25 -0
  109. package/src/slash-commands/helpers/mcp.ts +532 -0
  110. package/src/slash-commands/helpers/parse.ts +85 -0
  111. package/src/slash-commands/helpers/ssh.ts +193 -0
  112. package/src/slash-commands/helpers/todo.ts +279 -0
  113. package/src/slash-commands/helpers/usage-report.ts +91 -0
  114. package/src/slash-commands/types.ts +126 -0
  115. package/src/task/executor.ts +10 -3
  116. package/src/task/index.ts +17 -1
  117. package/src/task/render.ts +6 -3
  118. package/src/tools/bash.ts +176 -2
  119. package/src/tools/conflict-detect.ts +6 -6
  120. package/src/tools/fetch.ts +15 -4
  121. package/src/tools/find.ts +19 -1
  122. package/src/tools/gh-renderer.ts +0 -12
  123. package/src/tools/gh.ts +682 -176
  124. package/src/tools/github-cache.ts +548 -0
  125. package/src/tools/index.ts +3 -0
  126. package/src/tools/read.ts +110 -27
  127. package/src/tools/write.ts +23 -1
  128. package/src/tui/code-cell.ts +70 -2
@@ -48,7 +48,7 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
48
48
  "session-switching-and-recent-listing.md": "# Session switching and recent session listing\n\nThis document describes how coding-agent discovers recent sessions, resolves `--resume` targets, presents session pickers, and switches the active runtime session.\n\nIt focuses on current implementation behavior, including fallback paths and caveats.\n\n## Implementation files\n\n- [`../src/session/session-manager.ts`](../packages/coding-agent/src/session/session-manager.ts)\n- [`../src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts)\n- [`../src/cli/session-picker.ts`](../packages/coding-agent/src/cli/session-picker.ts)\n- [`../src/modes/components/session-selector.ts`](../packages/coding-agent/src/modes/components/session-selector.ts)\n- [`../src/modes/controllers/selector-controller.ts`](../packages/coding-agent/src/modes/controllers/selector-controller.ts)\n- [`../src/main.ts`](../packages/coding-agent/src/main.ts)\n- [`../src/sdk.ts`](../packages/coding-agent/src/sdk.ts)\n- [`../src/modes/interactive-mode.ts`](../packages/coding-agent/src/modes/interactive-mode.ts)\n- [`../src/modes/utils/ui-helpers.ts`](../packages/coding-agent/src/modes/utils/ui-helpers.ts)\n\n## Recent-session discovery\n\n### Directory scope\n\n`SessionManager` stores sessions under a cwd-scoped directory by default:\n\n- `~/.omp/agent/sessions/--<cwd-encoded>--/*.jsonl`\n\n`SessionManager.list(cwd, sessionDir?)` reads only that directory unless an explicit `sessionDir` is provided.\n\n### Two listing paths with different payloads\n\nThere are two different listing pipelines:\n\n1. `getRecentSessions(sessionDir, limit)` (welcome/summary view)\n - Reads only a 4KB prefix (`readTextPrefix(..., 4096)`) from each file.\n - Parses header + earliest user text preview.\n - Returns lightweight `RecentSessionInfo` with lazy `name` and `timeAgo` getters.\n - Sorts by file `mtime` descending.\n\n2. `SessionManager.list(...)` / `SessionManager.listAll()` (resume pickers and ID matching)\n - Reads full session files.\n - Builds `SessionInfo` objects (`id`, `cwd`, `title`, `messageCount`, `firstMessage`, `allMessagesText`, timestamps).\n - Drops sessions with zero `message` entries.\n - Sorts by `modified` descending.\n\n### Metadata fallback behavior\n\nFor recent summaries (`RecentSessionInfo`):\n\n- display name preference: `header.title` -> first user prompt -> `header.id` -> filename\n- name is truncated to 40 chars for compact displays\n- control characters/newlines are stripped/sanitized from title-derived names\n\nFor `SessionInfo` list entries:\n\n- `title` is `header.title` or latest compaction `shortSummary`\n- `firstMessage` is first user message text or `\"(no messages)\"`\n\n## `--continue` resolution and terminal breadcrumb preference\n\n`SessionManager.continueRecent(cwd, sessionDir?)` resolves the target in this order:\n\n1. Read terminal-scoped breadcrumb (`~/.omp/agent/terminal-sessions/<terminal-id>`)\n2. Validate breadcrumb:\n - current terminal can be identified\n - breadcrumb cwd matches current cwd (resolved path compare)\n - referenced file still exists\n3. If breadcrumb is invalid/missing, fall back to newest file by mtime in the session dir (`findMostRecentSession`)\n4. If none found, create a new session\n\nTerminal ID derivation prefers TTY path and falls back to env-based identifiers (`KITTY_WINDOW_ID`, `TMUX_PANE`, `TERM_SESSION_ID`, `WT_SESSION`).\n\nBreadcrumb writes are best-effort and non-fatal.\n\n## Startup-time resume target resolution (`main.ts`)\n\n### `--resume <value>`\n\n`createSessionManager(...)` handles string-valued `--resume` in two modes:\n\n1. Path-like value (contains `/`, `\\\\`, or ends with `.jsonl`)\n - direct `SessionManager.open(sessionArg, parsed.sessionDir)`\n\n2. ID prefix value\n - find match in `SessionManager.list(cwd, sessionDir)` by `id.startsWith(sessionArg)`\n - if no local match and `sessionDir` is not forced, try `SessionManager.listAll()`\n - first match is used (no ambiguity prompt)\n\nCross-project match behavior:\n\n- if matched session cwd differs from current cwd, CLI prompts whether to fork into current project\n- yes -> `SessionManager.forkFrom(...)`\n- no -> throws error (`Session \"...\" is in another project (...)`)\n\nNo match -> throws error (`Session \"...\" not found.`).\n\n### `--resume` (no value)\n\nHandled after initial session-manager construction:\n\n1. list local sessions with `SessionManager.list(cwd, parsed.sessionDir)`\n2. if empty: print `No sessions found` and exit early\n3. open TUI picker (`selectSession`)\n4. if canceled: print `No session selected` and exit early\n5. if selected: `SessionManager.open(selectedPath)`\n\n### `--continue`\n\nUses `SessionManager.continueRecent(...)` directly (breadcrumb-first behavior above).\n\n## Picker-based selection internals\n\n## CLI picker (`src/cli/session-picker.ts`)\n\n`selectSession(sessions)` creates a standalone TUI with `SessionSelectorComponent` and resolves exactly once:\n\n- selection -> resolves selected path\n- cancel (Esc) -> resolves `null`\n- hard exit (Ctrl+C path) -> stops TUI and `process.exit(0)`\n\n## Interactive in-session picker (`SelectorController.showSessionSelector`)\n\nFlow:\n\n1. fetch sessions from current session dir via `SessionManager.list(currentCwd, currentSessionDir)`\n2. mount `SessionSelectorComponent` in editor area using `showSelector(...)`\n3. callbacks:\n - select -> close selector and call `handleResumeSession(sessionPath)`\n - cancel -> restore editor and rerender\n - exit -> `ctx.shutdown()`\n\n## Session selector component behavior\n\n`SessionList` supports:\n\n- arrow/page navigation\n- Enter to select\n- Esc to cancel\n- Ctrl+C to exit\n- fuzzy search across session id/title/cwd/first message/all messages/path\n\nEmpty-list render behavior:\n\n- renders a message instead of crashing\n- Enter on empty does nothing (no callback)\n- Esc/Ctrl+C still work\n\nCaveat: UI text says `Press Tab to view all`, but this component currently has no Tab handler and current wiring only lists current-scope sessions.\n\n## Runtime switch execution (`AgentSession.switchSession`)\n\n`switchSession(sessionPath)` is the core in-process switch path.\n\nLifecycle/state transition:\n\n1. capture `previousSessionFile`\n2. emit `session_before_switch` hook event (`reason: \"resume\"`, cancellable)\n3. if canceled -> return `false` with no switch\n4. disconnect from current agent event stream\n5. abort active generation/tool flow\n6. clear queued steering/follow-up/next-turn message buffers\n7. flush session writer (`sessionManager.flush()`) to persist pending writes\n8. `sessionManager.setSessionFile(sessionPath)`\n - updates session file pointer\n - writes terminal breadcrumb\n - loads entries / migrates / blob-resolves / reindexes\n - if missing/invalid file data: initializes a new session at that path and rewrites header\n9. update `agent.sessionId`\n10. rebuild display context via `buildDisplaySessionContext()`\n11. restore persisted/discovered MCP tool selections and rebuild active tools/system prompt when discovery is enabled\n12. emit `session_switch` hook event (`reason: \"resume\"`, `previousSessionFile`)\n13. replace agent messages with rebuilt context and sync todos\n14. close provider sessions when switching to a different session or when same-session reload changed replay messages\n15. restore default model from `sessionContext.models.default` if available and present in model registry\n16. restore thinking level and service tier:\n - thinking uses persisted `thinking_level_change`, otherwise the configured default clamped to model capability\n - service tier uses persisted `service_tier_change`, otherwise the configured `serviceTier` setting (`\"none\"` becomes unset)\n17. reconnect agent listeners and return `true`\n\n## UI state rebuild after interactive switch\n\n`SelectorController.handleResumeSession` performs UI reset around `switchSession`:\n\n- stop loading animation\n- clear status container\n- clear pending-message UI and pending tool map\n- reset streaming component/message references\n- call `session.switchSession(...)`\n- clear chat container and rerender from session context (`renderInitialMessages`)\n- reload todos from new session artifacts\n- show `Resumed session`\n\nSo visible conversation/todo state is rebuilt from the new session file.\n\n## Startup resume vs in-session switch\n\n### Startup resume (`--continue`, `--resume`, direct open)\n\n- Session file is chosen before `createAgentSession(...)`.\n- `sdk.ts` builds `existingSession = sessionManager.buildSessionContext()`.\n- Agent messages are restored once during session creation.\n- Model/thinking are selected during creation (including restore/fallback logic).\n- Interactive mode then runs `#restoreModeFromSession()` to re-enter persisted mode state (currently plan/plan_paused).\n\n### In-session switch (`/resume`-style selector path)\n\n- Uses `AgentSession.switchSession(...)` on an already-running `AgentSession`.\n- Messages/model/thinking are rebuilt immediately in place.\n- Hook `session_before_switch`/`session_switch` events are emitted.\n- UI chat/todos are refreshed.\n- No dedicated post-switch mode restore call is made in selector flow; mode re-entry behavior is not symmetric with startup `#restoreModeFromSession()`.\n\n## Failure and edge-case behavior\n\n### Cancellation paths\n\n- CLI picker cancel -> returns `null`, caller prints `No session selected`, process exits early.\n- Interactive picker cancel -> editor restored, no session change.\n- Hook cancellation (`session_before_switch`) -> `switchSession()` returns `false`.\n\n### Empty list paths\n\n- CLI `--resume` (no value): empty list prints `No sessions found` and exits.\n- Interactive selector: empty list renders message and remains cancellable.\n\n### Missing/invalid target session file\n\nWhen opening/switching to a specific path (`setSessionFile`):\n\n- ENOENT -> treated as empty -> new session initialized at that exact path and persisted.\n- malformed/invalid header (or effectively unreadable parsed entries) -> treated as empty -> new session initialized and persisted.\n\nThis is recovery behavior, not hard failure.\n\n### Hard failures\n\nSwitch/open can still throw on true I/O failures (permission errors, rewrite failures, etc.), which propagate to callers.\n\n### ID prefix matching caveats\n\n- ID matching uses `startsWith` and takes first match in sorted list.\n- No ambiguity UI if multiple sessions share prefix.\n- `SessionManager.list(...)` excludes sessions with zero messages, so those sessions are not resumable via ID match/list picker.\n",
49
49
  "session-tree-plan.md": "# Session tree architecture (current)\n\nReference: [session.md](../docs/session.md)\n\nThis document describes how session tree navigation works today: in-memory tree model, leaf movement rules, branching behavior, and extension/event integration.\n\n## What this subsystem is\n\nThe session is stored as an append-only entry log, but runtime behavior is tree-based:\n\n- Every non-header entry has `id` and `parentId`.\n- The active position is `leafId` in `SessionManager`.\n- Appending an entry always creates a child of the current leaf.\n- Branching does **not** rewrite history; it only changes where the leaf points before the next append.\n\nKey files:\n\n- `src/session/session-manager.ts` — tree data model, traversal, leaf movement, branch/session extraction\n- `src/session/agent-session.ts` — `/tree` navigation flow, summarization, hook/event emission\n- `src/modes/components/tree-selector.ts` — interactive tree UI behavior and filtering\n- `src/modes/controllers/selector-controller.ts` — selector orchestration for `/tree` and `/branch`\n- `src/modes/controllers/input-controller.ts` — command routing (`/tree`, `/branch`, double-escape behavior)\n- `src/session/messages.ts` — conversion of `branch_summary`, `compaction`, and `custom_message` entries into LLM context messages\n\n## Tree data model in `SessionManager`\n\nRuntime indices:\n\n- `#byId: Map<string, SessionEntry>` — fast lookup for any entry\n- `#leafId: string | null` — current position in the tree\n- `#labelsById: Map<string, string>` — resolved labels by target entry id\n\nTree APIs:\n\n- `getBranch(fromId?)` walks parent links to root and returns root→node path\n- `getTree()` returns `SessionTreeNode[]` (`entry`, `children`, `label`)\n - parent links become children arrays\n - entries with missing parents are treated as roots\n - children are sorted oldest→newest by timestamp\n- `getChildren(parentId)` returns direct children\n- `getLabel(id)` resolves current label from `labelsById`\n\n`getTree()` is a runtime projection; persistence remains append-only JSONL entries.\n\n## Leaf movement semantics\n\nThere are three leaf movement primitives:\n\n1. `branch(entryId)`\n - Validates entry exists\n - Sets `leafId = entryId`\n - No new entry is written\n\n2. `resetLeaf()`\n - Sets `leafId = null`\n - Next append creates a new root entry (`parentId = null`)\n\n3. `branchWithSummary(branchFromId, summary, details?, fromExtension?)`\n - Accepts `branchFromId: string | null`\n - Sets `leafId = branchFromId`\n - Appends a `branch_summary` entry as child of that leaf\n - When `branchFromId` is `null`, `fromId` is persisted as `\"root\"`\n\n## `/tree` navigation behavior (same session file)\n\n`AgentSession.navigateTree()` is navigation, not file forking.\n\nFlow:\n\n1. Validate target and compute abandoned path (`collectEntriesForBranchSummary`)\n2. Emit `session_before_tree` with `TreePreparation`\n3. Optionally summarize abandoned entries (hook-provided summary or built-in summarizer)\n4. Compute new leaf target:\n - selecting a **user** message: leaf moves to its parent, and message text is returned for editor prefill\n - selecting a **custom_message**: same rule as user message (leaf = parent, text prefills editor)\n - selecting any other entry: leaf = selected entry id\n5. Apply leaf move:\n - with summary: `branchWithSummary(newLeafId, ...)`\n - without summary and `newLeafId === null`: `resetLeaf()`\n - otherwise: `branch(newLeafId)`\n6. Rebuild agent context from new leaf and emit `session_tree`\n\nImportant: summary entries are attached at the **new navigation position**, not on the abandoned branch tail.\n\n## `/branch` behavior (new session file)\n\n`/branch` and `/tree` are intentionally different:\n\n- `/tree` navigates within the current session file.\n- `/branch` creates a new session branch file (or in-memory replacement for non-persistent mode).\n\nUser-facing `/branch` flow (`SelectorController.showUserMessageSelector` → `AgentSession.branch`):\n\n- Branch source must be a **user message**.\n- Selected user text is extracted for editor prefill.\n- If selected user message is root (`parentId === null`): start a new session via `newSession({ parentSession: previousSessionFile })`.\n- Otherwise: `createBranchedSession(selectedEntry.parentId)` to fork history up to the selected prompt boundary.\n\n`SessionManager.createBranchedSession(leafId)` specifics:\n\n- Builds root→leaf path via `getBranch(leafId)`; throws if missing.\n- Excludes existing `label` entries from copied path.\n- Rebuilds fresh label entries from resolved `labelsById` for entries that remain in path.\n- Persistent mode: writes new JSONL file and switches manager to it; returns new file path.\n- In-memory mode: replaces in-memory entries; returns `undefined`.\n\n## Context reconstruction and summary/custom integration\n\n`buildSessionContext()` (in `session-manager.ts`) resolves the active root→leaf path and builds effective LLM context state:\n\n- Tracks latest thinking/model/service-tier/mode/TTSR/MCP-selection state on path.\n- Handles latest compaction on path:\n - emits compaction summary first\n - replays kept messages from `firstKeptEntryId` to compaction point\n - then replays post-compaction messages\n- Includes `branch_summary` and `custom_message` entries as `AgentMessage` objects.\n\n`session/messages.ts` then maps these message types for model input:\n\n- `branchSummary` and `compactionSummary` become user-role templated context messages\n- `custom`/`hookMessage` become user-role content messages\n\nSo tree movement changes context by changing the active leaf path, not by mutating old entries.\n\n## Labels and tree UI behavior\n\nLabel persistence:\n\n- `appendLabelChange(targetId, label?)` writes `label` entries on the current leaf chain.\n- `labelsById` is updated immediately (set or delete).\n- `getTree()` resolves current label onto each returned node.\n\nTree selector behavior (`tree-selector.ts`):\n\n- Flattens tree for navigation, keeps active-path highlighting, and prioritizes displaying the active branch first.\n- Supports filter modes: `default`, `no-tools`, `user-only`, `labeled-only`, `all`.\n- Supports free-text search over rendered semantic content.\n- `Shift+L` opens inline label editing and writes via `appendLabelChange`.\n\nCommand routing:\n\n- `/tree` always opens tree selector.\n- `/branch` opens user-message selector unless `doubleEscapeAction=tree`, in which case it also uses tree selector UX.\n\n## Extension and hook touchpoints for tree operations\n\nCommand-time extension API (`ExtensionCommandContext`):\n\n- `branch(entryId)` — create branched session file\n- `navigateTree(targetId, { summarize? })` — move within current tree/file\n\nEvents around tree navigation:\n\n- `session_before_tree`\n - receives `TreePreparation`:\n - `targetId`\n - `oldLeafId`\n - `commonAncestorId`\n - `entriesToSummarize`\n - `userWantsSummary`\n - may cancel navigation\n - may provide summary payload used instead of built-in summarizer\n - receives abort `signal` (Escape cancellation path)\n- `session_tree`\n - emits `newLeafId`, `oldLeafId`\n - includes `summaryEntry` when a summary was created\n - `fromExtension` indicates summary origin\n\nAdjacent but related lifecycle hooks:\n\n- `session_before_branch` / `session_branch` for `/branch` flow\n- `session_before_compact`, `session.compacting`, `session_compact` for compaction entries that later affect tree-context reconstruction\n\n## Real constraints and edge conditions\n\n- `branch()` cannot target `null`; use `resetLeaf()` for root-before-first-entry state.\n- `branchWithSummary()` supports `null` target and records `fromId: \"root\"`.\n- Selecting current leaf in tree selector is a no-op.\n- Summarization requires an active model; if absent, summarize navigation fails fast.\n- If summarization is aborted, navigation is cancelled and leaf is unchanged.\n- In-memory sessions never return a branch file path from `createBranchedSession`.\n- Tree context reconstruction includes service-tier and MCP tool-selection state, but those entries do not become LLM messages.\n\n## Legacy compatibility still present\n\nSession migrations still run on load:\n\n- v1→v2 adds `id`/`parentId` and converts compaction index anchor to id anchor\n- v2→v3 migrates legacy `hookMessage` role to `custom`\n\nCurrent runtime behavior is version-3 tree semantics after migration.\n",
50
50
  "session.md": "# Session Storage and Entry Model\n\nThis document is the source of truth for how coding-agent sessions are represented, persisted, migrated, and reconstructed at runtime.\n\n## Scope\n\nCovers:\n\n- Session JSONL format and versioning\n- Entry taxonomy and tree semantics (`id`/`parentId` + leaf pointer)\n- Migration/compatibility behavior when loading old or malformed files\n- Context reconstruction (`buildSessionContext`)\n- Persistence guarantees, failure behavior, truncation/blob externalization\n- Storage abstractions (`FileSessionStorage`, `MemorySessionStorage`) and related utilities\n\nDoes not cover `/tree` UI rendering behavior beyond semantics that affect session data.\n\n## Implementation Files\n\n- [`src/session/session-manager.ts`](../packages/coding-agent/src/session/session-manager.ts)\n- [`src/session/messages.ts`](../packages/coding-agent/src/session/messages.ts)\n- [`src/session/session-storage.ts`](../packages/coding-agent/src/session/session-storage.ts)\n- [`src/session/history-storage.ts`](../packages/coding-agent/src/session/history-storage.ts)\n- [`src/session/blob-store.ts`](../packages/coding-agent/src/session/blob-store.ts)\n\n## On-Disk Layout\n\nDefault session file location:\n\n```text\n~/.omp/agent/sessions/--<cwd-encoded>--/<timestamp>_<sessionId>.jsonl\n```\n\n`<cwd-encoded>` is derived from the working directory by stripping leading slash and replacing `/`, `\\\\`, and `:` with `-`.\n\nBlob store location:\n\n```text\n~/.omp/agent/blobs/<sha256>\n```\n\nTerminal breadcrumb files are written under:\n\n```text\n~/.omp/agent/terminal-sessions/<terminal-id>\n```\n\nBreadcrumb content is two lines: original cwd, then session file path. `continueRecent()` prefers this terminal-scoped pointer before scanning most-recent mtime.\n\n## File Format\n\nSession files are JSONL: one JSON object per line.\n\n- Line 1 is always the session header (`type: \"session\"`).\n- Remaining lines are `SessionEntry` values.\n- Entries are append-only at runtime; branch navigation moves a pointer (`leafId`) rather than mutating existing entries.\n\n### Header (`SessionHeader`)\n\n```json\n{\n \"type\": \"session\",\n \"version\": 3,\n \"id\": \"1f9d2a6b9c0d1234\",\n \"timestamp\": \"2026-02-16T10:20:30.000Z\",\n \"cwd\": \"/work/pi\",\n \"title\": \"optional session title\",\n \"titleSource\": \"auto\",\n \"parentSession\": \"optional lineage marker\"\n}\n```\n\nNotes:\n\n- `version` is optional in v1 files; absence means v1.\n- `parentSession` is an opaque lineage string. Current code writes either a session id or a session path depending on flow (`fork`, `forkFrom`, `createBranchedSession`, or explicit `newSession({ parentSession })`). Treat as metadata, not a typed foreign key.\n\n### Entry Base (`SessionEntryBase`)\n\nAll non-header entries include:\n\n```json\n{\n \"type\": \"...\",\n \"id\": \"8-char-id\",\n \"parentId\": \"previous-or-branch-parent\",\n \"timestamp\": \"2026-02-16T10:20:30.000Z\"\n}\n```\n\n`parentId` can be `null` for a root entry (first append, or after `resetLeaf()`).\n\n## Entry Taxonomy\n\n`SessionEntry` is the union of:\n\n- `message`\n- `thinking_level_change`\n- `service_tier_change`\n- `compaction`\n- `branch_summary`\n- `custom`\n- `custom_message`\n- `label`\n- `ttsr_injection`\n- `session_init`\n- `mode_change`\n- `mcp_tool_selection`\n\n### `message`\n\nStores an `AgentMessage` directly.\n\n```json\n{\n \"type\": \"message\",\n \"id\": \"a1b2c3d4\",\n \"parentId\": null,\n \"timestamp\": \"2026-02-16T10:21:00.000Z\",\n \"message\": {\n \"role\": \"assistant\",\n \"provider\": \"anthropic\",\n \"model\": \"claude-sonnet-4-5\",\n \"content\": [{ \"type\": \"text\", \"text\": \"Done.\" }],\n \"usage\": {\n \"input\": 100,\n \"output\": 20,\n \"cacheRead\": 0,\n \"cacheWrite\": 0,\n \"cost\": {\n \"input\": 0,\n \"output\": 0,\n \"cacheRead\": 0,\n \"cacheWrite\": 0,\n \"total\": 0\n }\n },\n \"timestamp\": 1760000000000\n }\n}\n```\n\n### `model_change`\n\n```json\n{\n \"type\": \"model_change\",\n \"id\": \"b1c2d3e4\",\n \"parentId\": \"a1b2c3d4\",\n \"timestamp\": \"2026-02-16T10:21:30.000Z\",\n \"model\": \"openai/gpt-4o\",\n \"role\": \"default\"\n}\n```\n\n`role` is optional; missing is treated as `default` in context reconstruction.\n\n### `service_tier_change`\n\n```json\n{\n \"type\": \"service_tier_change\",\n \"id\": \"c1d2e3f4\",\n \"parentId\": \"b1c2d3e4\",\n \"timestamp\": \"2026-02-16T10:21:45.000Z\",\n \"serviceTier\": \"flex\"\n}\n```\n\n`serviceTier` can also be `null`.\n\n### `thinking_level_change`\n\n```json\n{\n \"type\": \"thinking_level_change\",\n \"id\": \"c1d2e3f4\",\n \"parentId\": \"b1c2d3e4\",\n \"timestamp\": \"2026-02-16T10:22:00.000Z\",\n \"thinkingLevel\": \"high\"\n}\n```\n\n### `compaction`\n\n```json\n{\n \"type\": \"compaction\",\n \"id\": \"d1e2f3a4\",\n \"parentId\": \"c1d2e3f4\",\n \"timestamp\": \"2026-02-16T10:23:00.000Z\",\n \"summary\": \"Conversation summary\",\n \"shortSummary\": \"Short recap\",\n \"firstKeptEntryId\": \"a1b2c3d4\",\n \"tokensBefore\": 42000,\n \"details\": { \"readFiles\": [\"src/a.ts\"] },\n \"preserveData\": { \"hookState\": true },\n \"fromExtension\": false\n}\n```\n\n### `branch_summary`\n\n```json\n{\n \"type\": \"branch_summary\",\n \"id\": \"e1f2a3b4\",\n \"parentId\": \"a1b2c3d4\",\n \"timestamp\": \"2026-02-16T10:24:00.000Z\",\n \"fromId\": \"a1b2c3d4\",\n \"summary\": \"Summary of abandoned path\",\n \"details\": { \"note\": \"optional\" },\n \"fromExtension\": true\n}\n```\n\nIf branching from root (`branchFromId === null`), `fromId` is the literal string `\"root\"`.\n\n### `custom`\n\nExtension state persistence; ignored by `buildSessionContext`.\n\n```json\n{\n \"type\": \"custom\",\n \"id\": \"f1a2b3c4\",\n \"parentId\": \"e1f2a3b4\",\n \"timestamp\": \"2026-02-16T10:25:00.000Z\",\n \"customType\": \"my-extension\",\n \"data\": { \"state\": 1 }\n}\n```\n\n### `custom_message`\n\nExtension-provided message that does participate in LLM context. `content` can be a string or text/image content blocks, and `attribution` records whether the user or agent initiated it.\n\n```json\n{\n \"type\": \"custom_message\",\n \"id\": \"a2b3c4d5\",\n \"parentId\": \"f1a2b3c4\",\n \"timestamp\": \"2026-02-16T10:26:00.000Z\",\n \"customType\": \"my-extension\",\n \"content\": \"Injected context\",\n \"display\": true,\n \"details\": { \"debug\": false },\n \"attribution\": \"agent\"\n}\n```\n\n### `label`\n\n```json\n{\n \"type\": \"label\",\n \"id\": \"b2c3d4e5\",\n \"parentId\": \"a2b3c4d5\",\n \"timestamp\": \"2026-02-16T10:27:00.000Z\",\n \"targetId\": \"a1b2c3d4\",\n \"label\": \"checkpoint\"\n}\n```\n\n`label: undefined` clears a label for `targetId`.\n\n### `ttsr_injection`\n\n```json\n{\n \"type\": \"ttsr_injection\",\n \"id\": \"c2d3e4f5\",\n \"parentId\": \"b2c3d4e5\",\n \"timestamp\": \"2026-02-16T10:28:00.000Z\",\n \"injectedRules\": [\"ruleA\", \"ruleB\"]\n}\n```\n\n### `mcp_tool_selection`\n\n```json\n{\n \"type\": \"mcp_tool_selection\",\n \"id\": \"d2e3f4a5\",\n \"parentId\": \"c2d3e4f5\",\n \"timestamp\": \"2026-02-16T10:28:30.000Z\",\n \"selectedToolNames\": [\"server.tool\"]\n}\n```\n\n### `session_init`\n\n```json\n{\n \"type\": \"session_init\",\n \"id\": \"d2e3f4a5\",\n \"parentId\": \"c2d3e4f5\",\n \"timestamp\": \"2026-02-16T10:29:00.000Z\",\n \"systemPrompt\": \"...\",\n \"task\": \"...\",\n \"tools\": [\"read\", \"edit\"],\n \"outputSchema\": { \"type\": \"object\" }\n}\n```\n\n### `mode_change`\n\n```json\n{\n \"type\": \"mode_change\",\n \"id\": \"e2f3a4b5\",\n \"parentId\": \"d2e3f4a5\",\n \"timestamp\": \"2026-02-16T10:30:00.000Z\",\n \"mode\": \"plan\",\n \"data\": { \"planFile\": \"/tmp/plan.md\" }\n}\n```\n\n## Versioning and Migration\n\nCurrent session version: `3`.\n\n### v1 -> v2\n\nApplied when header `version` is missing or `< 2`:\n\n- Adds `id` and `parentId` to each non-header entry.\n- Reconstructs a linear parent chain using file order.\n- Migrates compaction field `firstKeptEntryIndex` -> `firstKeptEntryId` when present.\n- Sets header `version = 2`.\n\n### v2 -> v3\n\nApplied when header `version < 3`:\n\n- For `message` entries: rewrites legacy `message.role === \"hookMessage\"` to `\"custom\"`.\n- Sets header `version = 3`.\n\n### Migration Trigger and Persistence\n\n- Migrations run during session load (`setSessionFile`).\n- If any migration ran, the entire file is rewritten to disk immediately.\n- Migration mutates in-memory entries first, then persists rewritten JSONL.\n\n## Load and Compatibility Behavior\n\n`loadEntriesFromFile(path)` behavior:\n\n- Missing file (`ENOENT`) -> returns `[]`.\n- Non-parseable lines are handled by lenient JSONL parser (`parseJsonlLenient`).\n- If first parsed entry is not a valid session header (`type !== \"session\"` or missing string `id`) -> returns `[]`.\n\n`SessionManager.setSessionFile()` behavior:\n\n- `[]` from loader is treated as empty/nonexistent session and replaced with a new initialized session file at that path.\n- Valid files are loaded, migrated if needed, blob refs resolved, then indexed.\n\n## Tree and Leaf Semantics\n\nThe underlying model is append-only tree + mutable leaf pointer:\n\n- Every append method creates exactly one new entry whose `parentId` is current `leafId`.\n- The new entry becomes the new `leafId`.\n- `branch(entryId)` moves only `leafId`; existing entries remain unchanged.\n- `resetLeaf()` sets `leafId = null`; next append creates a new root entry (`parentId: null`).\n- `branchWithSummary()` sets leaf to branch target and appends a `branch_summary` entry.\n\n`getEntries()` returns all non-header entries in insertion order. Existing entries are not deleted in normal operation; rewrites preserve logical history while updating representation (migrations, move, targeted rewrite helpers).\n\n## Context Reconstruction (`buildSessionContext`)\n\n`buildSessionContext(entries, leafId, byId?)` resolves what is sent to the model.\n\nAlgorithm:\n\n1. Determine leaf:\n - `leafId === null` -> return empty context.\n - explicit `leafId` -> use that entry if found.\n - otherwise fallback to last entry.\n2. Walk `parentId` chain from leaf to root and reverse to root->leaf path.\n3. Derive runtime state across path:\n - `thinkingLevel` from latest `thinking_level_change` (default `\"off\"`)\n - `serviceTier` from latest `service_tier_change`\n - model map from `model_change` entries (`role ?? \"default\"`)\n - fallback `models.default` from assistant message provider/model if no explicit model change\n - deduplicated `injectedTtsrRules` from all `ttsr_injection` entries\n - selected MCP discovery tools from latest `mcp_tool_selection`\n - mode/modeData from latest `mode_change` (default mode `\"none\"`)\n4. Build message list:\n - `message` entries pass through\n - `custom_message` entries become `custom` AgentMessages via `createCustomMessage`\n - `branch_summary` entries become `branchSummary` AgentMessages via `createBranchSummaryMessage`\n - if a `compaction` exists on path:\n - emit compaction summary first (`createCompactionSummaryMessage`)\n - emit path entries starting at `firstKeptEntryId` up to the compaction boundary\n - emit entries after the compaction boundary\n\n`custom`, `session_init`, `service_tier_change`, `mcp_tool_selection`, and `ttsr_injection` entries do not inject model context directly.\n\n## Persistence Guarantees and Failure Model\n\n### Persist vs in-memory\n\n- `SessionManager.create/open/continueRecent/forkFrom` -> persistent mode (`persist = true`).\n- `SessionManager.inMemory` -> non-persistent mode (`persist = false`) with `MemorySessionStorage`.\n\n### Write pipeline\n\nWrites are serialized through an internal promise chain (`#persistChain`) and `NdjsonFileWriter`.\n\n- `append*` updates in-memory state immediately.\n- Persistence is deferred until at least one assistant message exists.\n - Before first assistant: entries are retained in memory; no file append occurs.\n - When first assistant exists: full in-memory session is flushed to file.\n - Afterwards: new entries append incrementally.\n\nRationale in code: avoid persisting sessions that never produced an assistant response.\n\n### Durability operations\n\n- `flush()` flushes writer and calls `fsync()`.\n- Atomic full rewrites (`#rewriteFile`) write to temp file, flush+fsync, close, then rename over target.\n- Used for migrations, `setSessionName`, `rewriteEntries`, move operations, and tool-call arg rewrites.\n\n### Error behavior\n\n- Persistence errors are latched (`#persistError`) and rethrown on subsequent operations.\n- First error is logged once with session file context.\n- Writer close is best-effort but propagates the first meaningful error.\n\n## Data Size Controls and Blob Externalization\n\nBefore persisting entries:\n\n- Large strings are truncated to `MAX_PERSIST_CHARS` (500,000 chars) with notice:\n - `\"[Session persistence truncated large content]\"`\n- Transient fields `partialJson` and `jsonlEvents` are removed.\n- If object has both `content` and `lineCount`, line count is recomputed after truncation.\n- Image blocks in `content` arrays with base64 length >= 1024 are externalized to blob refs:\n - stored as `blob:sha256:<hash>`\n - raw bytes written to blob store (`BlobStore.put`)\n\nOn load, blob refs are resolved back to base64 for message/custom_message image blocks.\n\n## Storage Abstractions\n\n`SessionStorage` interface provides all filesystem operations used by `SessionManager`:\n\n- sync: `ensureDirSync`, `existsSync`, `writeTextSync`, `statSync`, `listFilesSync`\n- async: `exists`, `readText`, `readTextPrefix`, `writeText`, `rename`, `unlink`, `openWriter`\n\nImplementations:\n\n- `FileSessionStorage`: real filesystem (Bun + node fs)\n- `MemorySessionStorage`: map-backed in-memory implementation for tests/non-persistent sessions\n\n`SessionStorageWriter` exposes `writeLine`, `flush`, `fsync`, `close`, `getError`.\n\n## Session Discovery Utilities\n\nDefined in `session-manager.ts`:\n\n- `getRecentSessions(sessionDir, limit)` -> lightweight metadata for UI/session picker\n- `findMostRecentSession(sessionDir)` -> newest by mtime\n- `list(cwd, sessionDir?)` -> sessions in one project scope\n- `listAll()` -> sessions across all project scopes under `~/.omp/agent/sessions`\n\nMetadata extraction reads only a prefix (`readTextPrefix(..., 4096)`) where possible.\n\n## Related but Distinct: Prompt History Storage\n\n`HistoryStorage` (`history-storage.ts`) is a separate SQLite subsystem for prompt recall/search, not session replay.\n\n- DB: `~/.omp/agent/history.db`\n- Table: `history(id, prompt, created_at, cwd)`\n- FTS5 index: `history_fts` with trigger-maintained sync\n- Deduplicates consecutive identical prompts using in-memory last-prompt cache\n- Async insertion (`setImmediate`) so prompt capture does not block turn execution\n\nUse session files for conversation graph/state replay; use `HistoryStorage` for prompt history UX.\n",
51
- "skills.md": "# Skills\n\nSkills are file-backed capability packs discovered at startup and exposed to the model as:\n\n- lightweight metadata in the system prompt (name + description)\n- on-demand content via `read skill://...`\n- optional interactive `/skill:<name>` commands\n\nThis document covers current runtime behavior in `src/extensibility/skills.ts`, `src/discovery/builtin.ts`, `src/internal-urls/skill-protocol.ts`, and `src/discovery/agents-md.ts`.\n\n## What a skill is in this codebase\n\nA discovered skill is represented as:\n\n- `name`\n- `description`\n- `filePath` (the `SKILL.md` path)\n- `baseDir` (skill directory)\n- source metadata (`provider`, `level`, path)\n\nThe runtime only requires `name` and `path` for validity. In practice, matching quality depends on `description` being meaningful.\n\n## Required layout and SKILL.md expectations\n\n### Directory layout\n\nFor provider-based discovery (native/Claude/Codex/Agents/plugin providers), skills are discovered as **one level under `skills/`**:\n\n- `<skills-root>/<skill-name>/SKILL.md`\n\nNested patterns like `<skills-root>/group/<skill>/SKILL.md` are not discovered by provider loaders.\n\nFor `skills.customDirectories`, scanning uses the same non-recursive layout (`*/SKILL.md`).\n\n```text\nProvider-discovered layout (non-recursive under skills/):\n\n<root>/skills/\n ├─ postgres/\n │ └─ SKILL.md ✅ discovered\n ├─ pdf/\n │ └─ SKILL.md ✅ discovered\n └─ team/\n └─ internal/\n └─ SKILL.md ❌ not discovered by provider loaders\n\nCustom-directory scanning is also non-recursive, so nested paths are ignored unless you point `customDirectories` at that nested parent.\n```\n\n### `SKILL.md` frontmatter\n\nSupported frontmatter fields on the skill type:\n\n- `name?: string`\n- `description?: string`\n- `globs?: string[]`\n- `alwaysApply?: boolean`\n- additional keys are preserved as unknown metadata\n\nCurrent runtime behavior:\n\n- `name` defaults to the skill directory name\n- `description` is required for:\n - native `.omp` provider skill discovery (`requireDescription: true`)\n - `skills.customDirectories` scans via `scanSkillsFromDir` in `src/discovery/helpers.ts` (non-recursive)\n- non-native providers can load skills without description\n\n## Discovery pipeline\n\n`discoverSkills()` in `src/extensibility/skills.ts` does two passes:\n\n1. **Capability providers** via `loadCapability(\"skills\")`\n2. **Custom directories** via `scanSkillsFromDir(..., { requireDescription: true })` (one-level directory enumeration)\n\nIf `skills.enabled` is `false`, discovery returns no skills.\n\n### Built-in skill providers and precedence\n\nProvider ordering is priority-first (higher wins), then registration order for ties.\n\nCurrent registered skill providers:\n\n1. `native` (priority 100) — `.omp` user/project skills via `src/discovery/builtin.ts`\n2. `claude` (priority 80)\n3. priority 70 group (in registration order):\n - `claude-plugins`\n - `agents`\n - `codex`\n4. `opencode` (priority 55)\n Dedup key is skill name. First item with a given name wins.\n\n### Source toggles and filtering\n\n`discoverSkills()` applies these controls:\n\n- source toggles: `enableCodexUser`, `enableClaudeUser`, `enableClaudeProject`, `enablePiUser`, `enablePiProject`\n- `disabledExtensions` entries with `skill:<name>`\n- `ignoredSkills` (exclude)\n- `includeSkills` (include allowlist; empty means include all)\n\nFilter order is:\n\n1. not disabled by `disabledExtensions`\n2. source enabled\n3. not ignored\n4. included (if include list present)\n For providers other than codex/claude/native (for example `agents`, `claude-plugins`, `opencode`), enablement currently falls back to: enabled if **any** built-in source toggle is enabled.\n\n### Collision and duplicate handling\n\n- Capability dedup already keeps first skill per name (highest-precedence provider)\n- `extensibility/skills.ts` additionally:\n - de-duplicates identical files by `realpath` (symlink-safe)\n - emits collision warnings when a later skill name conflicts\n - keeps the convenience `discoverSkillsFromDir({ dir, source })` API as a thin adapter over `scanSkillsFromDir`\n- Custom-directory skills are merged after provider skills and follow the same collision behavior\n\n## Runtime usage behavior\n\n### System prompt exposure\n\nSystem prompt construction (`src/system-prompt.ts`) uses discovered skills as follows:\n\n- if `read` tool is available:\n - include discovered skills list in prompt\n- otherwise:\n - omit discovered list\n\nTask tool subagents receive the session's discovered/provided skills list via normal session creation; there is no per-task skill pinning override.\n\n### Interactive `/skill:<name>` commands\n\nIf `skills.enableSkillCommands` is true, interactive mode registers one slash command per discovered skill.\n\n`/skill:<name> [args]` behavior:\n\n- reads the skill file directly from `filePath`\n- strips frontmatter\n- injects skill body as a follow-up custom message\n- appends metadata (`Skill: <path>`, optional `User: <args>`)\n\n## `skill://` URL behavior\n\n`src/internal-urls/skill-protocol.ts` supports:\n\n- `skill://<name>` → resolves to that skill's `SKILL.md`\n- `skill://<name>/<relative-path>` → resolves inside that skill directory\n\n```text\nskill:// URL resolution\n\nskill://pdf\n -> <pdf-base>/SKILL.md\n\nskill://pdf/references/tables.md\n -> <pdf-base>/references/tables.md\n\nGuards:\n- reject absolute paths\n- reject `..` traversal\n- reject any resolved path escaping <pdf-base>\n```\n\nResolution details:\n\n- skill name must match exactly\n- relative paths are URL-decoded\n- absolute paths are rejected\n- path traversal (`..`) is rejected\n- resolved path must remain within `baseDir`\n- missing files return an explicit `File not found` error\n\nContent type:\n\n- `.md` => `text/markdown`\n- everything else => `text/plain`\n\nNo fallback search is performed for missing assets.\n\n## Skills vs AGENTS.md, commands, tools, hooks\n\n### Skills vs AGENTS.md\n\n- **Skills**: named, optional capability packs selected by task context or explicitly requested\n- **AGENTS.md/context files**: persistent instruction files loaded as context-file capability and merged by level/depth rules\n\n`src/discovery/agents-md.ts` specifically walks ancestor directories from `cwd` to discover standalone `AGENTS.md` files (up to depth 20), excluding hidden-directory segments.\n\n### Skills vs slash commands\n\n- **Skills**: model-readable knowledge/workflow content\n- **Slash commands**: user-invoked command entry points\n- `/skill:<name>` is a convenience wrapper that injects skill text; it does not change skill discovery semantics\n\n### Skills vs custom tools\n\n- **Skills**: documentation/workflow content loaded through prompt context and `read`\n- **Custom tools**: executable tool APIs callable by the model with schemas and runtime side effects\n\n### Skills vs hooks\n\n- **Skills**: passive content\n- **Hooks**: event-driven runtime interceptors that can block/modify behavior during execution\n\n## Practical authoring guidance tied to discovery logic\n\n- Put each skill in its own directory: `<skills-root>/<skill-name>/SKILL.md`\n- Always include explicit `name` and `description` frontmatter\n- Keep referenced assets under the same skill directory and access with `skill://<name>/...`\n- For nested taxonomy (`team/domain/skill`), point `skills.customDirectories` to the nested parent directory; scanning itself remains non-recursive\n- Avoid duplicate skill names across sources; first match wins by provider precedence\n",
51
+ "skills.md": "# Skills\n\nSkills are file-backed capability packs discovered at startup and exposed to the model as:\n\n- lightweight metadata in the system prompt (name + description)\n- on-demand content via the `read` tool against `skill://...`\n- optional interactive `/skill:<name>` commands\n\nThis document covers current runtime behavior in `src/extensibility/skills.ts`, `src/discovery/builtin.ts`, `src/internal-urls/skill-protocol.ts`, and `src/discovery/agents-md.ts`.\n\n## What a skill is in this codebase\n\nA discovered skill is represented as:\n\n- `name`\n- `description`\n- `filePath` (the `SKILL.md` path)\n- `baseDir` (skill directory)\n- source metadata (`provider`, `level`, path)\n\nThe runtime only requires `name` and `path` for validity. In practice, matching quality depends on `description` being meaningful.\n\n## Required layout and SKILL.md expectations\n\n### Directory layout\n\nFor provider-based discovery (native/Claude/Codex/Agents/plugin providers), skills are discovered as **one level under `skills/`**:\n\n- `<skills-root>/<skill-name>/SKILL.md`\n\nNested patterns like `<skills-root>/group/<skill>/SKILL.md` are not discovered by provider loaders.\n\nFor `skills.customDirectories`, scanning uses the same non-recursive layout (`*/SKILL.md`).\n\n```text\nProvider-discovered layout (non-recursive under skills/):\n\n<root>/skills/\n ├─ postgres/\n │ └─ SKILL.md ✅ discovered\n ├─ pdf/\n │ └─ SKILL.md ✅ discovered\n └─ team/\n └─ internal/\n └─ SKILL.md ❌ not discovered by provider loaders\n\nCustom-directory scanning is also non-recursive, so nested paths are ignored unless you point `customDirectories` at that nested parent.\n```\n\n### `SKILL.md` frontmatter\n\nSupported frontmatter fields on the skill type:\n\n- `name?: string`\n- `description?: string`\n- `globs?: string[]`\n- `alwaysApply?: boolean`\n- additional keys are preserved as unknown metadata\n\nCurrent runtime behavior:\n\n- `name` defaults to the skill directory name\n- `description` is required for:\n - native `.omp` provider skill discovery (`requireDescription: true`)\n - `skills.customDirectories` scans via `scanSkillsFromDir` in `src/discovery/helpers.ts` (non-recursive)\n- non-native providers can load skills without description\n\n## Discovery pipeline\n\n`discoverSkills()` in `src/extensibility/skills.ts` does two passes:\n\n1. **Capability providers** via `loadCapability(\"skills\")`\n2. **Custom directories** via `scanSkillsFromDir(..., { requireDescription: true })` (one-level directory enumeration)\n\nIf `skills.enabled` is `false`, discovery returns no skills.\n\n### Built-in skill providers and precedence\n\nProvider ordering is priority-first (higher wins), then registration order for ties.\n\nCurrent registered skill providers:\n\n1. `native` (priority 100) — `.omp` user/project skills via `src/discovery/builtin.ts`\n2. `claude` (priority 80)\n3. priority 70 group (in registration order):\n - `claude-plugins`\n - `agents`\n - `codex`\n4. `opencode` (priority 55)\n Dedup key is skill name. First item with a given name wins.\n\n### Source toggles and filtering\n\n`discoverSkills()` applies these controls:\n\n- source toggles: `enableCodexUser`, `enableClaudeUser`, `enableClaudeProject`, `enablePiUser`, `enablePiProject`\n- `disabledExtensions` entries with `skill:<name>`\n- `ignoredSkills` (exclude)\n- `includeSkills` (include allowlist; empty means include all)\n\nFilter order is:\n\n1. not disabled by `disabledExtensions`\n2. source enabled\n3. not ignored\n4. included (if include list present)\n For providers other than codex/claude/native (for example `agents`, `claude-plugins`, `opencode`), enablement currently falls back to: enabled if **any** built-in source toggle is enabled.\n\n### Collision and duplicate handling\n\n- Capability dedup already keeps first skill per name (highest-precedence provider)\n- `extensibility/skills.ts` additionally:\n - de-duplicates identical files by `realpath` (symlink-safe)\n - emits collision warnings when a later skill name conflicts\n - keeps the convenience `discoverSkillsFromDir({ dir, source })` API as a thin adapter over `scanSkillsFromDir`\n- Custom-directory skills are merged after provider skills and follow the same collision behavior\n\n## Runtime usage behavior\n\n### System prompt exposure\n\nSystem prompt construction (`src/system-prompt.ts`) uses discovered skills as follows:\n\n- if `read` tool is available:\n - include discovered skills list in prompt\n- otherwise:\n - omit discovered list\n\nTask tool subagents receive the session's discovered/provided skills list via normal session creation; there is no per-task skill pinning override.\n\n### Interactive `/skill:<name>` commands\n\nIf `skills.enableSkillCommands` is true, interactive mode registers one slash command per discovered skill.\n\n`/skill:<name> [args]` behavior:\n\n- reads the skill file directly from `filePath`\n- strips frontmatter\n- injects skill body as a custom message\n- delivery mode follows the **submission keybinding**:\n - **Enter** → invokes the skill on the `steer` queue while streaming (matches free-text Enter, which also steers), or as a normal idle prompt when the agent is not streaming\n - **Ctrl+Enter** (`app.message.followUp`) → invokes the skill on the `followUp` queue while streaming, or as a normal idle prompt when the agent is not streaming\n- appends metadata (`Skill: <path>`, optional `User: <args>`)\n\nThere is no flag, mode-selector, or frontmatter knob to override this — the keybinding *is* the choice, identical to how free text is routed during streaming (`input-controller.ts:243-249` for Enter, `input-controller.ts:462-500` for Ctrl+Enter; both dispatch through `#invokeSkillCommand`).\n\n## `skill://` URL behavior\n\n`src/internal-urls/skill-protocol.ts` supports:\n\n- `skill://<name>` → resolves to that skill's `SKILL.md`\n- `skill://<name>/<relative-path>` → resolves inside that skill directory\n\n```text\nskill:// URL resolution\n\nskill://pdf\n -> <pdf-base>/SKILL.md\n\nskill://pdf/references/tables.md\n -> <pdf-base>/references/tables.md\n\nGuards:\n- reject absolute paths\n- reject `..` traversal\n- reject any resolved path escaping <pdf-base>\n```\n\nResolution details:\n\n- skill name must match exactly\n- relative paths are URL-decoded\n- absolute paths are rejected\n- path traversal (`..`) is rejected\n- resolved path must remain within `baseDir`\n- missing files return an explicit `File not found` error\n\nContent type:\n\n- `.md` => `text/markdown`\n- everything else => `text/plain`\n\nNo fallback search is performed for missing assets.\n\n## Skills vs AGENTS.md, commands, tools, hooks\n\n### Skills vs AGENTS.md\n\n- **Skills**: named, optional capability packs selected by task context or explicitly requested\n- **AGENTS.md/context files**: persistent instruction files loaded as context-file capability and merged by level/depth rules\n\n`src/discovery/agents-md.ts` specifically walks ancestor directories from `cwd` to discover standalone `AGENTS.md` files (up to depth 20), excluding hidden-directory segments.\n\n### Skills vs slash commands\n\n- **Skills**: model-readable knowledge/workflow content\n- **Slash commands**: user-invoked command entry points\n- `/skill:<name>` is a convenience wrapper that injects skill text; it does not change skill discovery semantics\n\n### Skills vs custom tools\n\n- **Skills**: documentation/workflow content loaded through prompt context and `read`\n- **Custom tools**: executable tool APIs callable by the model with schemas and runtime side effects\n\n### Skills vs hooks\n\n- **Skills**: passive content\n- **Hooks**: event-driven runtime interceptors that can block/modify behavior during execution\n\n## Practical authoring guidance tied to discovery logic\n\n- Put each skill in its own directory: `<skills-root>/<skill-name>/SKILL.md`\n- Always include explicit `name` and `description` frontmatter\n- Keep referenced assets under the same skill directory and access with `skill://<name>/...`\n- For nested taxonomy (`team/domain/skill`), point `skills.customDirectories` to the nested parent directory; scanning itself remains non-recursive\n- Avoid duplicate skill names across sources; first match wins by provider precedence\n",
52
52
  "skills/authoring-extensions.md": "---\nname: authoring-extensions\ndescription: Use when creating a new omp extension. Covers ExtensionAPI, factory signature, tool/command/event registration, and local-dev testing.\n---\n\n# Authoring Extensions\n\nExtensions are the primary way to add capabilities to `oh-my-pi`. A single extension module can register tools the LLM can call, slash commands users can invoke, and event handlers that run throughout the session lifecycle — all from one TypeScript file.\n\n## Minimum viable extension\n\n```ts\nimport type { ExtensionAPI } from \"@oh-my-pi/pi-coding-agent\";\n\nexport default function (pi: ExtensionAPI) {\n pi.on(\"session_start\", async (_event, ctx) => {\n ctx.ui.notify(\"My extension loaded!\", \"info\");\n });\n}\n```\n\nThat is a working extension. Drop it into `~/.omp/agent/extensions/hello.ts` and restart omp to see the notification.\n\n## Full example\n\nThe following extension registers a slash command, a tool, and a session-start hook:\n\n```ts\nimport type { ExtensionAPI } from \"@oh-my-pi/pi-coding-agent\";\n\nexport default function myExtension(pi: ExtensionAPI) {\n const { Type } = pi.typebox;\n\n // Runs once when the session loads\n pi.on(\"session_start\", async (_event, ctx) => {\n ctx.ui.notify(`Session ready in ${ctx.cwd}`, \"info\");\n });\n\n // Slash command: /greet\n pi.registerCommand(\"greet\", {\n description: \"Send a greeting into the conversation\",\n handler: async (args, ctx) => {\n const name = args.trim() || \"world\";\n pi.sendMessage(\n {\n customType: \"greeting\",\n content: `Hello, ${name}!`,\n display: true,\n attribution: \"user\",\n },\n { triggerTurn: false }\n );\n ctx.ui.notify(`Greeted ${name}`, \"info\");\n },\n });\n\n // LLM-callable tool\n pi.registerTool({\n name: \"word_count\",\n label: \"Word Count\",\n description: \"Count the words in a string\",\n parameters: Type.Object({\n text: Type.String({ description: \"Text to count\" }),\n }),\n async execute(_id, params, _signal, _onUpdate, _ctx) {\n const count = params.text.split(/\\s+/).filter(Boolean).length;\n return {\n content: [{ type: \"text\", text: String(count) }],\n details: { count },\n };\n },\n });\n}\n```\n\n## Discovery path\n\nomp discovers extension modules in this order:\n\n1. **Project-scoped auto-discovery** — `<cwd>/.omp/extensions/`\n2. **User-scoped auto-discovery** — `~/.omp/agent/extensions/`\n3. **Marketplace-installed plugins** — `~/.omp/plugins/node_modules/` (extensions shipped inside installed plugin packages)\n4. **CLI flag** — `omp --extension ./my-ext.ts` (also `-e`; `--hook` is treated as an alias)\n5. **Settings `extensions` array** — paths listed in `~/.omp/agent/config.yml` or `<cwd>/.omp/settings.json`\n\nWithin each scope, de-duplication is by resolved absolute path — first seen wins.\n\nWhen a path points to a directory, omp resolves the entry point in this order:\n\n1. `package.json` with `omp.extensions` (or legacy `pi.extensions`) field\n2. `index.ts`\n3. `index.js`\n4. One-level scan for `*.ts` / `*.js` files and subdir `index.*` / `package.json` manifests\n\n## package.json manifest\n\nTo package an extension as an installable plugin, add an `omp` field to `package.json`:\n\n```json\n{\n \"name\": \"my-omp-extension\",\n \"omp\": {\n \"extensions\": [\"./src/main.ts\"]\n }\n}\n```\n\nThe legacy `pi` key is also accepted for backwards compatibility:\n\n```json\n{\n \"pi\": {\n \"extensions\": [\"./index.ts\"]\n }\n}\n```\n\nMultiple entry points are supported:\n\n```json\n{\n \"omp\": {\n \"extensions\": [\"./src/safety.ts\", \"./src/tools.ts\"]\n }\n}\n```\n\n## Registering commands\n\n```ts\npi.registerCommand(\"my-cmd\", {\n description: \"What the command does\",\n handler: async (args, ctx) => {\n // args: everything the user typed after /my-cmd\n // ctx: ExtensionCommandContext — includes ctx.ui, ctx.cwd, session controls\n ctx.ui.notify(\"Running!\", \"info\");\n await ctx.waitForIdle();\n await ctx.newSession();\n },\n});\n```\n\n`ExtensionCommandContext` session-control methods (safe to call from commands only):\n\n| Method | Effect |\n|---|---|\n| `waitForIdle()` | Wait for the agent to finish streaming |\n| `newSession(opts?)` | Open a fresh session |\n| `switchSession(path)` | Switch to an existing session file |\n| `branch(entryId)` | Fork from a specific history entry |\n| `navigateTree(id, opts?)` | Jump to a different point in the session tree |\n| `reload()` | Reload the session runtime |\n| `compact(opts?)` | Compact the current context |\n\n## Registering tools\n\nTools are called by the LLM. Parameters use [TypeBox](https://github.com/sinclairzx81/typebox) schemas, available at `pi.typebox`:\n\n```ts\nconst { Type } = pi.typebox;\n\npi.registerTool({\n name: \"search_notes\", // snake_case, unique\n label: \"Search Notes\", // human-readable label for TUI\n description: \"Full-text search through project notes\",\n parameters: Type.Object({\n query: Type.String({ description: \"Search query\" }),\n limit: Type.Optional(Type.Number({ description: \"Max results\", default: 10 })),\n }),\n async execute(toolCallId, params, signal, onUpdate, ctx) {\n if (signal?.aborted) {\n return { content: [{ type: \"text\", text: \"Cancelled\" }] };\n }\n onUpdate?.({ content: [{ type: \"text\", text: \"Searching...\" }] });\n // ... do work ...\n return {\n content: [{ type: \"text\", text: `Found N results for \"${params.query}\"` }],\n details: { query: params.query, count: 0 },\n };\n },\n});\n```\n\n## Subscribing to events\n\n```ts\npi.on(\"tool_call\", async (event, ctx) => {\n // event.toolName, event.input, event.toolCallId\n if (event.toolName !== \"bash\") return;\n\n const command = String((event.input as { command?: unknown }).command ?? \"\");\n if (command.includes(\"rm -rf /\")) {\n return { block: true, reason: \"Blocked by safety policy\" };\n }\n});\n\npi.on(\"turn_end\", async (_event, ctx) => {\n ctx.ui.setStatus(\"tokens\", `~${ctx.getContextUsage()?.tokens ?? \"?\"} tokens`);\n});\n```\n\nFull event catalog: see [hooks authoring guide](./authoring-hooks.md).\n\n## Extension vs hook — when to use which\n\n| Need | Use |\n|---|---|\n| Tools + commands + events in one module | **Extension** (`ExtensionAPI`) |\n| Pure event interception (policy, redaction) | **Extension** or **Hook** (both work; extension is preferred) |\n| Legacy hook module already exists | **Hook** (`HookAPI` from `@oh-my-pi/pi-coding-agent/extensibility/hooks`) |\n| Registering provider / custom message renderer | **Extension only** |\n| Shipping as a marketplace plugin | **Extension** (use `package.json` manifest) |\n\nExtensions are a strict superset of hooks. New authoring should use `ExtensionAPI`.\n\n## Debugging\n\nStart omp with `--log-level debug` to see extension load messages:\n\n```\nomp --log-level debug\n```\n\nWatch for lines like:\n\n```\n[extension-loader] loading /home/you/.omp/agent/extensions/my-ext.ts\n[extension-loader] loaded: my-ext (1 tool, 1 command, 2 handlers)\n```\n\nTo temporarily disable a specific extension by name without removing the file:\n\n```yaml\n# ~/.omp/agent/config.yml\ndisabledExtensions:\n - extension-module:my-ext\n```\n\nThe derived name is the filename stem (or directory name for `index.ts`-style entries): `/path/to/my-ext.ts` → `my-ext`.\n\n## Important constraints\n\n- **Do not call runtime actions during load.** Methods like `pi.sendMessage()` throw `ExtensionRuntimeNotInitializedError` if called synchronously during module evaluation (before a session is active). Register handlers/tools/commands during load; perform runtime actions only from event handlers, tools, or commands.\n- **`tool_call` errors are fail-closed.** If a `tool_call` handler throws, the tool is blocked.\n- **Command names must not clash with built-ins.** Conflicts are skipped with a diagnostic log.\n- **Reserved shortcuts are ignored** (`ctrl+c`, `ctrl+d`, `ctrl+z`, `ctrl+k`, `ctrl+p`, `ctrl+l`, `ctrl+o`, `ctrl+t`, `ctrl+g`, `shift+tab`, `shift+ctrl+p`, `alt+enter`, `escape`, `enter`).\n\n## Further reading\n\n- `docs/extensions.md` — runtime internals and full API surface reference\n- `docs/extension-loading.md` — detailed path resolution rules\n- `docs/hooks.md` — hook subsystem internals\n- `docs/skills/examples/hello-extension/` — complete working example\n",
53
53
  "skills/authoring-hooks.md": "---\nname: authoring-hooks\ndescription: Use when creating a new omp hook. Covers HookAPI, event catalog, blocking/overriding tool calls, and context modification.\n---\n\n# Authoring Hooks\n\nHooks are event-driven interceptors that run alongside the agent loop. They are best used for cross-cutting concerns: safety policy, secret redaction, context pruning, audit logging. A hook module registers handlers via `pi.on(event, handler)` and can block tool execution, override tool output, or rewrite the message context before each LLM call.\n\n> **Relationship to extensions:** The hook subsystem (`HookAPI`) is the legacy API. The extension runner now handles everything hooks can do plus more. `ExtensionAPI` supports the hook event model plus extension-only events. Use `ExtensionAPI` for new work; use `HookAPI` only if you are maintaining an existing hook module.\n\n## Factory signature\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\nexport default function myHook(omp: HookAPI): void {\n omp.on(\"tool_call\", async (event, ctx) => {\n // intercept every tool call\n });\n}\n```\n\nThe default export must be a plain function (not async, not a class). It receives a `HookAPI` instance and must register all handlers synchronously during execution.\n\nAlternatively, using `ExtensionAPI` (preferred):\n\n```ts\nimport type { ExtensionAPI } from \"@oh-my-pi/pi-coding-agent\";\n\nexport default function myExtension(pi: ExtensionAPI): void {\n pi.on(\"tool_call\", async (event, ctx) => { /* ... */ });\n}\n```\n\n## Event catalog\n\n### Tool lifecycle\n\n| Event | Fires | Can return |\n|---|---|---|\n| `tool_call` | Before every tool execution | `{ block?: boolean; reason?: string }` |\n| `tool_result` | After every tool execution | `{ content?; details?; isError?: boolean }` |\n\n### Session lifecycle\n\n| Event | Fires | Can return |\n|---|---|---|\n| `session_start` | On initial session load | — |\n| `session_before_switch` | Before session switch | `{ cancel?: boolean }` |\n| `session_switch` | After session switch | — |\n| `session_before_branch` | Before session branch | `{ cancel?: boolean; skipConversationRestore?: boolean }` |\n| `session_branch` | After session branch | — |\n| `session_before_compact` | Before compaction | `{ cancel?: boolean; compaction?: CompactionResult }` |\n| `session.compacting` | During compaction (inject context) | `{ context?: string[]; prompt?: string; preserveData?: Record<string, unknown> }` |\n| `session_compact` | After compaction | — |\n| `session_before_tree` | Before tree navigation | `{ cancel?: boolean; summary?: { summary: string; details?: unknown } }` |\n| `session_tree` | After tree navigation | — |\n| `session_shutdown` | On session shutdown | — |\n\n### Agent/turn lifecycle\n\n| Event | Fires | Can return |\n|---|---|---|\n| `before_agent_start` | Before agent starts a turn | `{ message?: { customType; content; display; details; attribution? } }` |\n| `agent_start` | Agent streaming starts | — |\n| `agent_end` | Agent streaming ends | — |\n| `turn_start` | Start of a user→agent turn | — |\n| `turn_end` | End of a user→agent turn | — |\n| `context` | Before each LLM API call | `{ messages?: Message[] }` |\n| `auto_compaction_start` | Auto-compaction begins | — |\n| `auto_compaction_end` | Auto-compaction ends | — |\n| `auto_retry_start` | Auto-retry begins | — |\n| `auto_retry_end` | Auto-retry ends | — |\n| `ttsr_triggered` | TTSR (too-short response) triggered | — |\n| `todo_reminder` | Todo reminder fires | — |\n\nExtension-only events such as `tool_execution_start`, `tool_execution_update`, `tool_execution_end`, `input`, `user_bash`, and `user_python` require `ExtensionAPI`.\n\n## Pre-tool blocking contract\n\nReturn `{ block: true, reason: \"...\" }` from a `tool_call` handler to prevent execution:\n\n```ts\nomp.on(\"tool_call\", async (event, ctx) => {\n if (event.toolName === \"bash\") {\n const cmd = String(event.input.command ?? \"\");\n if (/\\brm\\s+-rf\\s+\\//.test(cmd)) {\n return { block: true, reason: \"Refusing to delete root filesystem\" };\n }\n }\n});\n```\n\nContract:\n\n- If **any** handler returns `{ block: true }`, execution stops immediately.\n- `reason` is returned to the LLM as the tool error text.\n- If a handler **throws**, the tool is also blocked (fail-closed).\n- Last non-blocking return wins for non-blocking results; first `block: true` short-circuits.\n\n## Post-tool override contract\n\nReturn `{ content, details, isError }` from a `tool_result` handler to patch what the LLM sees:\n\n```ts\nomp.on(\"tool_result\", async (event, ctx) => {\n if (event.toolName === \"read\" && !event.isError) {\n const redacted = event.content.map(chunk => {\n if (chunk.type !== \"text\") return chunk;\n return {\n ...chunk,\n text: chunk.text.replace(/(?:sk|pk)-[a-zA-Z0-9]{20,}/g, \"[REDACTED_API_KEY]\"),\n };\n });\n return { content: redacted };\n }\n});\n```\n\nContract:\n\n- Handlers run in registration order. For `HookAPI`, each handler receives the original tool result event, and the last returned override wins.\n- `content` replaces the full content array for the LLM.\n- `details` replaces the structured details object.\n- `isError` overrides the error flag (typed, but note: `HookToolWrapper` behavior for error path rethrows regardless).\n- On a tool failure, `tool_result` is still emitted with `isError: true`; the original error is rethrown after handlers complete.\n\n## Context modification contract\n\nReturn `{ messages: [...] }` from a `context` handler to rewrite the message list before each LLM API call:\n\n```ts\nomp.on(\"context\", async (event, ctx) => {\n // Remove debug-only custom messages from LLM context\n const filtered = event.messages.filter(\n msg => !(msg.role === \"custom\" && msg.customType === \"debug-only\")\n );\n return { messages: filtered };\n});\n```\n\nContract:\n\n- `event.messages` is the current accumulated list.\n- Handlers run in order; each receives the output of the previous handler.\n- Return `undefined` (or nothing) to pass messages through unmodified.\n\n## Three complete examples\n\n### 1. rm-rf blocker\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\nexport default function rmRfBlocker(omp: HookAPI): void {\n omp.on(\"tool_call\", async (event, ctx) => {\n if (event.toolName !== \"bash\") return;\n\n const cmd = String(event.input.command ?? \"\");\n if (!/\\brm\\s+-rf\\s+\\//.test(cmd)) return;\n\n // Allow if user explicitly confirms (interactive mode only)\n if (ctx.hasUI) {\n const allow = await ctx.ui.confirm(\n \"Dangerous command\",\n `This command deletes from root:\\n${cmd}\\n\\nProceed?`\n );\n if (allow) return;\n }\n\n return { block: true, reason: \"rm -rf / blocked by safety policy\" };\n });\n}\n```\n\n### 2. API-key redactor\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\n// Matches common API key patterns: sk-..., pk-..., AKIA..., ghp_..., etc.\nconst SECRET_PATTERNS = [\n /\\b(sk|pk)-[a-zA-Z0-9]{20,}\\b/g,\n /\\bAKIA[A-Z0-9]{16}\\b/g,\n /\\bghp_[a-zA-Z0-9]{36}\\b/g,\n /\\b[a-zA-Z0-9_-]{20,}\\s*=\\s*[\"']?[a-zA-Z0-9._/+=-]{20,}[\"']?/g,\n];\n\nexport default function apiKeyRedactor(omp: HookAPI): void {\n omp.on(\"tool_result\", async (event) => {\n if (event.isError) return;\n\n let changed = false;\n const redacted = event.content.map(chunk => {\n if (chunk.type !== \"text\") return chunk;\n let text = chunk.text;\n for (const pattern of SECRET_PATTERNS) {\n const next = text.replace(pattern, \"[REDACTED]\");\n if (next !== text) { changed = true; text = next; }\n }\n return { ...chunk, text };\n });\n\n if (changed) return { content: redacted };\n });\n}\n```\n\n### 3. Context filter\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\nexport default function contextFilter(omp: HookAPI): void {\n omp.on(\"context\", async (event) => {\n const MAX_TOOL_OUTPUT_CHARS = 8_000;\n\n const trimmed = event.messages.map(msg => {\n // Truncate very large tool results to keep context manageable\n if (msg.role !== \"tool\") return msg;\n const content = msg.content.map(chunk => {\n if (chunk.type !== \"text\" || chunk.text.length <= MAX_TOOL_OUTPUT_CHARS) return chunk;\n return {\n ...chunk,\n text: chunk.text.slice(0, MAX_TOOL_OUTPUT_CHARS) + \"\\n[... truncated by context-filter hook]\",\n };\n });\n return { ...msg, content };\n });\n\n return { messages: trimmed };\n });\n}\n```\n\n## UI methods in hook context\n\n`ctx.ui` is a `HookUIContext`. Available methods:\n\n| Method | Description |\n|---|---|\n| `notify(message, type?)` | Show an in-app notification |\n| `setStatus(key, text)` | Set footer status text (keyed, sorted by key) |\n| `select(title, options)` | Show a selection dialog |\n| `confirm(title, message)` | Show a yes/no dialog |\n| `input(title, placeholder?)` | Show a text input dialog |\n| `editor(title, prefill?, { signal }?)` | Show a multi-line editor |\n| `setEditorText(text)` | Set the input editor content |\n| `getEditorText()` | Get current input editor content |\n| `custom(factory)` | Render a custom TUI component |\n| `theme` | Current theme object |\n\n`ctx.hasUI` is `false` in headless/print/subagent mode — always guard interactive calls.\n\n## Further reading\n\n- `docs/hooks.md` — hook subsystem internals, ordering rules, error propagation\n- `docs/extensions.md` — `ExtensionAPI` (superset of `HookAPI`)\n- `docs/skills/examples/safety-hook/` — complete working example\n",
54
54
  "skills/authoring-marketplaces.md": "---\nname: authoring-marketplaces\ndescription: Use when creating a new omp marketplace. Covers marketplace.json schema, source types, install commands, and publishing.\n---\n\n# Authoring Marketplaces\n\nA marketplace is a Git repository (or local directory) that contains a catalog file at `.claude-plugin/marketplace.json`. Anyone can author one. Users add it with `/marketplace add owner/repo` and then install individual plugins from it.\n\n## Minimum viable marketplace\n\n```\nmy-marketplace/\n .claude-plugin/\n marketplace.json\n plugins/\n my-plugin/\n package.json\n index.ts\n```\n\n```json\n{\n \"name\": \"my-marketplace\",\n \"owner\": { \"name\": \"Your Name\" },\n \"plugins\": [\n {\n \"name\": \"my-plugin\",\n \"description\": \"What it does\",\n \"source\": \"./plugins/my-plugin\"\n }\n ]\n}\n```\n\nPush to GitHub. Users install with:\n\n```\n/marketplace add your-github-username/my-marketplace\n/marketplace install my-plugin@my-marketplace\n```\n\n## marketplace.json schema\n\nThe catalog file must live at `.claude-plugin/marketplace.json` in the repository root.\n\n### Top-level fields\n\n| Field | Required | Description |\n|---|---|---|\n| `name` | yes | Marketplace name. Lowercase alphanumeric, hyphens, dots. Must start and end with alphanumeric. Max 64 chars. |\n| `owner` | yes | Object with at minimum `owner.name` (string) |\n| `owner.name` | yes | Marketplace owner name |\n| `owner.email` | no | Owner contact email |\n| `description` | no | Short description of the marketplace |\n| `plugins` | yes | Array of plugin entries (see below) |\n| `metadata.pluginRoot` | no | String prepended to all relative plugin source paths |\n\n### Plugin entry fields\n\n| Field | Required | Description |\n|---|---|---|\n| `name` | yes | Plugin name (same naming rules as marketplace name) |\n| `source` | yes | Where to find the plugin — string or object (see source types below) |\n| `description` | no | Short plugin description |\n| `version` | no | Version string |\n| `author` | no | `{ name, email? }` |\n| `homepage` | no | URL |\n| `category` | no | e.g. `development`, `productivity`, `security` |\n| `tags` | no | Array of string tags |\n\n### Full catalog example\n\n```json\n{\n \"$schema\": \"https://anthropic.com/claude-code/marketplace.schema.json\",\n \"name\": \"acme-plugins\",\n \"owner\": {\n \"name\": \"Acme Corp\",\n \"email\": \"plugins@acme.example\"\n },\n \"description\": \"Official Acme plugins for oh-my-pi\",\n \"plugins\": [\n {\n \"name\": \"acme-linter\",\n \"description\": \"Enforce Acme coding standards\",\n \"category\": \"development\",\n \"source\": \"./plugins/linter\"\n },\n {\n \"name\": \"acme-deploy\",\n \"description\": \"One-command deploy to Acme cloud\",\n \"category\": \"devops\",\n \"source\": {\n \"source\": \"github\",\n \"repo\": \"acme-corp/omp-deploy-plugin\",\n \"ref\": \"main\"\n }\n }\n ]\n}\n```\n\n## Plugin source types\n\n### 1. Relative path string\n\nPoints to a subdirectory inside the marketplace repository itself. Must start with `./`.\n\n```json\n\"source\": \"./plugins/my-plugin\"\n```\n\nThe path is resolved relative to the marketplace repository root. Path traversal outside the repo root is rejected.\n\nUse `metadata.pluginRoot` to avoid repeating a common prefix:\n\n```json\n{\n \"metadata\": { \"pluginRoot\": \"./plugins\" },\n \"plugins\": [\n { \"name\": \"plugin-a\", \"source\": \"./plugin-a\" },\n { \"name\": \"plugin-b\", \"source\": \"./plugin-b\" }\n ]\n}\n```\n\n### 2. Git URL\n\nA full Git repository URL. Optionally pin to a branch/tag (`ref`) or exact commit (`sha`):\n\n```json\n\"source\": {\n \"source\": \"url\",\n \"url\": \"https://github.com/org/my-plugin.git\",\n \"ref\": \"main\",\n \"sha\": \"a1b2c3d4...\"\n}\n```\n\n### 3. GitHub shorthand\n\nShorthand for GitHub repositories. Functionally equivalent to a Git URL but more concise:\n\n```json\n\"source\": {\n \"source\": \"github\",\n \"repo\": \"org/my-plugin\",\n \"ref\": \"v2.1.0\",\n \"sha\": \"a1b2c3d4...\"\n}\n```\n\n### 4. Git subdirectory (monorepo)\n\nFor plugins living inside a subdirectory of a larger repository. `url` accepts a full HTTPS URL or a GitHub `owner/repo` shorthand:\n\n```json\n\"source\": {\n \"source\": \"git-subdir\",\n \"url\": \"https://github.com/org/monorepo.git\",\n \"path\": \"packages/my-plugin\",\n \"ref\": \"main\",\n \"sha\": \"a1b2c3d4...\"\n}\n```\n\nThe `path` must resolve inside the cloned repository — directory escape is rejected.\n\n### 5. NPM package\n\nDeclares the plugin as an npm package. `version` is optional:\n\n```json\n\"source\": {\n \"source\": \"npm\",\n \"package\": \"@acme/omp-plugin\",\n \"version\": \"1.2.0\"\n}\n```\n\n> Note: npm plugin sources are declared in the schema but installation support is not yet fully implemented. Use Git-based sources for plugins that need to work today.\n\n## Plugin structure\n\nEach plugin directory (regardless of source type) should contain:\n\n```\nmy-plugin/\n package.json ← required: declares omp.extensions entry points\n src/\n main.ts ← extension factory\n README.md ← recommended: description + usage\n```\n\nMinimum `package.json`:\n\n```json\n{\n \"name\": \"my-plugin\",\n \"omp\": {\n \"extensions\": [\"./src/main.ts\"]\n }\n}\n```\n\n## Install command\n\n```\n/marketplace install name@marketplace-name\n/marketplace install --force name@marketplace-name # reinstall\n/marketplace install --scope project name@marketplace # project-scoped\n```\n\nCLI equivalent:\n\n```\nomp plugin marketplace add owner/repo\nomp plugin install name@marketplace-name\n```\n\nScope behavior:\n\n- **user** (default) — installed in `~/.omp/plugins/installed_plugins.json`, available in all projects\n- **project** — installed in `<project>/.omp/plugins/installed_plugins.json`, available only in that project\n\nProject-scoped installs shadow user-scoped installs of the same plugin name.\n\n## Naming rules\n\nMarketplace names and plugin names must:\n\n- Contain only lowercase letters, digits, hyphens (`-`), and dots (`.`)\n- Start and end with a lowercase letter or digit\n- Be at most 64 characters\n\nPlugin IDs (`name@marketplace`) must be at most 128 characters total.\n\nValid: `my-plugin`, `code-review`, `acme.tools`, `ai-v2`\nInvalid: `-bad-start`, `bad-end-`, `.dot-start`, `Under_score`, `HAS_CAPS`\n\n## Publishing workflow\n\n1. Create `marketplace.json` at `.claude-plugin/marketplace.json` in a new Git repo.\n2. Add plugin entries pointing to subdirectories (or external sources).\n3. Push to GitHub.\n4. Share the `owner/repo` string. Users add it with `/marketplace add owner/repo`.\n5. When you update the catalog, users run `/marketplace update your-marketplace-name` to pull the latest.\n\nTo test locally before publishing:\n\n```\n/marketplace add ./path/to/my-marketplace\n```\n\nLocal path sources also accept `~/` and absolute paths.\n\n## Further reading\n\n- `docs/marketplace.md` — marketplace system internals, on-disk layout, command reference\n- `docs/skills/authoring-extensions.md` — how to author the extension modules inside plugins\n- `docs/skills/examples/mini-marketplace/` — minimal working marketplace example\n",
@@ -61,21 +61,21 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
61
61
  "tools/ask.md": "# ask\n\n> Prompts the interactive user for one or more choices or free-form answers.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ask.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ask.md`\n- Key collaborators:\n - `packages/coding-agent/src/config/settings-schema.ts` — `ask.timeout` / `ask.notify` defaults\n - `packages/coding-agent/src/modes/theme/theme.ts` — checkbox and tree glyphs for TUI rendering\n - `packages/coding-agent/src/tui.ts` — status-line rendering\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `questions` | `Question[]` | Yes | One or more questions. Empty arrays are rejected by schema and also guarded at runtime. |\n\n### `Question`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `id` | `string` | Yes | Stable identifier used in multi-question results. |\n| `question` | `string` | Yes | Prompt text shown to the user. |\n| `options` | `{ label: string }[]` | Yes | Explicit options. The UI always appends `Other (type your own)`; callers must not include it. |\n| `multi` | `boolean` | No | Enables multi-select mode. Default: `false`. |\n| `recommended` | `number` | No | Zero-based recommended option index. In single-select mode the label gets ` (Recommended)` appended in the UI. |\n\n## Outputs\n- Single-shot result.\n- `content[0].text` is plain text:\n - single question: `User selected: ...` and/or `User provided custom input: ...`\n - multiple questions: `User answers:` followed by one line per `id`\n- `details`:\n - single question: `{ question, options, multi, selectedOptions, customInput? }`\n - multiple questions: `{ results: QuestionResult[] }`, where each item includes `id`, `question`, `options`, `multi`, `selectedOptions`, and optional `customInput`\n- Cancellation and headless cases throw instead of returning a structured success result.\n\n## Flow\n1. `AskTool.createIf()` only registers the tool when `session.hasUI` is true; headless sessions never get it.\n2. `execute()` requires `context.ui`; if missing it aborts the context and throws `ToolAbortError(\"Ask tool requires interactive mode\")`.\n3. It reads `ask.timeout` from settings, converts seconds to milliseconds, and disables timeout entirely while plan mode is enabled (`packages/coding-agent/src/tools/ask.ts`).\n4. If `ask.notify` is not `off`, it sends a terminal notification: `Waiting for input`.\n5. For each question, `askSingleQuestion()` drives either:\n - single-select list + optional editor for `Other`\n - multi-select checkbox loop + `Done selecting` sentinel + optional editor for `Other`\n6. In multi-question mode, left/right arrow handlers enable back/forward navigation between questions and preserve prior selections.\n7. If a timeout fires before any selection/custom input, the tool auto-selects the recommended option, or the first option when no valid `recommended` index exists.\n8. If the user cancels without timeout, `execute()` aborts the tool context and throws `ToolAbortError(\"Ask tool was cancelled by the user\")`.\n9. On success it formats human-readable text plus structured `details`; the TUI renderer uses `details` for rich display.\n\n## Modes / Variants\n- Single question: returns flattened `details` fields for one question.\n- Multiple questions: returns `details.results[]` and allows back/forward navigation across questions.\n- Single-select: one option or custom input.\n- Multi-select: toggled checkbox list, `Done selecting` sentinel only when forward navigation is not active.\n\n## Side Effects\n- User-visible prompts / interactive UI\n - Opens a selection dialog via `context.ui.select(...)`.\n - Opens a text editor dialog via `context.ui.editor(...)` for `Other`.\n - Sends a terminal notification unless `ask.notify=off`.\n- Session state\n - Reads plan-mode state to disable timeouts.\n - Calls `context.abort()` on headless use or user cancellation.\n- Background work / cancellation\n - Wraps UI waits in `untilAborted(...)` so abort signals interrupt pending dialogs.\n\n## Limits & Caps\n- `questions` must contain at least 1 item (`askSchema` in `packages/coding-agent/src/tools/ask.ts`).\n- `ask.timeout` default is `30` seconds; `0` disables timeout (`packages/coding-agent/src/config/settings-schema.ts`).\n- Prompt guidance says provide 2-5 options, but code does not enforce that (`packages/coding-agent/src/prompts/tools/ask.md`).\n- Timeout only applies to the option picker; once the user chooses `Other`, the editor has no timeout (`packages/coding-agent/src/prompts/tools/ask.md`).\n\n## Errors\n- Missing interactive UI: throws `ToolAbortError(\"Ask tool requires interactive mode\")`.\n- User cancels picker/editor without timeout: throws `ToolAbortError(\"Ask tool was cancelled by the user\")`.\n- Abort signal during input: converted to `ToolAbortError(\"Ask input was cancelled\")`.\n- Empty `questions` at runtime returns a text error payload instead of throwing: `Error: questions must not be empty`.\n\n## Notes\n- `recommended` is only a UI hint; invalid indexes are ignored.\n- In single-select mode the returned `selectedOptions` value strips the appended ` (Recommended)` suffix.\n- Multi-select results preserve selection order by `Set` insertion order, not original option order after arbitrary toggles.\n- Option labels and prompt text are returned verbatim in `details`; the tool does not interpret them beyond UI affordances like `Other` and ` (Recommended)`.\n",
62
62
  "tools/ast-edit.md": "# ast_edit\n\n> Preview and apply structural rewrites over source files via native ast-grep.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ast-edit.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ast-edit.md`\n- Key collaborators:\n - `crates/pi-natives/src/ast.rs` — native rewrite planning and file mutation\n - `crates/pi-natives/src/language/mod.rs` — language aliases and extension inference\n - `packages/coding-agent/src/tools/path-utils.ts` — path/glob parsing and multi-path resolution\n - `packages/coding-agent/src/tools/resolve.ts` — preview/apply queueing\n - `packages/coding-agent/src/tools/render-utils.ts` — parse-error dedupe and display caps\n - `packages/coding-agent/src/utils/file-display-mode.ts` — hashline vs line-number diff references\n - `packages/coding-agent/src/hashline/hash.ts` — stable hashline diff anchors\n - `packages/natives/native/index.d.ts` — JS-visible native binding contract\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `ops` | `{ pat: string; out: string }[]` | Yes | One or more rewrite rules. `pat` must be non-empty. Duplicate `pat` values fail before native execution. Empty `out` deletes the matched node. |\n| `paths` | `string[]` | Yes | One or more files, directories, globs, or internal URLs with backing files. Empty entries are rejected. Globs are forbidden for internal URLs. |\n\nShared AST pattern grammar and language catalog: see [`ast_grep`](./ast-grep.md#inputs).\n\n- `ast_edit` uses the same `$NAME`, `$_`, `$$$NAME`, and `$$$` metavariable semantics.\n- The tool prompt adds rewrite-specific constraints:\n - metavariable names must be uppercase and must stand for whole AST nodes,\n - captures from `pat` are substituted into `out`,\n - each rewrite is a 1:1 structural substitution; one capture cannot expand into multiple sibling nodes unless the grammar itself permits that expansion at that position.\n\n## Outputs\n- Single-shot preview result from `ast_edit` itself.\n- Model-facing `content` is one text block showing proposed edits, grouped by file for directory/multi-file runs.\n - Each change renders as two lines: `-REF|before` and `+REF|after` in hashline mode, or `-LINE:COLUMN before` / `+LINE:COLUMN after` when hashlines are off.\n - Only the first line of each `before`/`after` snippet is shown, truncated to 120 characters in the wrapper.\n - `Limit reached; narrow paths.` and formatted parse issues are appended when applicable.\n- If no rewrites match, text is `No replacements made` plus formatted parse issues when present.\n- `details` includes aggregate preview metadata:\n - `totalReplacements`, `filesTouched`, `filesSearched`, `applied`, `limitReached`\n - optional `parseErrors`, `scopePath`, `files`, `fileReplacements`, `displayContent`, `meta`\n- The tool always previews first (`applied: false` in the direct result). Actual file writes happen only later through `resolve(action: \"apply\", ...)`.\n- When preview produced replacements, `ast_edit` also queues a pending `resolve` action. Successful apply returns a separate `resolve` result, not another `ast_edit` result.\n\n## Flow\n1. `AstEditTool.execute()` validates each op in `packages/coding-agent/src/tools/ast-edit.ts`:\n - empty `pat` fails,\n - at least one op is required,\n - duplicate `pat` values fail,\n - ops are converted to a `Record<pattern, replacement>`.\n2. The wrapper reads `PI_MAX_AST_FILES` via `$envpos(..., 1000)` and uses that as the native `maxFiles` cap for both preview and apply.\n3. Path normalization, internal URL handling, missing-path partitioning, and multi-path resolution follow the same `path-utils.ts` flow as `ast_grep`.\n4. The wrapper stats the resolved base path to decide whether to render grouped directory output.\n5. `runAstEditOnce(...)` always runs native `astEdit(...)` with `dryRun: true` and `failOnParseError: false` on the first pass.\n6. Native `ast_edit` in `crates/pi-natives/src/ast.rs`:\n - normalizes the rewrite map and sorts rules by pattern string,\n - resolves strictness (`smart` by default),\n - collects candidate files from a file or gitignore-aware directory scan,\n - infers a single language for the whole call unless `lang` was supplied,\n - compiles every rewrite pattern for that language,\n - parses each file, skips files with syntax-error trees, collects `replace_by(...)` edits for every match, enforces replacement and file caps, and returns textual before/after slices plus source ranges.\n7. The TS wrapper deduplicates parse errors, groups changes by file, and renders preview diff lines.\n8. If preview found replacements and `applied` is false, `queueResolveHandler(...)` registers a forced `resolve` action and injects a `resolve-reminder` steering message.\n9. On `resolve(action: \"apply\")`, the queued callback reruns the same rewrite set with `dryRun: false`, recomputes counts, and rejects the apply as an error if the live result no longer matches the preview (`stalePreview`).\n10. On a non-stale apply, the callback returns `Applied N replacements in M files.`; on discard, `resolve` returns a discard message without mutating files.\n\n## Modes / Variants\n- Single file: preview or apply against one file.\n- Directory + optional glob: native scan walks the directory, then filters by compiled glob.\n- Multiple explicit paths/globs: wrapper unions them into one synthetic scope or runs per-target native calls when paths only meet at root.\n- Internal URL inputs: only supported when the router resolves them to a backing file path.\n- Preview mode: always the direct `ast_edit` tool result.\n- Apply mode: only reachable through the queued `resolve` callback after a preview.\n- Hashline output mode vs plain line/column mode: controlled by `resolveFileDisplayMode()`.\n\n## Side Effects\n- Filesystem\n - Preview reads files and scans directories.\n - Apply rewrites files in place with `std::fs::write(...)`, but only when the computed output differs from the original source.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Queues a one-shot forced `resolve` tool choice through `queueResolveHandler(...)`.\n - Adds a `resolve-reminder` steering message.\n- User-visible prompts / interactive UI\n - Direct `ast_edit` results are previews.\n - Follow-up apply/discard is exposed through the hidden `resolve` tool.\n- Background work / cancellation\n - Native preview/apply work runs on a blocking worker via `task::blocking(...)`.\n - Cancellation and optional native timeout are cooperative through `CancelToken::heartbeat()`.\n\n## Limits & Caps\n- File cap exposed by the wrapper: `PI_MAX_AST_FILES`, default `1000`, in `packages/coding-agent/src/tools/ast-edit.ts`.\n- Native `maxFiles` and `maxReplacements` are both clamped to at least `1` when provided in `crates/pi-natives/src/ast.rs`.\n- The wrapper never sets `maxReplacements`; native behavior therefore defaults to effectively unbounded replacements for a run.\n- Parse issues are rendered with at most `PARSE_ERRORS_LIMIT = 20` lines in `packages/coding-agent/src/tools/render-utils.ts`; `details.parseErrors` is deduplicated but not capped.\n- Directory scans use `include_hidden: true`, `use_gitignore: true`, and skip `node_modules` unless the glob text explicitly mentions `node_modules` in `crates/pi-natives/src/ast.rs`.\n- No separate glob-expansion count cap exists. Candidate count is whatever the resolved path/glob expands to after gitignore filtering, then native `maxFiles` stops mutations after the configured number of touched files.\n- Preview text truncates each rendered `before` and `after` first line to 120 characters in `packages/coding-agent/src/tools/ast-edit.ts`.\n\n## Errors\n- TS wrapper throws `ToolError` for empty patterns, duplicate rewrite patterns, empty path entries, unsupported internal-URL globs, internal URLs without `sourcePath`, and missing paths.\n- Native code returns hard errors for:\n - inability to infer one language across all candidates when `lang` is absent,\n - unsupported explicit `lang`,\n - bad glob compilation or unreadable search roots,\n - overlapping computed edits (`Overlapping replacements detected; refine pattern to avoid ambiguous edits`),\n - out-of-bounds edit ranges or non-UTF-8 replacement text,\n - write failures during apply,\n - cancellation or timeout.\n- With `failOnParseError: false` (the wrapper always uses this), pattern compile failures and file parse failures become `parseErrors` instead of aborting the whole run.\n- If every rewrite pattern fails to compile, native `ast_edit` returns a successful zero-replacement result with `parseErrors` populated.\n- Files containing tree-sitter error nodes are skipped for rewriting; they do not get partial edits.\n- Apply can fail after a successful preview if the preview becomes stale. The resolve callback compares replacement totals and per-file counts and returns an error result rather than applying a mismatched preview silently.\n\n## Notes\n- `ast_edit` does not expose the native `lang`, `strictness`, `selector`, `maxReplacements`, `failOnParseError`, or `timeoutMs` fields to the model. The runtime fixes the call shape to a preview-first, smart-strictness, best-effort parse mode.\n- Because the wrapper does not expose `lang`, mixed-language rewrites only succeed when every candidate infers to the same canonical language. This is stricter than `ast_grep`.\n- Idempotency is not enforced syntactically. A rewrite like `foo($A) -> foo($A)` previews zero changes because output equals input; a rewrite that keeps matching its own output may still produce replacements on repeated calls.\n- Rewrites are accumulated per file, then applied from the end of the file backward after an overlap check. Independent matches can coexist; overlapping matches abort the run.\n- Native rewrite rule order is by pattern-string sort, not by the original `ops` array order, because `normalize_rewrite_map(...)` sorts the `(pattern, rewrite)` pairs.\n- Preview/apply parity is validated only by totals and per-file counts, not by a byte-for-byte diff of every replacement payload.",
63
63
  "tools/ast-grep.md": "# ast_grep\n\n> Structural code search over supported source files via native ast-grep.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ast-grep.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ast-grep.md`\n- Key collaborators:\n - `crates/pi-natives/src/ast.rs` — native scan, parse, match engine\n - `crates/pi-natives/src/language/mod.rs` — language aliases and extension inference\n - `packages/coding-agent/src/tools/path-utils.ts` — path/glob parsing and multi-path resolution\n - `packages/coding-agent/src/tools/render-utils.ts` — parse-error dedupe and display caps\n - `packages/coding-agent/src/tools/match-line-format.ts` — anchor-prefixed match rendering\n - `packages/coding-agent/src/utils/file-display-mode.ts` — hashline vs line-number output mode\n - `packages/natives/native/index.d.ts` — JS-visible native binding contract\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `pat` | `string` | Yes | Single AST pattern. The wrapper trims it and rejects empty strings. |\n| `paths` | `string[]` | Yes | One or more files, directories, globs, or internal URLs with backing files. Empty entries are rejected. Globs are forbidden for internal URLs. |\n| `skip` | `number` | No | Match offset. Defaults to `0`, then `Math.floor(...)`; negatives and non-finite values fail. |\n\nPattern grammar and language support exposed to the model:\n- `$NAME` — capture one AST node.\n- `$_` — match one AST node without binding.\n- `$$$NAME` — capture zero or more AST nodes; ast-grep stops lazily at the next satisfiable node.\n- `$$$` — match zero or more AST nodes without binding.\n- Metavariable names must be uppercase and must stand for whole AST nodes, not partial tokens or string fragments.\n- Reusing the same metavariable requires identical code at each occurrence.\n- Patterns must parse as one valid AST node for the inferred target language.\n- Supported canonical languages come from `SupportLang::all_langs()` in `crates/pi-natives/src/language/mod.rs`: `astro`, `bash`, `c`, `cmake`, `cpp`, `csharp`, `dart`, `clojure`, `css`, `diff`, `dockerfile`, `elixir`, `erlang`, `go`, `graphql`, `haskell`, `hcl`, `html`, `ini`, `java`, `javascript`, `json`, `just`, `julia`, `kotlin`, `lua`, `make`, `markdown`, `nix`, `objc`, `ocaml`, `odin`, `perl`, `php`, `powershell`, `protobuf`, `python`, `r`, `regex`, `ruby`, `rust`, `scala`, `solidity`, `sql`, `starlark`, `svelte`, `swift`, `toml`, `tlaplus`, `tsx`, `typescript`, `verilog`, `vue`, `xml`, `yaml`, `zig`.\n\n## Outputs\n- Single-shot tool result.\n- Model-facing `content` is one text block:\n - grouped by file for directory/multi-file searches,\n - match lines rendered as `*LINE+HASH|text` in hashline mode or `*LINE|text` otherwise,\n - continuation lines for multi-line matches rendered with a leading space,\n - optional `meta: NAME=value` lines when ast-grep captured metavariables.\n- If no matches are found, text is `No matches found` or `No matches found. Parse issues mean the query may be mis-scoped; narrow paths before concluding absence.` plus formatted parse issues.\n- If the wrapper truncates visible results, the text ends with `Result limit reached; narrow paths or increase limit.`\n- `details` includes counts and metadata, not full match payloads:\n - `matchCount`, `fileCount`, `filesSearched`, `limitReached`\n - optional `parseErrors`, `scopePath`, `files`, `fileMatches`, `displayContent`, `meta`\n- Native ranges (`byteStart`, `byteEnd`, `startLine`, `startColumn`, `endLine`, `endColumn`) exist only inside the native result; the wrapper does not emit them directly to the model.\n\n## Flow\n1. `AstGrepTool.execute()` validates `pat`, normalizes `skip`, and normalizes each `paths` entry in `packages/coding-agent/src/tools/ast-grep.ts`.\n2. Internal URLs are resolved through `session.internalRouter`; entries without `sourcePath` fail, and internal-URL globs fail early.\n3. For multiple path inputs, `partitionExistingPaths()` drops missing bases only when at least one surviving base remains; if all bases are missing the call fails.\n4. `parseSearchPath()` splits a single path into `basePath` plus optional `glob`. `resolveExplicitSearchPaths()` collapses multiple inputs into a common base plus a brace-union glob, or separate `targets` when the only common base is a filesystem root.\n5. The wrapper stats the resolved base path to decide whether output should be grouped as a directory result.\n6. Execution dispatches to either:\n - one native `astGrep(...)` call for a single resolved base, or\n - `runMultiTargetAstGrep(...)`, which calls the native binding once per target, rebases paths back to the common root, sorts globally, then applies `skip` and the wrapper limit.\n7. Native `ast_grep` in `crates/pi-natives/src/ast.rs`:\n - normalizes and deduplicates patterns,\n - resolves a `MatchStrictness` (`smart` by default),\n - collects candidate files from a file or gitignore-aware directory scan,\n - infers language per candidate from extension unless `lang` was provided,\n - compiles the pattern separately for each language present,\n - reads each file, reports syntax-error trees as parse issues, runs `find_all`, and optionally captures metavariable bindings.\n8. Native results are sorted by path and source position, then paged by `offset`/`limit`.\n9. The TS wrapper normalizes parse-error strings, deduplicates them, groups matches by formatted path, renders anchor lines, appends limit/parse notices, and returns `toolResult(...).text(...).done()`.\n\n## Modes / Variants\n- Single file: native path is the file; output is a flat list of rendered match lines.\n- Directory + optional glob: native scan walks the directory, then filters by compiled glob.\n- Multiple explicit paths/globs: wrapper unions them into one synthetic scope or runs per-target native calls when paths only meet at root.\n- Internal URL inputs: only supported when the router can resolve them to a backing file path.\n- Hashline output mode vs plain line-number mode: controlled by `resolveFileDisplayMode()`; hashline mode requires the edit tool and non-raw, mutable sources.\n\n## Side Effects\n- Filesystem\n - Stats input paths in the TS wrapper.\n - Native code reads matched files and scans directories through `fs_cache`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - None beyond normal tool transcript/result metadata.\n- Background work / cancellation\n - Native work runs on a blocking worker via `task::blocking(...)`.\n - Cancellation and optional native timeout are cooperative through `CancelToken::heartbeat()`.\n\n## Limits & Caps\n- Wrapper-visible result cap: `DEFAULT_AST_LIMIT = 50` in `packages/coding-agent/src/tools/ast-grep.ts`.\n - Single-target calls rely on the native default limit of 50 in `crates/pi-natives/src/ast.rs`.\n - Multi-target calls fetch `skip + 50 + 1` matches per target, then re-page after global sort.\n- Native `limit` is clamped to at least `1`; omitted `offset` defaults to `0` in `crates/pi-natives/src/ast.rs`.\n- Parse issues are rendered with at most `PARSE_ERRORS_LIMIT = 20` lines in `packages/coding-agent/src/tools/render-utils.ts`; `details.parseErrors` itself is only deduplicated, not capped.\n- Directory scans use `include_hidden: true`, `use_gitignore: true`, and skip `node_modules` unless the glob text explicitly mentions `node_modules` in `crates/pi-natives/src/ast.rs`.\n- No hard file-count cap is applied by the wrapper or native `ast_grep`; candidate count is whatever the resolved path/glob expands to after gitignore filtering.\n- Multi-path union deduplicates identical path inputs before resolution in `resolveExplicitSearchPaths()`.\n\n## Errors\n- TS wrapper throws `ToolError` for empty patterns, invalid `skip`, empty path entries, unsupported internal-URL globs, internal URLs without `sourcePath`, and missing paths.\n- Native code returns hard errors for:\n - unsupported explicit `lang`,\n - inability to infer language for a candidate when `lang` is not supplied,\n - invalid AST pattern compilation for every relevant language,\n - unreadable search roots or bad glob compilation,\n - cancellation (`Aborted: Signal`) or timeout (`Aborted: Timeout`).\n- File-level parse failures and many per-language pattern compile failures are non-fatal: they are accumulated in `parseErrors` and surfaced alongside successful matches.\n- `no matches` is not an error, even when parse issues were recorded.\n\n## Notes\n- `pat` is always wrapped into a one-element `patterns` array by the TS tool; the model cannot send multiple patterns through `ast_grep` even though the native binding supports it.\n- `ast_grep` can search mixed-language trees because native compilation happens per discovered language, but the prompt still tells the model to keep calls single-language when possible to reduce parse noise.\n- Pattern compilation is per language present in the candidate set. One pattern can succeed for some languages and generate per-file parse errors for others in the same run.\n- A file with tree-sitter error nodes still gets searched; the syntax warning is additive, not a skip condition.\n- For glob semantics, `*.ts` matches only direct children while `**/*.ts` recurses; this is covered by native tests in `crates/pi-natives/src/ast.rs`.\n- Output anchors are intended for follow-up tools, but the exact anchor format depends on session edit mode (`hashline` vs line-number mode).",
64
- "tools/bash.md": "# bash\n\n> Execute a shell command in the session workspace, with optional PTY or background-job handling.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/bash.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/bash.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/bash-interactive.ts` — PTY/TUI execution path.\n - `packages/coding-agent/src/tools/bash-interceptor.ts` — blocks tool-better shell patterns.\n - `packages/coding-agent/src/tools/bash-skill-urls.ts` — expands internal URLs to paths.\n - `packages/coding-agent/src/exec/bash-executor.ts` — non-PTY shell execution.\n - `packages/coding-agent/src/session/streaming-output.ts` — tail buffer, truncation, artifact spill.\n - `packages/coding-agent/src/tools/tool-timeouts.ts` — timeout clamp bounds.\n - `packages/coding-agent/src/config/settings-schema.ts` — default interceptor rules.\n - `docs/bash-tool-runtime.md` — deeper executor/runtime notes; use as the companion doc for shell-session internals.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `command` | `string` | Yes | Shell command text to execute. A leading `cd <path> && ...` is rewritten into `cwd` only when `cwd` was omitted. |\n| `env` | `Record<string, string>` | No | Extra environment variables. Keys must match `^[A-Za-z_][A-Za-z0-9_]*$` or the tool throws. Values also go through internal-URL expansion. |\n| `timeout` | `number` | No | Timeout in seconds. Default `300`; clamped to `1..3600` by `clampTimeout(\"bash\", ...)`. |\n| `cwd` | `string` | No | Working directory, resolved against `session.cwd` via `resolveToCwd`. Must exist and be a directory. |\n| `pty` | `boolean` | No | Request PTY mode. Default `false`. PTY is used only when `pty: true`, `PI_NO_PTY !== \"1\"`, and the tool context has a UI. |\n| `async` | `boolean` | No | Background execution request. Present only when `async.enabled` is true for the session. Returns immediately with a job id instead of waiting. |\n\n## Outputs\nThe tool returns a single `text` content block plus optional `details`.\n\n- Success, foreground:\n - `content[0].text`: command output, or `(no output)` when the command produced nothing.\n - `details.timeoutSeconds`: effective timeout after clamping.\n - `details.requestedTimeoutSeconds`: only present when the requested timeout was clamped.\n - `details.meta.truncation`: present when output was truncated in memory; includes `artifactId` when full output spilled to an artifact.\n- Success, background start (`async: true` or auto-background):\n - `content[0].text`: optional preview tail, timeout notice if any, then `Background job <id> started: <label>` with follow-up instructions.\n - `details.async`: `{ state: \"running\", jobId, type: \"bash\" }`.\n- Background progress / completion:\n - delivered through `onUpdate` / async job manager, not the initial return.\n - running updates contain tail text and `details.async.state: \"running\"` only after the job is considered backgrounded.\n - completion/failure updates carry final text and `details.async.state: \"completed\" | \"failed\"`.\n- Failure:\n - the tool throws `ToolError` / `ToolAbortError`; non-zero exits are surfaced as errors, not success results.\n\nStdout and stderr are merged before the model sees them. Non-zero exit codes are appended to the thrown error text as `Command exited with code <n>`.\n\n## Flow\n1. `BashTool.execute()` in `packages/coding-agent/src/tools/bash.ts` reads `command`, normalizes `env`, and defaults `timeout` to `300`.\n2. If `cwd` is absent, it rewrites a leading `cd <path> && ...` into the structured `cwd` field and strips that prefix from `command`.\n3. If `async: true` is requested while `async.enabled` is off, it throws `ToolError` before any execution.\n4. If `bashInterceptor.enabled` is on, `checkBashInterception()` runs against both the original command and the `cd`-stripped command. A matching enabled rule throws before URL expansion or execution.\n5. `expandInternalUrls()` rewrites supported internal URLs inside `command`, each `env` value, and protocol-looking `cwd` values. Command/env replacements are shell-escaped unless `noEscape` is requested by the caller path.\n6. `resolveToCwd()` resolves `cwd` against `session.cwd`; `fs.stat()` verifies that the target exists and is a directory.\n7. `clampTimeout(\"bash\", requestedTimeoutSec)` enforces `TOOL_TIMEOUTS.bash` (`default: 300`, `min: 1`, `max: 3600`). When clamped, `#buildCompletedResult()` / `#buildBackgroundStartResult()` append a notice line.\n8. Execution path splits:\n 1. `async: true` -> `#startManagedBashJob()` registers a session async job and returns immediately.\n 2. Non-PTY with `bash.autoBackground.enabled` and an async job manager -> starts a managed job, waits up to `min(thresholdMs, timeoutMs - 1000)`, and either returns the completed result or converts the run into a background job.\n 3. Otherwise runs foreground execution.\n9. Foreground non-PTY calls `executeBash()` from `packages/coding-agent/src/exec/bash-executor.ts`.\n10. Foreground PTY calls `runInteractiveBashPty()` from `packages/coding-agent/src/tools/bash-interactive.ts`.\n11. Both paths allocate an output artifact first when `session.allocateOutputArtifact` is available. The artifact path/id are passed into the sink so large output can spill to disk.\n12. `executeBash()` loads shell settings, optional shell snapshot, and shell minimizer settings, then runs via a persistent native `Shell` session or one-shot `executeShell()`. `docs/bash-tool-runtime.md` covers that path in detail.\n13. `runInteractiveBashPty()` creates a `PtySession`, overlays an xterm-backed console UI, forwards user key input into the PTY, captures output through `OutputSink`, and kills the PTY on dismiss/dispose.\n14. On completion, `#buildCompletedResult()` formats `(no output)` when needed, attaches truncation metadata from the `OutputSink` summary, and re-checks exit status / timeout / cancellation before returning.\n15. On non-zero exit, timeout, missing exit status, or cancellation, `#buildResultText()` throws with the captured output included in the error message.\n\n## Modes / Variants\n1. Foreground non-PTY\n - Default path.\n - Uses `executeBash()`.\n - Streams tail-only updates through `streamTailUpdates()` and `TailBuffer(DEFAULT_MAX_BYTES)`.\n2. Foreground PTY\n - Requires `pty: true`, UI context, and `PI_NO_PTY !== \"1\"`.\n - Uses `runInteractiveBashPty()` and a `PtySession` overlay.\n - Supports interactive input; `Esc` kills the session from the overlay.\n3. Explicit background job\n - Requires `async: true` and `async.enabled`.\n - Registers a job with `session.asyncJobManager` and returns `{ state: \"running\", jobId }` immediately.\n4. Auto-backgrounded non-PTY job\n - Requires `bash.autoBackground.enabled`, no PTY, and an async job manager.\n - Starts like a foreground managed job, then backgrounds it when it outlives the wait window.\n5. Intercepted command\n - No subprocess created.\n - Returns a `ToolError` pointing the model at `read`, `search`, `find`, `edit`, or `write`.\n\n## Side Effects\n- Filesystem\n - Validates `cwd` with `fs.stat()`.\n - May allocate and write artifact files for full output (`bash`) and minimizer-preserved raw output (`bash-original`).\n - `expandInternalUrls(..., { ensureLocalParentDirs: true })` creates parent directories for `local://` paths before execution.\n- Subprocesses / native bindings\n - Non-PTY uses native shell execution via `@oh-my-pi/pi-natives` (`Shell.run()` or `executeShell()`).\n - PTY uses native `PtySession.start()`.\n- Session state\n - Reads session settings for async, auto-background, interceptor, tool availability, and shell configuration.\n - Registers jobs with `session.asyncJobManager` for explicit/auto background runs.\n - Uses `session.getSessionId()` to isolate shell reuse and async session keys.\n - Uses `session.allocateOutputArtifact()` for spill files.\n- User-visible prompts / interactive UI\n - PTY mode opens a TUI overlay titled `Console` and forwards input to the PTY.\n - Background start messages direct the agent to `job` and `read jobs://<id>`.\n- Background work / cancellation\n - Async and auto-background jobs continue after the initial tool return.\n - Cancellation aborts the native run; PTY overlay dismissal also kills the PTY.\n\n## Limits & Caps\n- Default timeout: `300s` (`TOOL_TIMEOUTS.bash.default` in `packages/coding-agent/src/tools/tool-timeouts.ts`).\n- Timeout clamp: `1..3600s` (`TOOL_TIMEOUTS.bash.min/max`).\n- Auto-background default threshold: `60_000ms` (`DEFAULT_AUTO_BACKGROUND_THRESHOLD_MS` in `packages/coding-agent/src/tools/bash.ts`), further capped to `timeoutMs - 1000` by `#resolveAutoBackgroundWaitMs()`.\n- Hard kill grace beyond requested timeout in non-PTY executor: `5_000ms` (`HARD_TIMEOUT_GRACE_MS` in `packages/coding-agent/src/exec/bash-executor.ts`).\n- In-memory output tail cap: `50 * 1024` bytes (`DEFAULT_MAX_BYTES` in `packages/coding-agent/src/session/streaming-output.ts`). Once exceeded, the sink keeps only the tail window in memory.\n- Streaming callback throttle in `executeBash()`: `50ms` between `onChunk` calls when streaming is enabled.\n- TUI collapsed preview: `10` visual lines (`BASH_DEFAULT_PREVIEW_LINES`) when rendered inline in the agent UI; this is a renderer cap, not a tool output cap.\n\n## Errors\n- Input validation:\n - invalid env key -> `ToolError(\"Invalid bash env name: <key>\")`.\n - async requested while disabled -> `ToolError(\"Async bash execution is disabled...\")`.\n - missing async job manager -> `ToolError(\"Async job manager unavailable for this session.\")`.\n - missing/bad `cwd` -> `ToolError(\"Working directory does not exist: ...\")` or `ToolError(\"Working directory is not a directory: ...\")`.\n- Interceptor:\n - matched command -> `ToolError` with `Blocked: <rule.message>` and the original command.\n - invalid interceptor regexes are silently skipped by `compileRules()`.\n- Internal URL expansion:\n - unsupported scheme, unknown skill, path traversal, missing router support, or router resolution failures all throw `ToolError` from `packages/coding-agent/src/tools/bash-skill-urls.ts`.\n- Execution:\n - non-zero exit -> thrown `ToolError` containing captured output plus `Command exited with code <n>`.\n - missing exit code -> thrown `ToolError` with `Command failed: missing exit status`.\n - timeout -> thrown `ToolError`; PTY uses `Command timed out after <n> seconds`, non-PTY executor returns cancelled output that `BashTool` converts to an error.\n - user abort -> `ToolAbortError` when the caller signal is aborted.\n- Artifact allocation / artifact save failures are swallowed in `saveBashOriginalArtifact()` and `OutputSink.#createFileSink()`; execution continues without that artifact.\n\n## Notes\n- `strict = true` and `concurrency = \"exclusive\"` are set on `BashTool`; the tool does not run concurrently with another bash tool call in the same session.\n- `command` and `env` URL expansions shell-escape replacements; `cwd` expansion uses `noEscape: true` because it becomes a filesystem path argument, not shell text.\n- `checkBashInterception()` blocks only when the matching rule's `tool` name is present in `ctx.toolNames`; missing tools disable their corresponding rule.\n- Default interceptor rules come from `DEFAULT_BASH_INTERCEPTOR_RULES` in `packages/coding-agent/src/config/settings-schema.ts`:\n - `cat|head|tail|less|more` -> `read`\n - `grep|rg|ripgrep|ag|ack` -> `search`\n - `find|fd|locate` with name/type/glob flags -> `find`\n - `sed -i`, `perl -i`, `awk -i inplace` -> `edit`\n - `echo|printf|cat <<` with redirection -> `write`\n- PTY mode is ignored in non-UI contexts and when `PI_NO_PTY=1`; the tool silently falls back to non-PTY execution.\n- Non-PTY runs merge `NON_INTERACTIVE_ENV` with `env`; PTY runs also prepend `NON_INTERACTIVE_ENV` before custom env values.\n- When the shell minimizer rewrites output inside `executeBash()`, the visible output is replaced with minimized text and a `[raw output: artifact://<id>]` footer may be appended if `onMinimizedSave` persisted the original text.\n- The TUI renderer parses partial JSON to recover `env` assignments early in streaming previews; that behavior is display-only.\n- For executor internals that are not tool-specific — shell session reuse keys, snapshots, prefix handling, and native timeout behavior — see `docs/bash-tool-runtime.md`.\n",
64
+ "tools/bash.md": "# bash\n\n> Execute a shell command in the session workspace, with optional PTY or background-job handling.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/bash.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/bash.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/bash-interactive.ts` — PTY/TUI execution path.\n - `packages/coding-agent/src/tools/bash-interceptor.ts` — blocks tool-better shell patterns.\n - `packages/coding-agent/src/tools/bash-skill-urls.ts` — expands internal URLs to paths.\n - `packages/coding-agent/src/exec/bash-executor.ts` — non-PTY shell execution.\n - `packages/coding-agent/src/session/streaming-output.ts` — tail buffer, truncation, artifact spill.\n - `packages/coding-agent/src/tools/tool-timeouts.ts` — timeout clamp bounds.\n - `packages/coding-agent/src/config/settings-schema.ts` — default interceptor rules.\n - `docs/bash-tool-runtime.md` — deeper executor/runtime notes; use as the companion doc for shell-session internals.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `command` | `string` | Yes | Shell command text to execute. A leading `cd <path> && ...` is rewritten into `cwd` only when `cwd` was omitted. |\n| `env` | `Record<string, string>` | No | Extra environment variables. Keys must match `^[A-Za-z_][A-Za-z0-9_]*$` or the tool throws. Values also go through internal-URL expansion. |\n| `timeout` | `number` | No | Timeout in seconds. Default `300`; clamped to `1..3600` by `clampTimeout(\"bash\", ...)`. |\n| `cwd` | `string` | No | Working directory, resolved against `session.cwd` via `resolveToCwd`. Must exist and be a directory. |\n| `pty` | `boolean` | No | Request PTY mode. Default `false`. PTY is used only when `pty: true`, `PI_NO_PTY !== \"1\"`, and the tool context has a UI. |\n| `async` | `boolean` | No | Background execution request. Present only when `async.enabled` is true for the session. Returns immediately with a job id instead of waiting. |\n\n## Outputs\nThe tool returns a single `text` content block plus optional `details`.\n\n- Success, foreground:\n - `content[0].text`: command output, or `(no output)` when the command produced nothing.\n - `details.timeoutSeconds`: effective timeout after clamping.\n - `details.requestedTimeoutSeconds`: only present when the requested timeout was clamped.\n - `details.meta.truncation`: present when output was truncated in memory; includes `artifactId` when full output spilled to an artifact.\n- Success, background start (`async: true` or auto-background):\n - `content[0].text`: optional preview tail, timeout notice if any, then `Background job <id> started: <label>` with follow-up instructions.\n - `details.async`: `{ state: \"running\", jobId, type: \"bash\" }`.\n- Background progress / completion:\n - delivered through `onUpdate` / async job manager, not the initial return.\n - running updates contain tail text and `details.async.state: \"running\"` only after the job is considered backgrounded.\n - completion/failure updates carry final text and `details.async.state: \"completed\" | \"failed\"`.\n- Failure:\n - the tool throws `ToolError` / `ToolAbortError`; non-zero exits are surfaced as errors, not success results.\n\nStdout and stderr are merged before the model sees them. Non-zero exit codes are appended to the thrown error text as `Command exited with code <n>`.\n\n## Flow\n1. `BashTool.execute()` in `packages/coding-agent/src/tools/bash.ts` reads `command`, normalizes `env`, and defaults `timeout` to `300`.\n2. If `cwd` is absent, it rewrites a leading `cd <path> && ...` into the structured `cwd` field and strips that prefix from `command`.\n3. If `async: true` is requested while `async.enabled` is off, it throws `ToolError` before any execution.\n4. If `bashInterceptor.enabled` is on, `checkBashInterception()` runs against both the original command and the `cd`-stripped command. A matching enabled rule throws before URL expansion or execution.\n5. `expandInternalUrls()` rewrites supported internal URLs inside `command`, each `env` value, and protocol-looking `cwd` values. Command/env replacements are shell-escaped unless `noEscape` is requested by the caller path.\n6. `resolveToCwd()` resolves `cwd` against `session.cwd`; `fs.stat()` verifies that the target exists and is a directory.\n7. `clampTimeout(\"bash\", requestedTimeoutSec)` enforces `TOOL_TIMEOUTS.bash` (`default: 300`, `min: 1`, `max: 3600`). When clamped, `#buildCompletedResult()` / `#buildBackgroundStartResult()` append a notice line.\n8. Execution path splits:\n 1. `async: true` -> `#startManagedBashJob()` registers a session async job and returns immediately.\n 2. Non-PTY with `bash.autoBackground.enabled` and an async job manager -> starts a managed job, waits up to `min(thresholdMs, timeoutMs - 1000)`, and either returns the completed result or converts the run into a background job.\n 3. Otherwise runs foreground execution.\n9. Foreground non-PTY calls `executeBash()` from `packages/coding-agent/src/exec/bash-executor.ts`.\n10. Foreground PTY calls `runInteractiveBashPty()` from `packages/coding-agent/src/tools/bash-interactive.ts`.\n11. Both paths allocate an output artifact first when `session.allocateOutputArtifact` is available. The artifact path/id are passed into the sink so large output can spill to disk.\n12. `executeBash()` loads shell settings, optional shell snapshot, and shell minimizer settings, then runs via a persistent native `Shell` session or one-shot `executeShell()`. `docs/bash-tool-runtime.md` covers that path in detail.\n13. `runInteractiveBashPty()` creates a `PtySession`, overlays an xterm-backed console UI, forwards user key input into the PTY, captures output through `OutputSink`, and kills the PTY on dismiss/dispose.\n14. On completion, `#buildCompletedResult()` formats `(no output)` when needed, attaches truncation metadata from the `OutputSink` summary, and re-checks exit status / timeout / cancellation before returning.\n15. On non-zero exit, timeout, missing exit status, or cancellation, `#buildResultText()` throws with the captured output included in the error message.\n\n## Modes / Variants\n1. Foreground non-PTY\n - Default path.\n - Uses `executeBash()`.\n - Streams tail-only updates through `streamTailUpdates()` and `TailBuffer(DEFAULT_MAX_BYTES)`.\n2. Foreground PTY\n - Requires `pty: true`, UI context, and `PI_NO_PTY !== \"1\"`.\n - Uses `runInteractiveBashPty()` and a `PtySession` overlay.\n - Supports interactive input; `Esc` kills the session from the overlay.\n3. Explicit background job\n - Requires `async: true` and `async.enabled`.\n - Registers a job with `session.asyncJobManager` and returns `{ state: \"running\", jobId }` immediately.\n4. Auto-backgrounded non-PTY job\n - Requires `bash.autoBackground.enabled`, no PTY, and an async job manager.\n - Starts like a foreground managed job, then backgrounds it when it outlives the wait window.\n5. Intercepted command\n - No subprocess created.\n - Returns a `ToolError` pointing the model at `read`, `search`, `find`, `edit`, or `write`.\n\n## Side Effects\n- Filesystem\n - Validates `cwd` with `fs.stat()`.\n - May allocate and write artifact files for full output (`bash`) and minimizer-preserved raw output (`bash-original`).\n - `expandInternalUrls(..., { ensureLocalParentDirs: true })` creates parent directories for `local://` paths before execution.\n- Subprocesses / native bindings\n - Non-PTY uses native shell execution via `@oh-my-pi/pi-natives` (`Shell.run()` or `executeShell()`).\n - PTY uses native `PtySession.start()`.\n- Session state\n - Reads session settings for async, auto-background, interceptor, tool availability, and shell configuration.\n - Registers jobs with `session.asyncJobManager` for explicit/auto background runs.\n - Uses `session.getSessionId()` to isolate shell reuse and async session keys.\n - Uses `session.allocateOutputArtifact()` for spill files.\n- User-visible prompts / interactive UI\n - PTY mode opens a TUI overlay titled `Console` and forwards input to the PTY.\n - Background start messages direct the agent to `job` and to read `jobs://<id>`.\n- Background work / cancellation\n - Async and auto-background jobs continue after the initial tool return.\n - Cancellation aborts the native run; PTY overlay dismissal also kills the PTY.\n\n## Limits & Caps\n- Default timeout: `300s` (`TOOL_TIMEOUTS.bash.default` in `packages/coding-agent/src/tools/tool-timeouts.ts`).\n- Timeout clamp: `1..3600s` (`TOOL_TIMEOUTS.bash.min/max`).\n- Auto-background default threshold: `60_000ms` (`DEFAULT_AUTO_BACKGROUND_THRESHOLD_MS` in `packages/coding-agent/src/tools/bash.ts`), further capped to `timeoutMs - 1000` by `#resolveAutoBackgroundWaitMs()`.\n- Hard kill grace beyond requested timeout in non-PTY executor: `5_000ms` (`HARD_TIMEOUT_GRACE_MS` in `packages/coding-agent/src/exec/bash-executor.ts`).\n- In-memory output tail cap: `50 * 1024` bytes (`DEFAULT_MAX_BYTES` in `packages/coding-agent/src/session/streaming-output.ts`). Once exceeded, the sink keeps only the tail window in memory.\n- Streaming callback throttle in `executeBash()`: `50ms` between `onChunk` calls when streaming is enabled.\n- TUI collapsed preview: `10` visual lines (`BASH_DEFAULT_PREVIEW_LINES`) when rendered inline in the agent UI; this is a renderer cap, not a tool output cap.\n\n## Errors\n- Input validation:\n - invalid env key -> `ToolError(\"Invalid bash env name: <key>\")`.\n - async requested while disabled -> `ToolError(\"Async bash execution is disabled...\")`.\n - missing async job manager -> `ToolError(\"Async job manager unavailable for this session.\")`.\n - missing/bad `cwd` -> `ToolError(\"Working directory does not exist: ...\")` or `ToolError(\"Working directory is not a directory: ...\")`.\n- Interceptor:\n - matched command -> `ToolError` with `Blocked: <rule.message>` and the original command.\n - invalid interceptor regexes are silently skipped by `compileRules()`.\n- Internal URL expansion:\n - unsupported scheme, unknown skill, path traversal, missing router support, or router resolution failures all throw `ToolError` from `packages/coding-agent/src/tools/bash-skill-urls.ts`.\n- Execution:\n - non-zero exit -> thrown `ToolError` containing captured output plus `Command exited with code <n>`.\n - missing exit code -> thrown `ToolError` with `Command failed: missing exit status`.\n - timeout -> thrown `ToolError`; PTY uses `Command timed out after <n> seconds`, non-PTY executor returns cancelled output that `BashTool` converts to an error.\n - user abort -> `ToolAbortError` when the caller signal is aborted.\n- Artifact allocation / artifact save failures are swallowed in `saveBashOriginalArtifact()` and `OutputSink.#createFileSink()`; execution continues without that artifact.\n\n## Notes\n- `strict = true` and `concurrency = \"exclusive\"` are set on `BashTool`; the tool does not run concurrently with another bash tool call in the same session.\n- `command` and `env` URL expansions shell-escape replacements; `cwd` expansion uses `noEscape: true` because it becomes a filesystem path argument, not shell text.\n- `checkBashInterception()` blocks only when the matching rule's `tool` name is present in `ctx.toolNames`; missing tools disable their corresponding rule.\n- Default interceptor rules come from `DEFAULT_BASH_INTERCEPTOR_RULES` in `packages/coding-agent/src/config/settings-schema.ts`:\n - `cat|head|tail|less|more` -> `read`\n - `grep|rg|ripgrep|ag|ack` -> `search`\n - `find|fd|locate` with name/type/glob flags -> `find`\n - `sed -i`, `perl -i`, `awk -i inplace` -> `edit`\n - `echo|printf|cat <<` with redirection -> `write`\n- PTY mode is ignored in non-UI contexts and when `PI_NO_PTY=1`; the tool silently falls back to non-PTY execution.\n- Non-PTY runs merge `NON_INTERACTIVE_ENV` with `env`; PTY runs also prepend `NON_INTERACTIVE_ENV` before custom env values.\n- When the shell minimizer rewrites output inside `executeBash()`, the visible output is replaced with minimized text and a `[raw output: artifact://<id>]` footer may be appended if `onMinimizedSave` persisted the original text.\n- The TUI renderer parses partial JSON to recover `env` assignments early in streaming previews; that behavior is display-only.\n- For executor internals that are not tool-specific — shell session reuse keys, snapshots, prefix handling, and native timeout behavior — see `docs/bash-tool-runtime.md`.\n",
65
65
  "tools/browser.md": "# browser\n\n> Open, reuse, close, and script Puppeteer tabs against headless Chromium or CDP-attached apps.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/browser.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/browser.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/browser/tab-supervisor.ts` — global tab registry; worker lifecycle; run/close coordination.\n - `packages/coding-agent/src/tools/browser/tab-worker.ts` — executes `run` code; implements the `tab` helper API.\n - `packages/coding-agent/src/tools/browser/tab-worker-entry.ts` — worker-thread transport bootstrap.\n - `packages/coding-agent/src/tools/browser/registry.ts` — browser-handle registry keyed by browser kind.\n - `packages/coding-agent/src/tools/browser/launch.ts` — Puppeteer loading, Chromium resolution/download, headless launch, stealth injection.\n - `packages/coding-agent/src/tools/browser/attach.ts` — CDP attach/reuse, target picking, spawned-app process handling.\n - `packages/coding-agent/src/tools/browser/tab-protocol.ts` — worker init/run/result message schema.\n - `packages/coding-agent/src/tools/browser/readable.ts` — `tab.extract()` readability extraction.\n - `packages/coding-agent/src/tools/browser/render.ts` — TUI rendering for `open`/`close` status lines and `run` JS cells.\n - `packages/coding-agent/src/tools/puppeteer/00_stealth_tampering.txt` — mask patched functions/descriptors as native.\n - `packages/coding-agent/src/tools/puppeteer/01_stealth_activity.txt` — synthesize visibility/focus/scroll activity.\n - `packages/coding-agent/src/tools/puppeteer/02_stealth_hairline.txt` — fix Modernizr hairline detection.\n - `packages/coding-agent/src/tools/puppeteer/03_stealth_botd.txt` — spoof `navigator.webdriver`, `window.chrome`, and Chrome fingerprint surfaces.\n - `packages/coding-agent/src/tools/puppeteer/04_stealth_iframe.txt` — patch iframe `contentWindow`/`srcdoc` behavior.\n - `packages/coding-agent/src/tools/puppeteer/05_stealth_webgl.txt` — spoof WebGL vendor/renderer/precision.\n - `packages/coding-agent/src/tools/puppeteer/06_stealth_screen.txt` — normalize screen/viewport/device-pixel-ratio values.\n - `packages/coding-agent/src/tools/puppeteer/07_stealth_fonts.txt` — spoof local fonts and perturb canvas text rendering.\n - `packages/coding-agent/src/tools/puppeteer/08_stealth_audio.txt` — spoof audio latency/sample-rate and perturb offline rendering.\n - `packages/coding-agent/src/tools/puppeteer/09_stealth_locale.txt` — force locale/languages/timezone/date strings.\n - `packages/coding-agent/src/tools/puppeteer/10_stealth_plugins.txt` — synthesize `navigator.plugins`/`navigator.mimeTypes`.\n - `packages/coding-agent/src/tools/puppeteer/11_stealth_hardware.txt` — spoof `navigator.hardwareConcurrency`.\n - `packages/coding-agent/src/tools/puppeteer/12_stealth_codecs.txt` — spoof media codec support.\n - `packages/coding-agent/src/tools/puppeteer/13_stealth_worker.txt` — carry UA/platform spoofing into `Worker`/`SharedWorker`.\n\n## Inputs\n\n### Shared fields\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `action` | `\"open\" \\| \"close\" \\| \"run\"` | Yes | Dispatches to the open/close/run path. |\n| `name` | `string` | No | Tab id. Defaults to `\"main\"`. Tabs live in a process-global map, so the same name is reused across later calls and in-process subagents until closed. |\n| `timeout` | `number` | No | Tool wall-clock timeout in seconds. Defaults to `30`; clamped to the browser tool range before execution. |\n\n### `action: \"open\"`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `url` | `string` | No | Navigate after the tab is ready. Existing reusable tabs also navigate when `url` is supplied. |\n| `viewport` | `{ width: number; height: number; scale?: number }` | No | Requested viewport. For headless launch this becomes the initial viewport; for a page it is applied with `page.setViewport()`. `scale` maps to Puppeteer `deviceScaleFactor`. |\n| `wait_until` | `\"load\" \\| \"domcontentloaded\" \\| \"networkidle0\" \\| \"networkidle2\"` | No | Navigation wait condition. Defaults to `\"networkidle2\"` where omitted. |\n| `dialogs` | `\"accept\" \\| \"dismiss\"` | No | Installs a page `dialog` handler that auto-accepts or auto-dismisses dialogs. Omitted means no handler. |\n| `app` | `{ path?: string; cdp_url?: string; args?: string[]; target?: string }` | No | Selects browser kind. No `app` uses the session `browser.headless` setting. `app.path` is resolved against the session cwd and used as the executable path for spawn/attach reuse. `app.cdp_url` connects to an existing CDP endpoint. `args` are appended only when spawning `app.path`. `target` is only used for attached/spawned-app page selection. |\n\n### `action: \"close\"`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `all` | `boolean` | No | Close every known tab. Omitted closes only `name`. |\n| `kill` | `boolean` | No | When a tab release drops a spawned-app browser handle to refcount 0, also terminate its process tree. Has no effect on headless shutdown and only disconnects connected CDP browsers. |\n\n### `action: \"run\"`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `code` | `string` | Yes | Async-function body executed in a VM context with `page`, `browser`, `tab`, `display`, `assert`, `wait`, `console`, timers, `URL`, `TextEncoder`, `TextDecoder`, and `Buffer` in scope. |\n\n## Outputs\nThe tool returns one result per call; no streaming partial output is emitted from the browser implementation itself.\n\n- `open`: text content with `Opened` or `Reused`, browser description, URL, and optional title. `details` includes `action`, `name`, `browser`, `url`, `viewport`, and the same text in `details.result`.\n- `close`: text content with either `Closed ...` or `No tab named ...`. `details` includes `action`, `name`, and `details.result`.\n- `run`: ordered `content` array built as:\n 1. every `display(value)` call in execution order,\n 2. final return value, JSON-stringified unless already a string,\n 3. or `Ran code on tab \"...\"` if nothing else was produced.\n- `display(value)` coercion in `packages/coding-agent/src/tools/browser/tab-worker.ts`:\n - `{ type: \"image\", data: string, mimeType: string }` becomes image content,\n - `string` becomes text content,\n - other values become pretty JSON text when serializable, else `String(value)`.\n- `tab.screenshot()` also appends text plus an image content item unless `silent: true`; `details.screenshots` records persisted screenshot metadata `{ dest, mimeType, bytes, width, height }`.\n- `run` `details` includes `action`, `name`, current `browser`/`url` when the tab exists, optional `screenshots`, and `details.result` containing only the concatenated text outputs.\n\n## Flow\n1. `BrowserTool.execute()` (`packages/coding-agent/src/tools/browser.ts`) abort-checks, clamps `timeout` via `clampTimeout(\"browser\", ...)`, defaults `name` to `\"main\"`, and dispatches on `action`.\n2. `open` resolves browser kind with `resolveBrowserKind()`:\n - `app.cdp_url` → `{ kind: \"connected\" }` after trimming trailing slashes.\n - `app.path` → `{ kind: \"spawned\" }` after resolving against session cwd.\n - otherwise → `{ kind: \"headless\", headless: session.settings.get(\"browser.headless\") }`.\n3. `open` rejects reusing the same tab name across different browser kinds (`sameBrowserKind()`); callers must close first.\n4. `open` acquires a browser handle through `acquireBrowser()` (`packages/coding-agent/src/tools/browser/registry.ts`):\n - existing connected handle is reused by browser-kind key;\n - stale disconnected handles are disposed and recreated;\n - headless launches via `launchHeadlessBrowser()`;\n - `connected` waits for `${cdpUrl}/json/version`, then `puppeteer.connect()`;\n - `spawned` first tries `findReusableCdp()`, else kills same-path processes, allocates a free loopback port, spawns the executable with `--remote-debugging-port=<port>`, waits for CDP, then connects.\n5. `open` acquires a tab through `acquireTab()` (`packages/coding-agent/src/tools/browser/tab-supervisor.ts`):\n - same-name + same-browser + alive tab is reused unless `dialogs` changed;\n - same-name but different browser handle, dead state, or changed dialog policy forces release and recreation;\n - reusing with a new `url` navigates by issuing `await tab.goto(...)` through the worker.\n6. New tabs build a `WorkerInitPayload` in `buildInitPayload()`:\n - headless mode sends `url`, `waitUntil`, `viewport`, `dialogs`, and timeout;\n - attach mode resolves a page with `pickElectronTarget()`, gets its target id, and sends `targetId` plus `dialogs`.\n7. `acquireTab()` spawns a dedicated Bun `Worker` from `tab-worker-entry.ts`; if that fails it falls back to inline execution in the main thread (`spawnInlineWorker()`), preserving behavior but losing protection against synchronous infinite loops.\n8. `WorkerCore.#init()` (`packages/coding-agent/src/tools/browser/tab-worker.ts`) connects back to the browser websocket endpoint. Headless mode opens a new page, applies stealth patches, applies viewport, installs dialog handling if requested, and optionally navigates. Attach mode resolves the requested target page and optionally installs dialog handling.\n9. On success the worker sends `ready` with `{ url, title, viewport, targetId }`; the supervisor stores a `TabSession`, increments browser-handle refcount with `holdBrowser()`, and keeps the tab in a process-global `Map<string, TabSession>`.\n10. `run` requires non-empty `code`, looks up the tab with `getTab()`, then delegates to `runInTab()`.\n11. `runInTabWithSnapshot()` rejects dead tabs and concurrent runs (`Tab ... is busy`), captures session cwd plus optional `browser.screenshotDir`, registers an abort hook, sends a `run` message to the worker, and races the result against `timeoutMs + 750` ms. Timeouts force-kill the tab worker and, for headless tabs, close the orphaned page target.\n12. `WorkerCore.#run()` creates a VM context, exposes the raw Puppeteer `page`/`browser` plus a synthetic `tab` API, and executes `(async () => { ...code... })()` via `vm.runInContext()`.\n13. The `tab` helper API implemented in `#createTabApi()` is:\n - `tab.name: string`\n - `tab.page: Page`\n - `tab.signal?: AbortSignal`\n - `tab.url(): string`\n - `tab.title(): Promise<string>`\n - `tab.goto(url, { waitUntil? })`\n - `tab.observe({ includeAll?, viewportOnly? })`\n - `tab.screenshot({ selector?, fullPage?, save?, silent? })`\n - `tab.extract(format = \"markdown\")`\n - `tab.click(selector)`\n - `tab.type(selector, text)`\n - `tab.fill(selector, value)`\n - `tab.press(key, { selector? })`\n - `tab.scroll(deltaX, deltaY)`\n - `tab.drag(from, to)`\n - `tab.waitFor(selector)`\n - `tab.evaluate(fn, ...args)`\n - `tab.scrollIntoView(selector)`\n - `tab.select(selector, ...values)`\n - `tab.uploadFile(selector, ...filePaths)`\n - `tab.waitForUrl(pattern, { timeout? })`\n - `tab.waitForResponse(pattern, { timeout? })`\n - `tab.id(n)`\n14. Selector handling in `normalizeSelector()` accepts plain CSS and Puppeteer query handlers, and rewrites legacy Playwright-style prefixes `p-text/`, `p-xpath/`, `p-pierce/`, `p-aria/`; other `p-*` prefixes throw a `ToolError`.\n15. `tab.observe()` clears the element cache, takes a Puppeteer accessibility snapshot, filters to interactive nodes unless `includeAll`, optionally filters to viewport-visible nodes, assigns numeric ids, caches `ElementHandle`s, and returns URL/title/viewport/scroll metadata plus `elements`.\n16. `tab.id(n)` resolves the cached `ElementHandle`, verifies `el.isConnected`, and throws a stale-id error after cache invalidation if the DOM changed or the cache was cleared.\n17. `tab.goto()` clears the cached element ids before navigating. Any new `tab.observe()` also clears and rebuilds the cache.\n18. `tab.click()` uses a custom retry loop for `text/...` selectors to find an actionable visible match; other selectors use `page.locator(...).click()` with the run timeout.\n19. `tab.screenshot()` captures either the whole page or a selector PNG, downsizes a copy for model output, chooses a persistence path, writes the image to disk, records metadata, and optionally emits text + image display entries.\n20. `display()` calls accumulate in an array. After code finishes, the worker posts `{ displays, returnValue, screenshots }`; `BrowserTool.#run()` appends the return value as trailing text content when not `undefined`.\n21. `close` releases one tab or all tabs via `releaseTab()` / `releaseAllTabs()`. Each tab aborts pending runs, asks the worker to close, waits up to `750` ms for a `closed` ack, terminates the worker, decrements browser refcount, and disposes the browser handle when refcount reaches zero.\n\n## Modes / Variants\n- **Action dispatch**\n - `open` — acquire/reuse browser + tab.\n - `close` — release one tab or all tabs.\n - `run` — execute JS inside the tab worker.\n- **Browser kind**\n - **Headless**: launches local Chromium with Puppeteer, applies stealth patches, and creates a fresh page per tab.\n - **Spawned app (`app.path`)**: reuses an existing CDP-enabled process for that executable when possible; otherwise kills same-path processes, spawns the executable with remote debugging enabled, then attaches. No stealth patches are injected.\n - **Connected browser (`app.cdp_url`)**: attaches to an already-running CDP endpoint. No process ownership; close only disconnects.\n- **Target selection for attached/spawned browsers**\n - With `app.target`, `pickElectronTarget()` returns the first page whose URL or title contains the case-insensitive substring.\n - Without `app.target`, it skips titles/URLs matching `request handler|devtools|background page|background host|service worker` and otherwise falls back to the first page.\n- **Worker mode**\n - **Dedicated worker**: normal path; user code runs off the main thread and can be aborted even when it blocks synchronously.\n - **Inline fallback**: activated when Bun worker spawn fails; behavior matches, but synchronous infinite loops on user code cannot be interrupted.\n- **Dialog policy**\n - No `dialogs` field: no auto-handler.\n - `accept`/`dismiss`: page `dialog` events are handled automatically.\n - Changing dialog policy on an existing live tab forces tab recreation instead of mutating the worker in place.\n- **Screenshot persistence**\n - `save` provided: persist full-resolution PNG at the resolved cwd-relative or absolute path.\n - `browser.screenshotDir` session setting set: persist full-resolution PNG under that directory with a timestamped filename.\n - Neither set: persist the resized image to a temp-file path under the OS temp dir.\n\n## Side Effects\n- Filesystem\n - `loadPuppeteer()` writes `{}` to `<puppeteer-safe-dir>/package.json` before importing `puppeteer-core`.\n - First headless launch may download Chromium into the Puppeteer cache directory returned by `getPuppeteerDir()`.\n - `tab.screenshot()` creates parent directories and writes image files.\n - `tab.uploadFile()` resolves supplied paths against the session cwd.\n- Network\n - CDP attach paths poll `http://127.0.0.1:<port>/json/version` or the supplied `cdp_url` `/json/version`.\n - Headless/browser-attach sessions create CDP websocket connections.\n - Headless first-use Chromium download uses `@puppeteer/browsers`.\n - User `page` / `tab` operations perform normal browser network traffic.\n- Subprocesses / native bindings\n - Headless mode launches Chromium through Puppeteer.\n - `app.path` mode may spawn the target executable via `Bun.spawn()`.\n - `killExistingByPath()` / `gracefulKillTreeOnce()` use `@oh-my-pi/pi-natives` process inspection/termination.\n - Worker mode uses Bun `Worker`; fallback mode does not.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Browser handles are cached in a process-global `Map` keyed by browser kind in `packages/coding-agent/src/tools/browser/registry.ts`.\n - Tabs are cached in a process-global `Map` keyed by `name` in `packages/coding-agent/src/tools/browser/tab-supervisor.ts`.\n - `run` captures session cwd and optional `browser.screenshotDir` for screenshot/save path resolution.\n - `restartForModeChange()` drops only headless tabs.\n- User-visible prompts / interactive UI\n - None beyond normal tool output. Dialog auto-handling is invisible unless it fails and emits debug logs.\n- Background work / cancellation\n - `open`, `run`, CDP waits, and browser actions thread through abort signals.\n - A timed-out `run` aborts the worker execution path and can tear down the tab.\n\n## Limits & Caps\n- Tool timeout clamp: default `30` s, min `1` s, max `30` s (`TOOL_TIMEOUTS.browser` in `packages/coding-agent/src/tools/tool-timeouts.ts`).\n- Supervisor grace period around init/run/close: `750` ms (`GRACE_MS` in `packages/coding-agent/src/tools/browser/tab-supervisor.ts`).\n- Puppeteer protocol timeout for launch/connect operations: `60_000` ms (`BROWSER_PROTOCOL_TIMEOUT_MS` in `packages/coding-agent/src/tools/browser/launch.ts`).\n- Connected-browser CDP readiness wait: `5_000` ms before `puppeteer.connect()` (`packages/coding-agent/src/tools/browser/registry.ts`).\n- Spawned-app CDP readiness wait after spawn: `30_000` ms (`packages/coding-agent/src/tools/browser/registry.ts`).\n- CDP polling cadence: 150 ms in `waitForCdp()` (`packages/coding-agent/src/tools/browser/attach.ts`).\n- Headless default viewport: `1365x768` at `deviceScaleFactor: 1.25` (`DEFAULT_VIEWPORT` in `packages/coding-agent/src/tools/browser/launch.ts`).\n- Screenshot model-attachment resize cap: `maxWidth 1024`, `maxHeight 1024`, `maxBytes 150 * 1024`, `jpegQuality 70` (`packages/coding-agent/src/tools/browser/tab-worker.ts`).\n- `tab.waitForUrl()` polling interval: `200` ms (`packages/coding-agent/src/tools/browser/tab-worker.ts`).\n- Drag simulation uses `12` mouse-move steps (`packages/coding-agent/src/tools/browser/tab-worker.ts`).\n\n## Errors\n- `BrowserTool.execute()` converts DOM-style `AbortError` into `ToolAbortError`; other errors propagate.\n- `run` hard-fails on missing code: `Missing required parameter 'code' for action 'run'.`\n- `open` fails when reusing a name across browser kinds: `Tab \"...\" is bound to a different browser (...). Close it first.`\n- `runInTabWithSnapshot()` fails when the tab is absent/dead (`Tab \"...\" is not alive. Reopen it.`) or already running (`Tab \"...\" is busy`).\n- Worker init failures and run failures are serialized through `RunErrorPayload`; `ToolError` and abort state are reconstructed on the host side by `errorFromPayload()`.\n- Attached-target mismatches surface as:\n - `No page targets available on the attached browser`\n - `No page target matched \"...\". Available pages:\\n...`\n - `Target ... is no longer available on the attached browser`\n- Spawned-app path validation requires an absolute executable path, not an app bundle path.\n- Spawn/attach failures are wrapped into `ToolError`s such as `Timed out waiting for CDP endpoint ...`, `Failed to attach to ...`, or `Connected to ... but puppeteer.connect failed: ...`.\n- `tab` helper errors are user-visible `ToolError`s, including unsupported selector prefix, stale/unknown element id, invalid drag target, missing upload files, non-`<select>` for `tab.select()`, non-file-input for `tab.uploadFile()`, and screenshot selector misses.\n- On run timeout, the worker reports `Browser code execution timed out after <ms>ms`; the supervisor may escalate to `Browser code execution hung past grace; tab killed` if the worker does not respond after the grace window.\n\n## Notes\n- `loadPuppeteer()` and `loadPuppeteerInWorker()` temporarily redirect `cwd` to a safe Puppeteer directory before importing `puppeteer-core`, because Puppeteer probes the current working directory during module load.\n- Headless launch prefers a detected system Chrome/Chromium, then `PUPPETEER_EXECUTABLE_PATH`, and only then downloads Chromium.\n- Headless launch always passes `--no-sandbox`, `--disable-setuid-sandbox`, `--disable-blink-features=AutomationControlled`, and a `--window-size=...` matching the initial viewport. It also ignores Puppeteer default args `--disable-extensions`, `--disable-default-apps`, and `--disable-component-extensions-with-background-pages`.\n- Proxy-related env vars only affect headless launch: `PUPPETEER_PROXY`, `PUPPETEER_PROXY_BYPASS_LOOPBACK`, and `PUPPETEER_PROXY_IGNORE_CERT_ERRORS`.\n- Stealth patches are applied only in headless mode. Spawned or externally connected browsers are intentionally left untouched.\n- `applyStealthPatches()` also strips Puppeteer's `//# sourceURL=__puppeteer_evaluation_script__` suffix from CDP `Runtime.evaluate` / `Runtime.callFunctionOn` payloads.\n- `tab.extract()` reads `page.content()`, runs Readability first, then falls back to `main article`/`article`/`main`/`[role='main']`/`body`, and returns `null` if neither extraction path yields content.\n- `close(all: true, kill: false)` disconnects from spawned/connected browsers when the last tab closes but leaves spawned app processes running.\n- Headless orphan cleanup is best-effort: if a worker dies before closing its page, the supervisor searches browser targets by `targetId` and closes that page.\n- Console methods inside `run` do not appear in tool output; they are forwarded as debug/warn/error logs through the worker transport.",
66
66
  "tools/calc.md": "# calc\n\n> Evaluates one or more arithmetic expressions and returns formatted numeric results.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/calculator.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/calculator.md`\n- Key collaborators:\n - `packages/coding-agent/src/tui.ts` — status lines and tree-list rendering\n - `packages/coding-agent/src/tools/render-utils.ts` — preview limits and formatting helpers\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `calculations` | `Calculation[]` | Yes | Batch of expressions to evaluate in order. |\n\n### `Calculation`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `expression` | `string` | Yes | Arithmetic expression string. |\n| `prefix` | `string` | Yes | Prepended verbatim to the rendered numeric result. |\n| `suffix` | `string` | Yes | Appended verbatim to the rendered numeric result. |\n\n## Outputs\n- Single-shot result.\n- `content[0].text` is the newline-joined `prefix + value + suffix` string for each calculation.\n- `details.results` is an array of `{ expression, value, output }`.\n- On renderer fallback, if `details` is missing but `content[0].text` exists, the TUI tries to pair each output line with the original expressions from call args.\n\n## Flow\n1. `execute()` wraps evaluation in `untilAborted(...)`.\n2. For each entry, `evaluateExpression(...)` tokenizes the expression, parses it with a recursive-descent parser, rejects non-finite outputs, and normalizes `-0` to `0`.\n3. `tokenizeExpression(...)` accepts whitespace, parentheses, operators, and number literals; any other character throws immediately.\n4. `ExpressionParser` applies precedence in this order: `+ -`, `* / %`, unary `+ -`, exponentiation `**`, parentheses/literals.\n5. Exponentiation is right-associative (`2 ** 3 ** 2` parses as `2 ** (3 ** 2)`).\n6. Each numeric result is formatted with `String(value)` and wrapped with the provided `prefix` and `suffix`.\n7. The tool returns text output plus structured `details`.\n\n## Side Effects\n- Background work / cancellation\n - Supports abort via `untilAborted(...)`.\n- Session state\n - None.\n- Filesystem / Network / Subprocesses\n - None.\n\n## Limits & Caps\n- Supported operators: `+`, `-`, `*`, `/`, `%`, `**` (`packages/coding-agent/src/tools/calculator.ts`).\n- Supported numeric literals:\n - decimal integers/floats, including leading-dot forms like `.5`\n - scientific notation like `1e10`, `2.5E-3`\n - hexadecimal `0x...`\n - binary `0b...`\n - octal `0o...`\n- Results must be finite; `Infinity` and `NaN` are rejected.\n- The renderer collapses long result lists using `PREVIEW_LIMITS.COLLAPSED_ITEMS` from `packages/coding-agent/src/tools/render-utils.ts`.\n\n## Errors\n- Invalid characters: e.g. `Invalid character \"x\" in expression`.\n- Malformed numbers: invalid prefixed literal, invalid exponent, invalid number.\n- Syntax errors: `Unexpected token in expression`, `Unexpected end of expression`, `Missing closing parenthesis`, `Expression is empty`.\n- Non-finite arithmetic: `Expression result is not a finite number`.\n- Any evaluation error aborts the whole batch; the tool does not return partial successes.\n\n## Notes\n- Despite the schema example showing `sqrt(16)`, the parser does not support functions, identifiers, units, or constants; only numeric literals, operators, and parentheses are accepted.\n- Precision is plain JavaScript `number` semantics throughout, including floating-point rounding behavior.\n- `/` and `%` use JavaScript numeric operators directly; there is no integer-only mode or unit handling.\n- Unary operators bind tighter than `*`/`/`/`%` but looser than exponentiation because unary parsing delegates to `#parsePower()`.\n",
67
67
  "tools/checkpoint.md": "# checkpoint\n\n> Mark the current top-level conversation state so later `rewind` can collapse exploratory context into a report.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/checkpoint.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/checkpoint.md`\n- Key collaborators:\n - `packages/coding-agent/src/session/agent-session.ts` — captures the active checkpoint after tool success.\n - `packages/coding-agent/src/session/session-manager.ts` — persists the normal session entry stream; not the active checkpoint marker.\n - `packages/coding-agent/src/tools/index.ts` — registers the tool and gates it behind `checkpoint.enabled`.\n - `packages/coding-agent/src/config/settings-schema.ts` — defines the disabled-by-default feature flag.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `goal` | `string` | Yes | Investigation goal. Required by the TypeBox schema and echoed in the tool result. |\n\n## Outputs\nThe tool returns a single text result plus structured details:\n\n- text body:\n - `Checkpoint created.`\n - `Goal: <goal>`\n - `Run your investigation, then call rewind with a concise report.`\n- `details`:\n - `goal: string`\n - `startedAt: string` — ISO timestamp created inside `CheckpointTool.execute()`\n\nNo checkpoint ID, artifact URI, job handle, file path, or restore token is returned.\n\n## Flow\n1. `CheckpointTool.createIf()` in `packages/coding-agent/src/tools/checkpoint.ts` returns `null` for subagents by checking `session.taskDepth`; only top-level sessions can see the tool.\n2. `CheckpointTool.execute()` rejects subagent calls again with `ToolError(\"Checkpoint not available in subagents.\")`.\n3. It rejects nested checkpoints with `ToolError(\"Checkpoint already active.\")` when `session.getCheckpointState?.()` is already set.\n4. It creates `startedAt = new Date().toISOString()` and returns a normal `toolResult()` payload. The tool itself does not persist anything.\n5. On the later `tool_execution_end` event, `AgentSession` in `packages/coding-agent/src/session/agent-session.ts` detects successful `checkpoint` execution and captures three in-memory fields:\n - `checkpointMessageCount` — current `agent.state.messages.length`, after the checkpoint tool result has already been appended\n - `checkpointEntryId` — `sessionManager.getEntries().at(-1)?.id ?? null`, i.e. the last persisted session entry ID at checkpoint time\n - `startedAt` — copied from tool details or regenerated\n6. `AgentSession` stores that object in its private `#checkpointState` field and clears `#pendingRewindReport`.\n\n## Side Effects\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Sets `AgentSession.#checkpointState` in memory.\n - Records the checkpoint boundary as a message count plus a session entry ID.\n - Enables the later yield guard: if a checkpoint is active and no rewind report is pending, `#enforceRewindBeforeYield()` injects a developer-role warning and schedules another turn.\n- User-visible prompts / interactive UI\n - The tool result tells the model to call `rewind` after the investigation.\n - If the agent tries to `yield` first, `AgentSession` injects:\n\n```text\n<system-warning>\nYou are in an active checkpoint. You MUST call rewind with your investigation findings before yielding. Do NOT yield without completing the checkpoint.\n</system-warning>\n```\n\n## Limits & Caps\n- Availability is gated by `checkpoint.enabled`, default `false`, in `packages/coding-agent/src/config/settings-schema.ts`.\n- The tool is registered as discoverable in `packages/coding-agent/src/tools/index.ts`.\n- Only one active checkpoint is allowed per top-level session.\n- Checkpoint state is not persisted as a dedicated session entry. If the process exits, a resumed session can reload the conversation history, but not the live `#checkpointState` guard.\n- Session persistence still applies to the ordinary checkpoint tool call message. Global session persistence truncation is `MAX_PERSIST_CHARS = 500_000` in `packages/coding-agent/src/session/session-manager.ts`.\n\n## Errors\n- `ToolError(\"Checkpoint not available in subagents.\")` — thrown for subagent sessions.\n- `ToolError(\"Checkpoint already active.\")` — thrown when a prior checkpoint has not been rewound or cleared.\n- The tool body has no local `try/catch`; unexpected exceptions propagate.\n\n## Notes\n- Despite the summary string `Create a git-based checkpoint to save and restore session state`, the implementation does not call git and does not snapshot filesystem state.\n- Captured state is conversation/session metadata only:\n - in-memory message count\n - session entry ID in the session tree\n - timestamp\n- Not captured:\n - working tree contents\n - staged changes\n - artifacts\n - blob-store contents\n - SQLite history rows from `packages/coding-agent/src/session/history-storage.ts`\n - auth or agent records from `packages/coding-agent/src/session/agent-storage.ts`\n- If the turn ends with `stopReason === \"aborted\"` while a checkpoint is active, `AgentSession` clears `#checkpointState` and `#pendingRewindReport` instead of preserving a half-finished checkpoint.\n",
68
68
  "tools/debug.md": "# debug\n\n> Drive one DAP debug session; adjacent debug UI code reuses the same subsystem for logs, raw SSE capture, reports, profiling, and system diagnostics.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/debug.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/debug.md`\n- Key collaborators:\n - `packages/coding-agent/src/dap/session.ts` — session lifecycle, breakpoint/state cache\n - `packages/coding-agent/src/dap/client.ts` — adapter process/socket transport, DAP message loop\n - `packages/coding-agent/src/dap/config.ts` — adapter resolution and auto-selection\n - `packages/coding-agent/src/dap/defaults.json` — built-in adapter definitions\n - `packages/coding-agent/src/dap/types.ts` — request/response/capability shapes\n - `packages/coding-agent/src/tools/tool-timeouts.ts` — per-tool timeout clamp\n - `packages/coding-agent/src/debug/index.ts` — interactive debug selector menu\n - `packages/coding-agent/src/debug/log-viewer.ts` — recent-log TUI viewer\n - `packages/coding-agent/src/debug/raw-sse.ts` — raw SSE TUI viewer\n - `packages/coding-agent/src/debug/raw-sse-buffer.ts` — bounded SSE capture buffer\n - `packages/coding-agent/src/debug/profiler.ts` — CPU/heap profiling helpers\n - `packages/coding-agent/src/debug/report-bundle.ts` — `.tar.gz` report bundling, log source, cache cleanup\n - `packages/coding-agent/src/debug/system-info.ts` — system snapshot collection and env redaction\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `action` | `\"launch\" \\| \"attach\" \\| \"set_breakpoint\" \\| \"remove_breakpoint\" \\| \"set_instruction_breakpoint\" \\| \"remove_instruction_breakpoint\" \\| \"data_breakpoint_info\" \\| \"set_data_breakpoint\" \\| \"remove_data_breakpoint\" \\| \"continue\" \\| \"step_over\" \\| \"step_in\" \\| \"step_out\" \\| \"pause\" \\| \"evaluate\" \\| \"stack_trace\" \\| \"threads\" \\| \"scopes\" \\| \"variables\" \\| \"disassemble\" \\| \"read_memory\" \\| \"write_memory\" \\| \"modules\" \\| \"loaded_sources\" \\| \"custom_request\" \\| \"output\" \\| \"terminate\" \\| \"sessions\"` | Yes | Dispatch key for the tool switch in `packages/coding-agent/src/tools/debug.ts`. |\n| `program` | `string` | No | Launch target path. Required for `launch`. Resolved relative to `cwd` if provided, otherwise session cwd. |\n| `args` | `string[]` | No | Program argv for `launch`. |\n| `adapter` | `string` | No | Explicit adapter name. Otherwise `selectLaunchAdapter()` / `selectAttachAdapter()` auto-pick from `packages/coding-agent/src/dap/config.ts`. |\n| `cwd` | `string` | No | Launch/attach working directory. Defaults to session cwd. |\n| `file` | `string` | No | Source file path for source breakpoints. |\n| `line` | `number` | No | Source line for source breakpoints. |\n| `function` | `string` | No | Function breakpoint name. Mutually exclusive with `file`+`line` in breakpoint actions. |\n| `name` | `string` | No | Data breakpoint info target name. Required for `data_breakpoint_info`. |\n| `condition` | `string` | No | Conditional expression for source/function/instruction/data breakpoints. |\n| `hit_condition` | `string` | No | Hit-count condition for instruction/data breakpoints. |\n| `expression` | `string` | No | Expression or raw debugger command. Required for `evaluate`. |\n| `context` | `string` | No | Evaluate context. Defaults to `\"repl\"`. Passed through as DAP evaluate context. |\n| `frame_id` | `number` | No | Frame selector for `evaluate`, `scopes`, `data_breakpoint_info`. `scopes` and `evaluate` default to the current stopped frame when omitted. |\n| `scope_id` | `number` | No | Variables reference from a scope. Accepted by `variables`; also used as a fallback variables reference for `data_breakpoint_info`. |\n| `variable_ref` | `number` | No | Variables reference for `variables`; preferred over `scope_id` when both are present. |\n| `pid` | `number` | No | Local process id for `attach`. `attach` requires `pid` or `port`. |\n| `port` | `number` | No | Remote attach port. If no adapter is forced, attach prefers `debugpy` when `port` is present. |\n| `host` | `string` | No | Remote attach host for `attach`. |\n| `levels` | `number` | No | Max stack frames for `stack_trace`. |\n| `memory_reference` | `string` | No | Memory reference/address for `disassemble`, `read_memory`, `write_memory`. `disassemble` also accepts it via `instruction_reference` fallback logic in `resolveDisassemblyReference()`. |\n| `instruction_reference` | `string` | No | Instruction breakpoint reference; required for instruction breakpoint actions. |\n| `instruction_count` | `number` | No | Required for `disassemble`. |\n| `instruction_offset` | `number` | No | Instruction offset for `disassemble`. |\n| `count` | `number` | No | Byte count for `read_memory`. Required there. |\n| `data` | `string` | No | Base64 payload for `write_memory`. Required there. |\n| `data_id` | `string` | No | Data breakpoint id. Required for `set_data_breakpoint` / `remove_data_breakpoint`. |\n| `access_type` | `\"read\" \\| \"write\" \\| \"readWrite\"` | No | Access filter for `set_data_breakpoint`. |\n| `command` | `string` | No | Custom DAP request command. Required for `custom_request`. |\n| `arguments` | `Record<string, unknown>` | No | Custom DAP request body for `custom_request`. |\n| `offset` | `number` | No | Offset for instruction breakpoints, disassembly, memory read, memory write. |\n| `resolve_symbols` | `boolean` | No | `disassemble` symbol-resolution flag. |\n| `allow_partial` | `boolean` | No | `write_memory` partial-write allowance. |\n| `start_module` | `number` | No | Modules pagination start index for `modules`. |\n| `module_count` | `number` | No | Modules pagination count for `modules`. |\n| `timeout` | `number` | No | Per-request timeout in seconds. Default `30`, clamped to `5..300`. |\n\n### Action-specific requirements\n- `launch`: `program`\n- `attach`: `pid` or `port`\n- `set_breakpoint` / `remove_breakpoint`: `function`, or `file` + `line`\n- `set_instruction_breakpoint` / `remove_instruction_breakpoint`: `instruction_reference`\n- `data_breakpoint_info`: `name`\n- `set_data_breakpoint` / `remove_data_breakpoint`: `data_id`\n- `evaluate`: `expression`\n- `variables`: `variable_ref` or `scope_id`\n- `disassemble`: capability `supportsDisassembleRequest`, plus `instruction_count`\n- `read_memory`: capability `supportsReadMemoryRequest`, plus `memory_reference` and `count`\n- `write_memory`: capability `supportsWriteMemoryRequest`, plus `memory_reference` and `data`\n- `modules`: capability `supportsModulesRequest`\n- `loaded_sources`: capability `supportsLoadedSourcesRequest`\n- `custom_request`: `command`\n\n### Interactive selector values\n`packages/coding-agent/src/debug/index.ts` also exposes a fixed UI-only selector with values `open-artifacts`, `performance`, `work`, `dump`, `memory`, `logs`, `system`, `raw-sse`, `transcript`, `clear-cache`. These are not model-callable through `debugSchema`; they are local TUI menu routes.\n\n## Outputs\nThe agent tool returns a standard `toolResult()` payload from `packages/coding-agent/src/tools/debug.ts`:\n- `content`: one text block. Every action renders human-readable text; there is no structured JSON block in `content`.\n- `details.action`: echoed action.\n- `details.success`: always initialized `true`; failures surface by throwing before a result is returned.\n- `details.snapshot`: present for actions that operate on or create a session, using `DapSessionSummary` from `packages/coding-agent/src/dap/types.ts`.\n- Action-specific `details` fields:\n - `launch` / `attach`: `adapter`\n - breakpoint actions: `breakpoints`, `functionBreakpoints`, `instructionBreakpoints`, `dataBreakpoints`\n - `data_breakpoint_info`: `dataBreakpointInfo`\n - `continue` / `step_*`: `state`, `timedOut`\n - `threads`: `threads`\n - `stack_trace`: `stackFrames`\n - `scopes`: `scopes`\n - `variables`: `variables`\n - `evaluate`: `evaluation`\n - `disassemble`: `disassembly`\n - `read_memory`: `memoryAddress`, `memoryData`, `unreadableBytes`\n - `write_memory`: `bytesWritten`\n - `modules`: `modules`\n - `loaded_sources`: `sources`\n - `custom_request`: `customBody`\n - `output`: `output`\n - `sessions`: `sessions`\n\nStreaming/UI behavior:\n- The tool renderer merges call and result (`mergeCallAndResult: true`) and renders inline.\n- `debug.ts` itself does not emit progress updates through `_onUpdate`; result delivery is single-shot.\n- The interactive selector is UI-driven instead of model-driven. It swaps TUI components, appends status lines to the chat pane, opens files in external viewers, or writes archives/temp files.\n\nSide-channel artifacts outside the model tool result:\n- `createReportBundle()` writes `omp-report-<timestamp>.tar.gz` under the reports dir and returns the filesystem path to the UI handler.\n- `#handleWorkReport()` writes `/tmp/work-profile-<Date.now()>.svg` before opening it.\n- `RawSseViewerComponent` and `DebugLogViewerComponent` can copy captured text to the clipboard.\n\n## Flow\n1. Tool registration is conditional: `DebugTool.createIf()` in `packages/coding-agent/src/tools/debug.ts` returns `null` unless `session.settings.get(\"debug.enabled\")` is true. `packages/coding-agent/src/tools/index.ts` wires the factory and rechecks the same setting in tool filtering.\n2. `DebugTool.execute()` clamps `params.timeout` through `clampTimeout(\"debug\", params.timeout)` and composes the caller `AbortSignal` with `AbortSignal.timeout(...)`.\n3. `launch` and `attach` resolve cwd/program paths, select an adapter in `packages/coding-agent/src/dap/config.ts`, then delegate to `dapSessionManager.launch()` / `.attach()`.\n4. `DapSessionManager.launch()` / `.attach()` enforce the single-session rule with `#ensureLaunchSlot()`, spawn the adapter through `DapClient.spawn()`, register listeners, send `initialize`, cache capabilities, start listening for an initial stop event before sending `launch`/`attach`, then complete the `initialized` → `configurationDone` handshake in `#completeConfigurationHandshake()`.\n5. `DapClient.spawn()` starts the adapter detached with `NON_INTERACTIVE_ENV`. Most adapters use stdio; socket-mode adapters (`dlv`) use `#spawnSocketUnix()` on Linux or `#spawnSocketClientAddr()` on macOS/other.\n6. `#registerSession()` in `packages/coding-agent/src/dap/session.ts` installs reverse-request handlers:\n - `runInTerminal`: spawns the requested debuggee command detached via `ptree.spawn()` and returns `{ processId }`\n - `startDebugging`: logs the child-session request and returns `{}`; it does not create nested sessions\n - events: `output`, `initialized`, `stopped`, `continued`, `exited`, `terminated` update cached session state\n7. Operational actions (`set_breakpoint`, `evaluate`, `threads`, `read_memory`, `custom_request`, and similar) call `dapSessionManager` methods. Most flow through `#sendRequestWithConfig()`, which first sends `configurationDone` when required, then sends the DAP request, then updates `lastUsedAt`.\n8. Breakpoint actions maintain local cached breakpoint sets in `DapSessionManager` and remap adapter responses back onto those cached records.\n9. `continue` and the three step actions clear cached stop state, subscribe for `stopped`/`terminated`/`exited` before sending the DAP request, then `#awaitStopOutcome()` either returns the new stopped location or reports that the program is still running after timeout.\n10. `pause` sends DAP `pause`, waits for a stopped event if needed, and reuses cached stop state if the program was already stopped.\n11. `stack_trace`, `scopes`, `variables`, and `evaluate` default to the current stopped thread/frame when the caller omits ids and cached state is available.\n12. `output` reads the in-memory output ring from `DapSessionManager.getOutput()`. `terminate` sends `terminate` when supported, always attempts `disconnect`, marks the session terminated, and disposes the client.\n13. `sessions` reads the manager’s current map and formats all summaries. Although the manager stores a map, only one active session can exist because new launch/attach calls are blocked until the active one is terminated or cleaned up.\n14. The interactive selector in `packages/coding-agent/src/debug/index.ts` builds a `SelectList` of fixed values and dispatches each to a handler:\n - `performance`: `startCpuProfile()`, wait for Enter/Escape, stop profiling, read a 30-second work profile with `getWorkProfile(30)`, then bundle via `createReportBundle()`\n - `work`: read `getWorkProfile(30)`, write a temp SVG, open it externally\n - `dump`: create a report bundle immediately\n - `memory`: force GC, call `Bun.generateHeapSnapshot(\"v8\")`, then bundle\n - `logs`: build a `DebugLogSource` and mount `DebugLogViewerComponent`\n - `raw-sse`: resolve a `RawSseDebugBuffer` from the session and mount `RawSseViewerComponent`\n - `system`: call `collectSystemInfo()` and render `formatSystemInfo()` into the chat pane\n - `open-artifacts`: open the current session artifact directory if it exists\n - `transcript`: delegates to `ctx.handleDebugTranscriptCommand()`\n - `clear-cache`: show confirmation, then remove artifact directories older than 30 days with `clearArtifactCache()`\n\n## Modes / Variants\n- **Availability gate**\n - Tool hidden when `debug.enabled` is false.\n- **Adapter selection**\n - `launch`: explicit `adapter` wins; otherwise `selectLaunchAdapter()` ranks available adapters by extension match, root-marker match, then native-debugger preference (`gdb`, `lldb-dap`) for extensionless binaries.\n - `attach`: explicit `adapter` wins; otherwise remote `port` prefers `debugpy`, then native debuggers, then first available adapter.\n- **Transport**\n - stdio adapters: direct `stdin`/`stdout` framing.\n - socket adapters: Unix domain socket on Linux; TCP callback on macOS/other.\n- **DAP agent-tool actions**\n - `launch` — spawn adapter, initialize session, maybe stop on entry; returns formatted session snapshot and `details.adapter`.\n - `attach` — connect to a live process or remote port; same output shape as `launch`.\n - `set_breakpoint` — source or function breakpoint add/update; returns the current breakpoint list for that target.\n - `remove_breakpoint` — source or function breakpoint removal; returns the remaining breakpoint list.\n - `set_instruction_breakpoint` / `remove_instruction_breakpoint` — require `supportsInstructionBreakpoints`; return current instruction breakpoint list.\n - `data_breakpoint_info` — require `supportsDataBreakpoints`; asks the adapter for a `dataId`, access types, and description for `name`.\n - `set_data_breakpoint` / `remove_data_breakpoint` — require `supportsDataBreakpoints`; return the cached data-breakpoint list.\n - `continue` / `step_over` / `step_in` / `step_out` — return text describing whether execution stopped, terminated, or kept running, plus `details.state` and `details.timedOut`.\n - `pause` — interrupts a running target and returns a stopped snapshot.\n - `evaluate` — adapter expression evaluation; defaults context to `repl`.\n - `stack_trace` — fetches frames for the resolved thread.\n - `threads` — fetches current threads.\n - `scopes` — frame scopes for an explicit `frame_id` or the current stopped frame.\n - `variables` — variables for `variable_ref` or `scope_id`.\n - `disassemble` — require `supportsDisassembleRequest`; disassembles around a memory reference.\n - `read_memory` — require `supportsReadMemoryRequest`; returns address, base64 data, unreadable-byte count.\n - `write_memory` — require `supportsWriteMemoryRequest`; writes base64 data and reports bytes written.\n - `modules` — require `supportsModulesRequest`; optional pagination via `start_module` / `module_count`.\n - `loaded_sources` — require `supportsLoadedSourcesRequest`; returns loaded source descriptors.\n - `custom_request` — sends any DAP request name with arbitrary arguments.\n - `output` — dumps captured stdout/stderr/console text from the session cache.\n - `terminate` — disconnects and disposes the active session; returns `No debug session to terminate.` when none exists.\n - `sessions` — lists all cached session summaries.\n- **Interactive selector routes (UI-only)**\n - `logs` — loads today’s log tail and optional older daily log files into `DebugLogViewerComponent`; supports copy, range selection, pid filtering, load-older.\n - `raw-sse` — live view over the session’s `RawSseDebugBuffer`; supports tail-follow, scrolling, copy-all.\n - `performance` — CPU profile + 30-second work profile + report bundle.\n - `memory` — heap snapshot + report bundle.\n - `dump` — report bundle without profiler artifacts.\n - `work` — standalone work-profile flamegraph export/open.\n - `system` — formatted OS/arch/CPU/memory/version/cwd/shell/terminal dump.\n - `open-artifacts` / `transcript` / `clear-cache` — artifact directory open, transcript export, artifact-cache pruning.\n\n## Side Effects\n- Filesystem\n - Resolves program/file/cwd paths against the session cwd.\n - Report creation writes `.tar.gz` bundles and may read the session JSONL, artifact files, subagent session JSONLs, and log files.\n - Work-profile export writes `/tmp/work-profile-<timestamp>.svg`.\n - Log source reads daily log files from the logs dir.\n - Artifact-cache cleanup removes session artifact directories older than the cutoff.\n - `resolveRawSseDebugBuffer()` may attach a non-enumerable `rawSseDebugBuffer` property to the owner object.\n- Network\n - Socket-mode adapters bind/connect local sockets.\n - Remote attach may connect through the adapter to a remote debug port.\n- Subprocesses / native bindings\n - Spawns debugger adapters (`gdb`, `lldb-dap`, `python -m debugpy.adapter`, `dlv`, and others from `defaults.json`) detached.\n - Reverse DAP `runInTerminal` requests spawn the debuggee detached via `ptree.spawn()`.\n - `getWorkProfile(30)` comes from `@oh-my-pi/pi-natives`.\n - CPU profiling uses `node:inspector/promises`; heap snapshots use `Bun.generateHeapSnapshot(\"v8\")`; raw/log viewers sanitize text via `@oh-my-pi/pi-natives`.\n - `openPath()` launches the OS default file/browser handler for artifact dirs and SVGs.\n - Log/raw-SSE viewers can call `copyToClipboard()`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - `DapSessionManager` keeps session summaries, breakpoints, threads, stack frames, stop location, output capture, capabilities, and last-used timestamps in memory.\n - Active-session id is global to the singleton `dapSessionManager`.\n - `RawSseDebugBuffer` stores recent SSE events per owner/session.\n - The tool is `exclusive`; concurrent debug tool calls are blocked by the scheduler.\n- User-visible prompts / interactive UI\n - Debug selector shows confirmation before cache deletion.\n - Performance profiling temporarily hijacks editor Enter/Escape handlers until profiling stops.\n - Log/raw-SSE viewers replace the editor pane with custom components.\n- Background work / cancellation\n - Every DAP request accepts an `AbortSignal`; timeouts and caller cancellation abort the active request, not the whole session lifetime.\n - `DapSessionManager` runs a background cleanup loop every 30 seconds.\n - Raw SSE viewers subscribe to buffer updates until closed.\n\n## Limits & Caps\n- Tool timeout clamp: `default=30`, `min=5`, `max=300` in `packages/coding-agent/src/tools/tool-timeouts.ts`.\n- Per-request DAP default timeout: `DEFAULT_REQUEST_TIMEOUT_MS = 30_000` in `packages/coding-agent/src/dap/client.ts`.\n- Single active session: enforced by `#ensureLaunchSlot()` in `packages/coding-agent/src/dap/session.ts`.\n- Idle session cleanup: `IDLE_TIMEOUT_MS = 10 * 60 * 1000`, checked every `CLEANUP_INTERVAL_MS = 30 * 1000`.\n- Adapter liveness heartbeat: `HEARTBEAT_INTERVAL_MS = 5 * 1000`.\n- Output capture cap: `MAX_OUTPUT_BYTES = 128 * 1024`; older text is trimmed in ~1 KiB slices and `outputTruncated` is recorded.\n- Initial stop capture timeout after launch/attach: `STOP_CAPTURE_TIMEOUT_MS = 5_000`.\n- Socket-mode adapter readiness timeout: `10_000` ms in `waitForCondition()` and TCP connect timeout logic in `packages/coding-agent/src/dap/client.ts`.\n- Raw SSE buffer caps in `packages/coding-agent/src/debug/raw-sse-buffer.ts`:\n - `MAX_RAW_SSE_EVENTS = 1_000`\n - `MAX_RAW_SSE_CHARS = 512_000`\n - `MAX_RAW_SSE_EVENT_CHARS = 64_000` per event, with `: omp-debug-truncated ...` marker appended on trim\n- Log viewer window in `packages/coding-agent/src/debug/log-viewer.ts`:\n - `INITIAL_LOG_CHUNK = 50`\n - `LOAD_OLDER_CHUNK = 50`\n- Report/log ingestion caps in `packages/coding-agent/src/debug/report-bundle.ts`:\n - `MAX_LOG_LINES = 5000` for interactive log reading\n - `MAX_LOG_BYTES = 2 * 1024 * 1024` tail-read ceiling\n - report bundles include only the last `1000` log lines\n - subagent session inclusion is capped at the most recent `10` JSONL files\n- Interactive profiling windows in `packages/coding-agent/src/debug/index.ts`: both performance and work reports request `getWorkProfile(30)`.\n- Artifact cache pruning default: `30` days in `clearArtifactCache()` and the selector confirmation text.\n\n## Errors\n- Parameter validation in `packages/coding-agent/src/tools/debug.ts` throws `ToolError` with explicit messages such as:\n - `program is required for launch`\n - `attach requires pid or port`\n - `set_breakpoint requires file+line or function`\n - `variables requires variable_ref or scope_id`\n - `memory_reference is required for read_memory`\n - `count is required for read_memory`\n - `data is required for write_memory`\n - `command is required for custom_request`\n- Adapter selection failure throws `No debugger adapter available. Installed adapters: ...`.\n- Capability-gated actions throw from `requireCapability(...)`, e.g. `Active adapter does not support memory reads.`\n- No-session and state errors come from `DapSessionManager`, e.g. `No active debug session. Launch or attach first.`, `No active stack frame. Run stack_trace first or supply frame_id.`, `Debugger reported no threads.`\n- Launching a second live session throws `Debug session <id> is still active. Terminate it before launching another.`\n- DAP transport/request failures surface as thrown errors from `DapClient`:\n - `DAP request <command> timed out after <ms>ms`\n - `DAP event <event> timed out after <ms>ms`\n - `DAP adapter <name> is not running`\n - `DAP adapter exited (code N): <stderr>` or `DAP adapter exited unexpectedly (code N)`\n - adapter response `message` when a DAP request fails\n- `continue` / `step_*` are intentionally non-fatal when the target stays running past the timeout: they return `details.timedOut = true` and `state: \"running\"` instead of throwing.\n- `terminate` suppresses adapter errors while sending `terminate`/`disconnect`; it still disposes the client and returns the last summary when possible.\n- Interactive selector handlers report UI errors instead of throwing:\n - profiler start/stop, report bundling, log reading, system-info collection, cache clearing, and artifact opening use `ctx.showError(...)` / `ctx.showWarning(...)`\n - empty logs and empty artifact caches are warnings/status messages, not failures\n - copy failures in log/raw-SSE viewers become status/error text in the UI\n- Report-bundle helpers are intentionally best-effort for many file reads: missing session files, missing artifact dirs, unreadable artifact files, missing log dirs, inaccessible cache dirs, and missing subagent files are skipped silently.\n- `collectSystemInfo()` is best-effort for CPU probing; failure there falls back to `Unknown CPU`.\n\n## Notes\n- `packages/coding-agent/src/prompts/tools/debug.md` tells the model only one active session is supported; that is not advisory, it is enforced in code.\n- `configurationDone` is sent automatically both during launch/attach handshake and lazily before later requests if the adapter required it and the initial handshake did not complete.\n- `startDebugging` reverse requests are acknowledged but not implemented; child debug sessions are not spawned.\n- `output` exposes the merged `output` event stream only; the tool does not distinguish stdout, stderr, and console categories.\n- Session summaries expose `needsConfigurationDone`; this is derived from adapter capabilities and whether `configurationDone` has been sent.\n- Source breakpoint file paths are normalized with `path.resolve()` before caching and sending to the adapter.\n- `evaluate` defaults to `repl`, so the tool can forward raw debugger commands when the adapter supports them.\n- `disassemble` resolves its target from `memory_reference` first, then `instruction_reference`; it throws if neither is present.\n- `RawSseDebugBuffer.recordEvent()` increments `totalEvents` before bounded retention. A snapshot can therefore show fewer retained records than total observed events.\n- Raw SSE buffer listener failures are swallowed so viewer bugs do not break capture.\n- `createDebugLogSource()` walks daily log files newest-first, but `loadOlderLogs()` reverses each requested slice before concatenation so older chunks prepend in chronological order.\n- `clearArtifactCache()` deletes directories by directory mtime, not per-file age.\n- `addDirectoryToArchive()` reads artifact files as text with `Bun.file(...).text()`. Binary artifact contents are not preserved byte-for-byte in the report bundle.\n- The tool renderer truncates displayed output for the TUI preview, but the underlying text result still contains the full returned string.\n",
69
69
  "tools/edit.md": "# edit\n\n> Applies source edits; default mode is the hashline patch language consumed from a single `input` string.\n\n## Source\n- Entry: `packages/coding-agent/src/edit/index.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/hashline.md`\n- Key collaborators:\n - `packages/coding-agent/src/utils/edit-mode.ts` — selects active edit mode\n - `packages/coding-agent/src/hashline/grammar.lark` — custom-tool grammar for hashline mode\n - `packages/coding-agent/src/hashline/input.ts` — splits `@@ PATH` sections (legacy single-`@` headers are still accepted)\n - `packages/coding-agent/src/hashline/parser.ts` — parses ops and payload lines\n - `packages/coding-agent/src/hashline/apply.ts` — validates anchors and applies edits\n - `packages/coding-agent/src/hashline/anchors.ts` — stale-anchor mismatch formatting\n - `packages/coding-agent/src/hashline/recovery.ts` — cache-based stale-anchor recovery\n - `packages/coding-agent/src/hashline/hash.ts` — computes `LINEhh|` anchors shared with `read`/`search`\n - `packages/coding-agent/src/edit/file-read-cache.ts` — per-session read snapshot cache\n - `packages/coding-agent/src/tools/read.ts` — emits anchored lines and records read snapshots\n - `packages/coding-agent/src/tools/search.ts` — records sparse snapshots from matches/context\n - `packages/coding-agent/src/tools/fs-cache-invalidation.ts` — invalidates FS scan caches after writes\n - `packages/coding-agent/src/edit/streaming.ts` — computes in-flight diff previews for the TUI\n\n## Inputs\n\n### Hashline mode (default)\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `input` | `string` | Yes | One or more edit sections. First non-blank line must be `@@ PATH` (legacy single-`@` is still accepted) unless the caller supplies the legacy fallback `path` outside the model schema and the body already looks like hashline ops (`packages/coding-agent/src/hashline/input.ts`). Optional `*** Begin Patch` / `*** End Patch` envelope is ignored if present. |\n\nPatch language inside `input`:\n\n- Section header: `@@ PATH`\n- Insert after: `+ ANCHOR`\n- Insert before: `< ANCHOR`\n- Delete range: `- A..B`\n- Replace range: `= A..B`\n- Payload line: `~TEXT` by default; separator is `HL_EDIT_SEP` and can be overridden once at process start by `PI_HL_SEP` (`packages/coding-agent/src/hashline/hash.ts`)\n- Special anchors: `BOF`, `EOF`\n- Anchor token: `<line><2-char-hash>`, for example `41th`\n\nAnchors come from `read`/`search` output. `read` formats lines as `LINEhh|TEXT` via `formatHashLine` / `formatHashLines` in `packages/coding-agent/src/hashline/hash.ts`; copy only the token left of `|` into op lines.\n\nOther edit modes exist (`replace`, `patch`, `vim`, `apply_patch`) and are selected outside the tool payload by `resolveEditMode()` in `packages/coding-agent/src/utils/edit-mode.ts`. Their schemas are different; this document covers the default hashline mode.\n\n## Outputs\n- Single-shot tool result; hashline mode does not use a `resolve` preview/apply handshake.\n- `content` contains one text block per call. For a successful single-file edit it is either:\n - `<path>:` plus a compact diff preview from `packages/coding-agent/src/hashline/diff-preview.ts`, or\n - `Updated <path>` / `Created <path>` when no compact preview text is emitted.\n- Parse or recovery warnings are appended as:\n\n```text\nWarnings:\n...\n```\n\n- `details` is `EditToolDetails` from `packages/coding-agent/src/edit/renderer.ts`:\n - `diff`: unified diff string\n - `firstChangedLine`: first changed post-edit line\n - `diagnostics`: LSP/format result if available\n - `op`: `\"create\"` or `\"update\"` for hashline mode\n - `meta`: output metadata\n - `perFileResults`: present for multi-section input\n- Multi-section input returns one aggregated result with combined text and per-file details.\n- While the model is still typing arguments, the TUI can compute a diff preview with `packages/coding-agent/src/edit/streaming.ts`; that preview is not a deferred action and does not block execution.\n\n## Flow\n1. `EditTool.execute()` in `packages/coding-agent/src/edit/index.ts` resolves the active mode. Default is `hashline`; `customFormat` exposes `packages/coding-agent/src/hashline/grammar.lark` with `$HFMT$` / `$HSEP$` placeholders filled from `packages/coding-agent/src/hashline/hash.ts`.\n2. `executeHashlineSingle()` in `packages/coding-agent/src/hashline/execute.ts` splits the raw `input` into `@PATH` sections with `splitHashlineInputs()`.\n3. If multiple sections target the same path, `mergeSamePathSections()` concatenates them before execution so every op still refers to the original file snapshot.\n4. Multi-section calls run a preflight pass (`preflightHashlineSection()`): parse ops, enforce plan-mode write rules, load the current file, reject anchor-scoped edits against missing files, reject auto-generated files, apply edits in memory, and fail if the result is a no-op. This prevents partial batches.\n5. `parseHashlineWithWarnings()` in `packages/coding-agent/src/hashline/parser.ts` tokenizes the diff body:\n - ignores blank lines and optional `*** Begin Patch`\n - stops at `*** End Patch`\n - stops at `*** Abort` and emits `ABORT_WARNING`\n - turns `+` / `<` payload runs into one `insert` edit per payload line\n - turns `- A..B` into one `delete` edit per line in the range\n - turns `= A..B` into inserts before `A`, then deletes for `A..B`; no payload means replace with a single empty line\n6. `applyHashlineEdits()` in `packages/coding-agent/src/hashline/apply.ts` validates every referenced anchor before mutating anything. Each anchor hash is recomputed from current file content with `computeLineHash()`.\n7. If any anchor hash differs, `applyHashlineEdits()` throws `HashlineMismatchError`. `execute.ts` catches only that class and calls `tryRecoverHashlineWithCache()`.\n8. Recovery replays the edits against the most recent cached read/search snapshot for that path (`packages/coding-agent/src/edit/file-read-cache.ts`), then 3-way merges the result onto current disk content using `Diff.applyPatch(..., { fuzzFactor: 3 })` in `packages/coding-agent/src/hashline/recovery.ts`. On success the edit proceeds with a warning; on failure the original mismatch error is re-thrown.\n9. Before splicing lines, `absorbReplacementBoundaryDuplicates()` normalizes some malformed-but-recoverable ranges:\n - duplicate prefix/suffix lines adjacent to a replacement can be absorbed by widening the delete range\n - pure inserts can auto-drop duplicated leading/trailing payload lines when `edit.hashlineAutoDropPureInsertDuplicates` is enabled\n - all such fixes append warnings\n10. `after_anchor` inserts are normalized to `before_anchor` of the next line, or `EOF` if the anchor was the last line.\n11. Anchor-targeted edits are bucketed by target line and applied bottom-up so earlier splices do not invalidate later original line numbers. `BOF` and `EOF` inserts are applied after that.\n12. The edited text is restored to the original BOM and line ending style with helpers from `packages/coding-agent/src/edit/normalize.ts` and persisted via `serializeEditFileText()` in `packages/coding-agent/src/edit/read-file.ts`.\n13. The writethrough callback from `createLspWritethrough()` may format the file and fetch diagnostics. Late diagnostics are queued back into session state as a hidden deferred message by `EditTool.#injectLateDiagnostics()` in `packages/coding-agent/src/edit/index.ts`.\n14. `invalidateFsScanAfterWrite()` calls `invalidateFsScanCache(path)` so filesystem-backed tools do not serve stale scan results.\n15. The session file-read cache is refreshed with the post-edit file text via `recordContiguous()`, making the just-written content the new recovery base for subsequent stale-anchor merges.\n16. The final response is built from a unified diff (`generateDiffString()`), a compact preview, and any accumulated warnings.\n\n## Modes / Variants\n- `hashline` — default mode; line-anchored patch language described here (`packages/coding-agent/src/utils/edit-mode.ts`).\n- `replace` — exact/fuzzy old/new text replacement (`packages/coding-agent/src/edit/modes/replace.ts`).\n- `patch` — structured JSON diff-hunk mode (`packages/coding-agent/src/edit/modes/patch.ts`).\n- `apply_patch` — freeform Codex-style `*** Begin Patch` envelope, internally expanded into patch-mode entries (`packages/coding-agent/src/edit/modes/apply-patch.ts`).\n- `vim` — persistent modal editing buffer (`packages/coding-agent/src/tools/vim.ts`).\n\nHashline op examples:\n\n```text\n@@ src/a.ts\n+ 4fb\n~const added = true;\n```\n\n```text\n@@ src/a.ts\n< 4fb\n~const addedBefore = true;\n```\n\n```text\n@@ src/a.ts\n- 4fb..6qx\n```\n\n```text\n@@ src/a.ts\n= 4fb..5dm\n~const clean = (name || DEF).trim();\n~return clean.length === 0 ? DEF : clean.toUpperCase();\n```\n\nBOF/EOF examples:\n\n```text\n@@ src/a.ts\n+ BOF\n~const HEADER = true;\n```\n\n```text\n@@ src/a.ts\n+ EOF\n~export const done = true;\n```\n\n## Side Effects\n- Filesystem\n - Reads target files with `readEditFileText()`.\n - Writes full updated file contents with `serializeEditFileText()`.\n - Preserves BOM and original line-ending style.\n- Subprocesses / native bindings\n - `createLspWritethrough()` may trigger formatter / diagnostics work through the LSP subsystem.\n - `invalidateFsScanAfterWrite()` calls native `invalidateFsScanCache()` from `@oh-my-pi/pi-natives`.\n- Session state\n - Reads and updates the per-session `FileReadCache` used for stale-anchor recovery.\n - Stores pending deferred-diagnostics abort controllers per path inside `EditTool`.\n - Queues late diagnostics back into the session transcript as a hidden custom message.\n- Background work / cancellation\n - A new edit to the same path aborts the prior deferred diagnostics fetch for that path (`packages/coding-agent/src/edit/index.ts`).\n - The tool itself is marked `nonAbortable = true` and `concurrency = \"exclusive\"` in `packages/coding-agent/src/edit/index.ts`.\n\n## Limits & Caps\n- Default mode is `hashline` (`DEFAULT_EDIT_MODE`) in `packages/coding-agent/src/utils/edit-mode.ts`.\n- Anchor hashes are always 2 lowercase letters from a stable 647-entry bigram table (`HL_BIGRAMS_COUNT`) in `packages/coding-agent/src/hashline/hash.ts`.\n- The visible mismatch report shows 2 lines of context on each side (`MISMATCH_CONTEXT`) in `packages/coding-agent/src/hashline/constants.ts`.\n- Stale-anchor recovery uses `fuzzFactor: 3` (`HASHLINE_RECOVERY_FUZZ_FACTOR`) in `packages/coding-agent/src/hashline/recovery.ts`.\n- The per-session read cache keeps at most 30 paths (`MAX_PATHS_PER_SESSION`) in `packages/coding-agent/src/edit/file-read-cache.ts`.\n- Hashline streaming chunk defaults are 200 lines or 64 KiB per chunk (`packages/coding-agent/src/hashline/types.ts`, consumed by `packages/coding-agent/src/hashline/stream.ts`).\n- `HL_EDIT_SEP` defaults to `~`; `HL_BODY_SEP` is always `|` (`packages/coding-agent/src/hashline/hash.ts`).\n\n## Errors\n- Missing section header:\n - `input must begin with \"@@ PATH\" on the first non-blank line; got: ... Example: \"@@ src/foo.ts\" then edit ops.`\n- Empty header:\n - `Input header \"@\" is empty; provide a file path.`\n- Bad anchor token:\n - `line N: expected a full anchor such as \"119sr\"; got \"...\".`\n- Bad range syntax:\n - `line N: explicit ranges are required for delete/replace...`\n - `line N: range must include exactly two full anchors separated by \"..\".`\n - `line N: range A..B ends before it starts.`\n - `line N: range A..B uses two different hashes for the same line.`\n- Missing payload for `+` / `<`:\n - `line N: + and < operations require at least one ~TEXT payload line.`\n- Stray payload line:\n - `line N: payload line has no preceding +, <, or = operation.`\n- Unknown op:\n - `line N: unrecognized op. Use < ANCHOR..., + ANCHOR..., - A..B..., = A..B...`\n- Missing file for anchor-scoped edits:\n - `File not found: <path>`\n- Out-of-range anchor:\n - `Line N does not exist (file has M lines)`\n- Stale anchors throw `HashlineMismatchError`. The error message contains re-read guidance and reprints nearby current file lines as `LINEhh|TEXT`; mismatched lines are marked `*`. `displayMessage` renders the same information in a code-frame style.\n- No-op edit:\n - `Edits to <path> resulted in no changes being made.`\n- Recovery failure is silent internally: if cache-based merge cannot prove a valid result, the original mismatch error is surfaced unchanged.\n\n## Notes\n- `read` and `search` are the authoritative source of anchors. The edit parser does not want the trailing `|TEXT`; copy only the `LINEhh` token.\n- Multi-op patches are parsed against the original file snapshot. Do not renumber later anchors after earlier ops; `applyHashlineEdits()` buckets and applies them bottom-up.\n- `= A..B` is not a primitive replace in the parser. It expands to inserts before `A` plus deletes for `A..B`, which is why stale-anchor checking still happens on the original range lines.\n- Interior lines of a multi-line range use hash `**` (`RANGE_INTERIOR_HASH`) and are not individually verified; only the first and last anchor hashes are checked.\n- `computeLineHash()` trims trailing whitespace before hashing. Anchors survive line-ending changes and trailing-space-only changes, but not substantive line edits.\n- For punctuation-only lines, the hash mixes in the line number; identical `}` lines on different lines intentionally get different anchors.\n- `splitHashlineInputs()` normalizes absolute `@@ PATH` headers back to a cwd-relative path when the file is inside the current working tree. Headers with any run of leading `@` chars (e.g. `@ foo.ts`, `@@ foo.ts`, `@@@foo.ts`) are accepted to absorb unified-diff-style drift; the canonical form is `@@ PATH`.\n- Optional `*** Begin Patch` / `*** End Patch` markers are accepted in hashline mode, but the file sections are still `@@ PATH`-based, not Codex `*** Update File:` hunks.\n- `*** Abort` terminates parsing early and returns `ABORT_WARNING`; ops parsed before the marker still apply.\n- File-read cache invalidation is conflict-based, not write-through invalidation. If `read` later records content for a line that disagrees with the cached snapshot, the entire snapshot for that path is replaced with the newly observed lines (`packages/coding-agent/src/edit/file-read-cache.ts`).\n- There is no resolve-style apply/discard phase for hashline edits. The only preview path is the transient TUI diff preview in `packages/coding-agent/src/edit/streaming.ts`.\n",
70
- "tools/eval.md": "# eval\n\n> Execute Python or JavaScript code in persistent cell-based runtimes.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/eval.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/eval.md`\n- Key collaborators:\n - `packages/coding-agent/src/eval/parse.ts` — lenient cell parser\n - `packages/coding-agent/src/eval/sniff.ts` — language sniffing heuristics\n - `packages/coding-agent/src/eval/backend.ts` — backend execution contract\n - `packages/coding-agent/src/eval/js/index.ts` — JS backend adapter\n - `packages/coding-agent/src/eval/js/executor.ts` — JS execution + output sink\n - `packages/coding-agent/src/eval/js/context-manager.ts` — persistent VM contexts, prelude, tool bridge\n - `packages/coding-agent/src/eval/js/prelude.txt` — JS global helpers\n - `packages/coding-agent/src/eval/py/index.ts` — Python backend adapter\n - `packages/coding-agent/src/eval/py/executor.ts` — kernel session retention, reset, cleanup\n - `packages/coding-agent/src/eval/py/kernel.ts` — Jupyter gateway/kernel protocol, display capture\n - `packages/coding-agent/src/eval/py/prelude.py` — Python helper functions and status events\n - `packages/coding-agent/src/session/streaming-output.ts` — truncation, artifacts, streamed chunks\n - `docs/python-repl.md` — Python kernel/gateway internals\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `input` | `string` | Yes | Cell program text. Parsed by `parseEvalInput()` in `packages/coding-agent/src/eval/parse.ts`, not by JSON subfields. |\n\n`input` syntax accepted at runtime:\n\n- Cell header: `*** Cell <attrs...>`. Attributes are space-separated tokens with quoted titles (`\"...\"` or `'...'`).\n- Canonical tokens (advertised in the prompt):\n - `<lang>:\"<title>\"` — language + title shorthand. `lang` is `py` or `js` (lenient: also `ts`, plus the long-form aliases `python`, `javascript`, `typescript`, `ipy`, `ipython`).\n - `t:<n>[ms|s|m]` — per-cell timeout (default 30s).\n - `rst` — wipe this cell's language kernel before running.\n- Lenient additional tokens (accepted by the parser, not advertised):\n - bare language token (`py`, `js`)\n - `id:\"...\"` / `title:\"...\"` / `name:\"...\"` / `cell:\"...\"` / `file:\"...\"` / `label:\"...\"` — title aliases\n - `timeout:` / `duration:` / `time:` — `t:` aliases\n - `reset` — `rst` alias\n - `rst:true|false|1|0|yes|no|on|off` — explicit boolean form\n - a bare positional duration token (`30s`, `2m`, `500ms`)\n - any unclassified bare token folds into a positional title fragment\n- Cell body: every following line until the next `*** Cell ...`, the optional `*** End`, or `*** Abort`. `*** End` is a quirk fix for GPT-trained models that emit terminators and is not documented in the prompt.\n\nLeniencies in `packages/coding-agent/src/eval/parse.ts`:\n\n- Markers accept two or more leading `*` and flexible whitespace.\n- `*** End` is optional everywhere; the parser silently consumes trailing tokens (e.g. `*** End py`).\n- Missing terminators between adjacent cells are tolerated; the next `*** Cell` closes the prior cell, and stray non-marker lines between cells fold into the prior cell's body without crashing.\n- Bare code or a single markdown fence such as ```` ```py ```` is treated as one implicit cell.\n- If `*** Abort` appears, the in-progress cell is dropped and the result carries an abort warning. To preserve a completed cell before `*** Abort`, emit `*** End` first.\n\nThe tool also exposes a custom Lark grammar from `packages/coding-agent/src/eval/eval.lark` for constrained sampling. That grammar is stricter than the runtime parser: it requires the canonical `*** Cell <lang>:\"title\"` header form with a fixed attribute order, advertises only `py` / `js`, and pins the trailing `*** End` so GPT-trained models' natural terminator habit aligns with the constrained output.\n\n## Outputs\n\nFinal result from `EvalTool.execute()` is single-shot, but `onUpdate` streams partial text and `details` while cells run.\n\nReturned shape:\n\n- `content`: one text block containing combined cell output, or `(no text output)` / `(no output)` when only rich outputs exist.\n- `details` (`EvalToolDetails` from `packages/coding-agent/src/eval/types.ts`):\n - `cells`: per-cell code, status (`pending`/`running`/`complete`/`error`), output, duration, exit code, status events, markdown flag\n - `language`: first backend used\n - `languages`: distinct backends used, in first-use order\n - `jsonOutputs`: structured values emitted via `display(...)`\n - `images`: image payloads emitted by Python rich display or JS `display({ type: \"image\", ... })`\n - `statusEvents`: aggregated helper/tool status events\n - `notice`: backend fallback notice\n - `meta`: truncation metadata\n - `isError`: set on cell failure or cancellation\n\nRenderer behavior in `packages/coding-agent/src/tools/eval.ts`:\n\n- call preview renders parsed code cells with syntax highlighting\n- result view renders each cell separately, including status, duration, and output\n- markdown outputs are rendered with the Markdown component instead of plain text\n- `jsonOutputs` render as a tree, collapsed or expanded depending on UI state\n- timeout / fallback / truncation notices render as dim metadata lines\n- images are carried in `details.images`; generic tool UI image handling renders them outside the text block\n\nSide-channel artifacts:\n\n- `session.allocateOutputArtifact?.(\"eval\")` may allocate an `artifact://...` backing store for spilled output.\n- Truncated output metadata points at that artifact when available.\n\n## Flow\n\n1. `EvalTool.execute()` in `packages/coding-agent/src/tools/eval.ts` parses `params.input` with `parseEvalInput()`.\n2. `parseEvalInput()` normalizes newlines, collects cells, parses attributes, and assigns each cell a language from the header, language sniffing, or the default `python`.\n3. Back in `execute()`, each parsed cell is resolved to a backend with `resolveBackend()`:\n - explicit `python`/`js` requests are validated against session settings and backend availability\n - otherwise `sniffEvalLanguage()` in `packages/coding-agent/src/eval/sniff.ts` tries shebangs and language markers\n - if no explicit language was present, later cells prefer the previous runtime language before re-sniffing\n - Python is preferred when available; JS is the fallback when Python is unavailable or disabled\n4. The tool allocates an `OutputSink`, a `TailBuffer`, per-cell result objects, and a `sessionAbortController`. `session.trackEvalExecution?.(...)` can wrap the whole run for external cancellation tracking.\n5. Cells execute sequentially. For each cell, `execute()`:\n - clamps the cell timeout through `clampTimeout(\"eval\", ...)`\n - builds a combined abort signal from the tool signal, the timeout, and the session abort controller\n - marks the cell `running` and emits an update\n - calls the backend’s `execute()` with `cwd`, `sessionId`, `sessionFile`, `kernelOwnerId`, `deadlineMs`, `reset`, artifact info, and chunk callback\n6. JS cells dispatch through `packages/coding-agent/src/eval/js/index.ts` into `executeJs()`; Python cells dispatch through `packages/coding-agent/src/eval/py/index.ts` into `executePython()`.\n7. Backend text chunks stream into the shared `OutputSink`; rich outputs are accumulated separately as JSON, images, markdown markers, and status events.\n8. After each cell:\n - text output is trimmed and stored on that cell result\n - multi-cell runs prefix text with `[i/n]` and the optional title\n - cancellations return early with `isError: true` and a cell-specific abort message\n - non-zero exit codes return early with `isError: true` and a message naming the failed cell\n - later cells are skipped after the first error, but earlier cell state persists in the underlying runtime\n9. On success, the tool joins all cell outputs, synthesizes `(no text output)` or `(no output)` when needed, and attaches truncation metadata from `summarizeFinal()`.\n10. The renderer uses `details.cells`, `details.jsonOutputs`, and `details.statusEvents` to build notebook-style output. `mergeCallAndResult = true` and `inline = true`, so call and result render together in the transcript.\n\n## Modes / Variants\n\n### Parsing modes\n\n- Explicit multi-cell format with `*** Cell ...` headers\n- Implicit single-cell fallback for bare code or a single fenced block\n- Abort-recovery parse path when `*** Abort` is present\n\n### Backend selection\n\n- Explicit Python backend\n- Explicit JavaScript backend\n- Auto-detected backend via `sniffEvalLanguage()`\n- Fallback from requested/inferred Python to JS when Python is unavailable\n- Fallback notice when JS markers are seen but `eval.js` is disabled and Python is used instead\n\n### JavaScript runtime\n\nImplemented in `packages/coding-agent/src/eval/js/context-manager.ts` and `packages/coding-agent/src/eval/js/prelude.txt`.\n\n- Persistent `vm.Context` instances keyed by `js:${sessionId}` in `vmContexts`\n- `rst` calls `resetVmContext(sessionKey)` before the cell executes\n- Top-level `await` and bare `return` are supported by wrapping code in an async IIFE when `wrapCode()` sees `await` or `return`\n- Top-level static `import ... from ...` is rewritten to `await import(...)` by `rewriteStaticImports()`\n- The prelude installs globals:\n - `display`, `print`\n - `read`, `write`, `append`, `sort`, `uniq`, `counter`, `diff`, `tree`, `env`, `output`\n - `tool.<name>(args)` proxy for arbitrary session tool calls\n- JS helpers are async because they cross the VM/tool boundary\n- `display(value)` behavior:\n - plain objects/arrays become JSON outputs\n - `{ type: \"image\", data, mimeType }` becomes an image output\n - scalars become text\n- The VM exposes a restricted `process` subset plus `Buffer`, `fetch`, `Blob`, `File`, `Headers`, `Request`, `Response`, `fs`, `require`, and browser-style globals\n- Per-session VM runs are serialized with `runQueued()`\n\n### Python runtime\n\nImplemented in `packages/coding-agent/src/eval/py/executor.ts`, `packages/coding-agent/src/eval/py/kernel.ts`, and `packages/coding-agent/src/eval/py/prelude.py`. See `docs/python-repl.md` for gateway and kernel details.\n\n- Default mode is retained `session` kernels keyed by `python:${sessionId}`\n- Optional `python.kernelMode = \"per-call\"` creates a fresh kernel for each cell and shuts it down afterward\n- `rst` disposes the retained kernel for that session before the cell runs; later Python cells in the same tool call reuse the fresh kernel\n- Startup path:\n - availability check\n - create/connect kernel\n - initialize cwd / env / `sys.path`\n - execute `PYTHON_PRELUDE`\n- Python cells run inside IPython/Jupyter, so top-level `await` works; the prompt warns not to use `asyncio.run(...)`\n- The Python prelude defines synchronous helpers with the same surface as JS (except `tool.<name>` exists only in JS)\n- `display(value)` wraps dict/list/tuple values in `IPython.display.JSON`; rich display MIME bundles are preserved\n- Kernel `display_data` / `execute_result` messages map to:\n - `application/x-omp-status` → status event\n - `image/png` → image output\n - `application/json` → JSON output\n - `text/markdown` → markdown output\n - `text/plain` → text output\n - `text/html` → HTML converted to markdown with `htmlToBasicMarkdown()`\n- Interactive stdin is rejected: `input_request` sends an empty reply, marks `stdinRequested`, and the executor returns exit code `1`\n\n### Multi-language call behavior\n\nA single tool call can mix Python and JS cells. Persistence is per language runtime:\n\n- resetting Python does not touch JS state\n- resetting JS does not touch Python state\n- each backend keeps its own retained session keyed from the same session-derived ID\n\n## Side Effects\n\n- Filesystem\n - JS/Python prelude helpers can read, write, append, diff, and traverse files under the session cwd or absolute paths.\n - Output may spill to an artifact file via `OutputSink`.\n- Network\n - Python backend speaks NDJSON to a local `python3` subprocess over stdin/stdout (no network).\n - JS runtime exposes `fetch` and `tool.<name>()`; those tools may perform additional network I/O.\n- Subprocesses / native bindings\n - Python availability check runs `<python> -c ...`.\n - Python backend spawns one `python -u runner.py` subprocess per kernel; cancellation sends `SIGINT`. Details in `docs/python-repl.md`.\n- Session state\n - `session.assertEvalExecutionAllowed?.()` can block execution.\n - `session.trackEvalExecution?.(...)` can register cancellable eval work.\n - `session.getSessionFile?.()` and `session.getEvalKernelOwnerId?.()` influence kernel reuse and artifact lookup.\n - JS VM contexts persist in `vmContexts` across eval calls until reset/disposal.\n - Python retained kernels persist in `kernelSessions` until reset, eviction, idle cleanup, or owner cleanup.\n- User-visible prompts / interactive UI\n - none; stdin requests are rejected programmatically\n- Background work / cancellation\n - Python retained kernels have heartbeat and idle cleanup timers.\n - Cancellation interrupts a running Python kernel and aborts JS promise waits.\n\n## Limits & Caps\n\n- Per-cell timeout default: 30s (`DEFAULT_TIMEOUT_MS` in `packages/coding-agent/src/eval/parse.ts`; `TOOL_TIMEOUTS.eval.default` in `packages/coding-agent/src/tools/tool-timeouts.ts`)\n- Timeout clamp: 1s minimum, 600s maximum (`TOOL_TIMEOUTS.eval` in `packages/coding-agent/src/tools/tool-timeouts.ts`)\n- Transcript code/output preview: 10 lines by default (`EVAL_DEFAULT_PREVIEW_LINES` in `packages/coding-agent/src/tools/eval.ts`)\n- Output truncation window: 50KB default (`DEFAULT_MAX_BYTES` in `packages/coding-agent/src/session/streaming-output.ts`)\n- Output line cap inside truncation helpers: 3000 lines (`DEFAULT_MAX_LINES` in `packages/coding-agent/src/session/streaming-output.ts`)\n- Streaming tail buffer for live updates: `DEFAULT_MAX_BYTES * 2` = 100KB (`packages/coding-agent/src/tools/eval.ts`)\n- Python retained kernel idle timeout: 5 minutes (`IDLE_TIMEOUT_MS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python retained kernel cap: 4 sessions (`MAX_KERNEL_SESSIONS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python retained kernel cleanup sweep: every 30s (`CLEANUP_INTERVAL_MS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python owner-cleanup shutdown wait: 2000ms (`OWNER_CLEANUP_KERNEL_SHUTDOWN_TIMEOUT_MS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python heartbeat interval: 5s (`ensureKernelHeartbeat()` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python external gateway availability check timeout: 5s (`AbortSignal.timeout(5000)` in `packages/coding-agent/src/eval/py/kernel.ts`)\n- Python auto-restart budget: one restart per retained session before hard failure (`restartCount > 1` in `packages/coding-agent/src/eval/py/executor.ts`)\n\n## Errors\n\n- Parse errors from `parseEvalInput()` throw immediately, for example invalid timeout strings.\n- Missing session without proxy executor throws `ToolError(\"Eval tool requires a session when not using proxy executor\")`.\n- Disabled/unavailable backends throw `ToolError` from `resolveBackend()`:\n - `eval.py = false`\n - `eval.js = false`\n - Python kernel unavailable\n - no backend available\n- JS runtime exceptions are converted into text output plus `exitCode: 1`; cancellations return `cancelled: true` and may append `Command timed out`.\n- Python execution errors from the kernel become text output and `exitCode: 1`; later cells are skipped.\n- Python stdin requests are treated as errors with the message `Kernel requested stdin; interactive input is not supported.`\n- Cancellation is returned, not thrown, once backend execution has started. The tool formats it as a cell failure and sets `details.isError = true`.\n- If parsing encountered `*** Abort`, the final text appends `ABORT_WARNING`, explicitly telling the model that earlier cells ran and state persists.\n- If output truncates, the tool still succeeds; truncation is surfaced through `details.meta` and artifact-backed full output when available.\n\n## Notes\n\n- The runtime parser is intentionally more permissive than `packages/coding-agent/src/eval/eval.lark`; maintain both when changing syntax.\n- Cell language in `ParsedEvalCell` is not the last word: `EvalTool.execute()` may override backend selection for cells without an explicit header by inheriting the previous runtime language.\n- `tool.<name>()` exists only in JS. Python prelude helpers do not call back into the full tool registry.\n- JS helper paths reject protocol URIs (`://`) in `resolvePath()`; the JS prelude is filesystem-only unless the code calls `tool.read(...)` or another tool explicitly.\n- Python helper `output(...)` depends on `PI_SESSION_FILE`; it fails outside a session-backed run.\n- `display()` can produce text and structured outputs from the same value; the renderer prefers markdown over `text/plain` when both exist.\n- JS static imports are rewritten only at top level. Nested imports stay invalid and surface normal JS syntax/runtime errors.\n- `EvalTool` is `concurrency = \"exclusive\"`, so eval calls do not overlap within a session.\n- The tool description shown to the model is templated by backend availability (`getEvalToolDescription()`); if Python is unavailable, the prompt omits Python-specific instructions.\n",
70
+ "tools/eval.md": "# eval\n\n> Execute Python or JavaScript code in persistent cell-based runtimes.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/eval.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/eval.md`\n- Key collaborators:\n - `packages/coding-agent/src/eval/parse.ts` — lenient cell parser\n - `packages/coding-agent/src/eval/sniff.ts` — language sniffing heuristics\n - `packages/coding-agent/src/eval/backend.ts` — backend execution contract\n - `packages/coding-agent/src/eval/js/index.ts` — JS backend adapter\n - `packages/coding-agent/src/eval/js/executor.ts` — JS execution + output sink\n - `packages/coding-agent/src/eval/js/context-manager.ts` — persistent VM contexts, prelude, tool bridge\n - `packages/coding-agent/src/eval/js/prelude.txt` — JS global helpers\n - `packages/coding-agent/src/eval/py/index.ts` — Python backend adapter\n - `packages/coding-agent/src/eval/py/executor.ts` — kernel session retention, reset, cleanup\n - `packages/coding-agent/src/eval/py/kernel.ts` — Jupyter gateway/kernel protocol, display capture\n - `packages/coding-agent/src/eval/py/prelude.py` — Python helper functions and status events\n - `packages/coding-agent/src/session/streaming-output.ts` — truncation, artifacts, streamed chunks\n - `docs/python-repl.md` — Python kernel/gateway internals\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `input` | `string` | Yes | Cell program text. Parsed by `parseEvalInput()` in `packages/coding-agent/src/eval/parse.ts`, not by JSON subfields. |\n\n`input` syntax accepted at runtime:\n\n- Cell header: `*** Cell <attrs...>`. Attributes are space-separated tokens with quoted titles (`\"...\"` or `'...'`).\n- Canonical tokens (advertised in the prompt):\n - `<lang>:\"<title>\"` — language + title shorthand. `lang` is `py` or `js` (lenient: also `ts`, plus the long-form aliases `python`, `javascript`, `typescript`, `ipy`, `ipython`).\n - `t:<n>[ms|s|m]` — per-cell timeout (default 30s).\n - `rst` — wipe this cell's language kernel before running.\n- Lenient additional tokens (accepted by the parser, not advertised):\n - bare language token (`py`, `js`)\n - `id:\"...\"` / `title:\"...\"` / `name:\"...\"` / `cell:\"...\"` / `file:\"...\"` / `label:\"...\"` — title aliases\n - `timeout:` / `duration:` / `time:` — `t:` aliases\n - `reset` — `rst` alias\n - `rst:true|false|1|0|yes|no|on|off` — explicit boolean form\n - a bare positional duration token (`30s`, `2m`, `500ms`)\n - any unclassified bare token folds into a positional title fragment\n- Cell body: every following line until the next `*** Cell ...`, the optional `*** End`, or `*** Abort`. `*** End` is a quirk fix for GPT-trained models that emit terminators and is not documented in the prompt.\n\nLeniencies in `packages/coding-agent/src/eval/parse.ts`:\n\n- Markers accept two or more leading `*` and flexible whitespace.\n- `*** End` is optional everywhere; the parser silently consumes trailing tokens (e.g. `*** End py`).\n- Missing terminators between adjacent cells are tolerated; the next `*** Cell` closes the prior cell, and stray non-marker lines between cells fold into the prior cell's body without crashing.\n- Bare code or a single markdown fence such as ```` ```py ```` is treated as one implicit cell.\n- If `*** Abort` appears, the in-progress cell is dropped and the result carries an abort warning. To preserve a completed cell before `*** Abort`, emit `*** End` first.\n\nThe tool also exposes a custom Lark grammar from `packages/coding-agent/src/eval/eval.lark` for constrained sampling. That grammar is stricter than the runtime parser: it requires the canonical `*** Cell <lang>:\"title\"` header form with a fixed attribute order, advertises only `py` / `js`, and pins the trailing `*** End` so GPT-trained models' natural terminator habit aligns with the constrained output.\n\n## Outputs\n\nFinal result from `EvalTool.execute()` is single-shot, but `onUpdate` streams partial text and `details` while cells run.\n\nReturned shape:\n\n- `content`: one text block containing combined cell output, or `(no text output)` / `(no output)` when only rich outputs exist.\n- `details` (`EvalToolDetails` from `packages/coding-agent/src/eval/types.ts`):\n - `cells`: per-cell code, status (`pending`/`running`/`complete`/`error`), output, duration, exit code, status events, markdown flag\n - `language`: first backend used\n - `languages`: distinct backends used, in first-use order\n - `jsonOutputs`: structured values emitted via `display(...)`\n - `images`: image payloads emitted by Python rich display or JS `display({ type: \"image\", ... })`\n - `statusEvents`: aggregated helper/tool status events\n - `notice`: backend fallback notice\n - `meta`: truncation metadata\n - `isError`: set on cell failure or cancellation\n\nRenderer behavior in `packages/coding-agent/src/tools/eval.ts`:\n\n- call preview renders parsed code cells with syntax highlighting\n- result view renders each cell separately, including status, duration, and output\n- markdown outputs are rendered with the Markdown component instead of plain text\n- `jsonOutputs` render as a tree, collapsed or expanded depending on UI state\n- timeout / fallback / truncation notices render as dim metadata lines\n- images are carried in `details.images`; generic tool UI image handling renders them outside the text block\n\nSide-channel artifacts:\n\n- `session.allocateOutputArtifact?.(\"eval\")` may allocate an `artifact://...` backing store for spilled output.\n- Truncated output metadata points at that artifact when available.\n\n## Flow\n\n1. `EvalTool.execute()` in `packages/coding-agent/src/tools/eval.ts` parses `params.input` with `parseEvalInput()`.\n2. `parseEvalInput()` normalizes newlines, collects cells, parses attributes, and assigns each cell a language from the header, language sniffing, or the default `python`.\n3. Back in `execute()`, each parsed cell is resolved to a backend with `resolveBackend()`:\n - explicit `python`/`js` requests are validated against session settings and backend availability\n - otherwise `sniffEvalLanguage()` in `packages/coding-agent/src/eval/sniff.ts` tries shebangs and language markers\n - if no explicit language was present, later cells prefer the previous runtime language before re-sniffing\n - Python is preferred when available; JS is the fallback when Python is unavailable or disabled\n4. The tool allocates an `OutputSink`, a `TailBuffer`, per-cell result objects, and a `sessionAbortController`. `session.trackEvalExecution?.(...)` can wrap the whole run for external cancellation tracking.\n5. Cells execute sequentially. For each cell, `execute()`:\n - clamps the cell timeout through `clampTimeout(\"eval\", ...)`\n - builds a combined abort signal from the tool signal, the timeout, and the session abort controller\n - marks the cell `running` and emits an update\n - calls the backend’s `execute()` with `cwd`, `sessionId`, `sessionFile`, `kernelOwnerId`, `deadlineMs`, `reset`, artifact info, and chunk callback\n6. JS cells dispatch through `packages/coding-agent/src/eval/js/index.ts` into `executeJs()`; Python cells dispatch through `packages/coding-agent/src/eval/py/index.ts` into `executePython()`.\n7. Backend text chunks stream into the shared `OutputSink`; rich outputs are accumulated separately as JSON, images, markdown markers, and status events.\n8. After each cell:\n - text output is trimmed and stored on that cell result\n - multi-cell runs prefix text with `[i/n]` and the optional title\n - cancellations return early with `isError: true` and a cell-specific abort message\n - non-zero exit codes return early with `isError: true` and a message naming the failed cell\n - later cells are skipped after the first error, but earlier cell state persists in the underlying runtime\n9. On success, the tool joins all cell outputs, synthesizes `(no text output)` or `(no output)` when needed, and attaches truncation metadata from `summarizeFinal()`.\n10. The renderer uses `details.cells`, `details.jsonOutputs`, and `details.statusEvents` to build notebook-style output. `mergeCallAndResult = true` and `inline = true`, so call and result render together in the transcript.\n\n## Modes / Variants\n\n### Parsing modes\n\n- Explicit multi-cell format with `*** Cell ...` headers\n- Implicit single-cell fallback for bare code or a single fenced block\n- Abort-recovery parse path when `*** Abort` is present\n\n### Backend selection\n\n- Explicit Python backend\n- Explicit JavaScript backend\n- Auto-detected backend via `sniffEvalLanguage()`\n- Fallback from requested/inferred Python to JS when Python is unavailable\n- Fallback notice when JS markers are seen but `eval.js` is disabled and Python is used instead\n\n### JavaScript runtime\n\nImplemented in `packages/coding-agent/src/eval/js/context-manager.ts` and `packages/coding-agent/src/eval/js/prelude.txt`.\n\n- Persistent `vm.Context` instances keyed by `js:${sessionId}` in `vmContexts`\n- `rst` calls `resetVmContext(sessionKey)` before the cell executes\n- Top-level `await` and bare `return` are supported by wrapping code in an async IIFE when `wrapCode()` sees `await` or `return`\n- Top-level static `import ... from ...` and dynamic `import(...)` calls are routed through `rewriteImports()`, which sends them via `__omp_import__` so the specifier resolves against the session cwd\n- The prelude installs globals:\n - `display`, `print`\n - `read`, `write`, `append`, `sort`, `uniq`, `counter`, `diff`, `tree`, `env`, `output`\n - `tool.<name>(args)` proxy for arbitrary session tool calls\n- JS helpers are async because they cross the VM/tool boundary\n- `display(value)` behavior:\n - plain objects/arrays become JSON outputs\n - `{ type: \"image\", data, mimeType }` becomes an image output\n - scalars become text\n- The VM exposes a restricted `process` subset plus `Buffer`, `fetch`, `Blob`, `File`, `Headers`, `Request`, `Response`, `fs`, `require`, and browser-style globals\n- Per-session VM runs are serialized with `runQueued()`\n\n### Python runtime\n\nImplemented in `packages/coding-agent/src/eval/py/executor.ts`, `packages/coding-agent/src/eval/py/kernel.ts`, and `packages/coding-agent/src/eval/py/prelude.py`. See `docs/python-repl.md` for gateway and kernel details.\n\n- Default mode is retained `session` kernels keyed by `python:${sessionId}`\n- Optional `python.kernelMode = \"per-call\"` creates a fresh kernel for each cell and shuts it down afterward\n- `rst` disposes the retained kernel for that session before the cell runs; later Python cells in the same tool call reuse the fresh kernel\n- Startup path:\n - availability check\n - create/connect kernel\n - initialize cwd / env / `sys.path`\n - execute `PYTHON_PRELUDE`\n- Python cells run inside IPython/Jupyter, so top-level `await` works; the prompt warns not to use `asyncio.run(...)`\n- The Python prelude defines synchronous helpers with the same surface as JS (except `tool.<name>` exists only in JS)\n- `display(value)` wraps dict/list/tuple values in `IPython.display.JSON`; rich display MIME bundles are preserved\n- Kernel `display_data` / `execute_result` messages map to:\n - `application/x-omp-status` → status event\n - `image/png` → image output\n - `application/json` → JSON output\n - `text/markdown` → markdown output\n - `text/plain` → text output\n - `text/html` → HTML converted to markdown with `htmlToBasicMarkdown()`\n- Interactive stdin is rejected: `input_request` sends an empty reply, marks `stdinRequested`, and the executor returns exit code `1`\n\n### Multi-language call behavior\n\nA single tool call can mix Python and JS cells. Persistence is per language runtime:\n\n- resetting Python does not touch JS state\n- resetting JS does not touch Python state\n- each backend keeps its own retained session keyed from the same session-derived ID\n\n## Side Effects\n\n- Filesystem\n - JS/Python prelude helpers can read, write, append, diff, and traverse files under the session cwd or absolute paths.\n - Output may spill to an artifact file via `OutputSink`.\n- Network\n - Python backend speaks NDJSON to a local `python3` subprocess over stdin/stdout (no network).\n - JS runtime exposes `fetch` and `tool.<name>()`; those tools may perform additional network I/O.\n- Subprocesses / native bindings\n - Python availability check runs `<python> -c ...`.\n - Python backend spawns one `python -u runner.py` subprocess per kernel; cancellation sends `SIGINT`. Details in `docs/python-repl.md`.\n- Session state\n - `session.assertEvalExecutionAllowed?.()` can block execution.\n - `session.trackEvalExecution?.(...)` can register cancellable eval work.\n - `session.getSessionFile?.()` and `session.getEvalKernelOwnerId?.()` influence kernel reuse and artifact lookup.\n - JS VM contexts persist in `vmContexts` across eval calls until reset/disposal.\n - Python retained kernels persist in `kernelSessions` until reset, eviction, idle cleanup, or owner cleanup.\n- User-visible prompts / interactive UI\n - none; stdin requests are rejected programmatically\n- Background work / cancellation\n - Python retained kernels have heartbeat and idle cleanup timers.\n - Cancellation interrupts a running Python kernel and aborts JS promise waits.\n\n## Limits & Caps\n\n- Per-cell timeout default: 30s (`DEFAULT_TIMEOUT_MS` in `packages/coding-agent/src/eval/parse.ts`; `TOOL_TIMEOUTS.eval.default` in `packages/coding-agent/src/tools/tool-timeouts.ts`)\n- Timeout clamp: 1s minimum, 600s maximum (`TOOL_TIMEOUTS.eval` in `packages/coding-agent/src/tools/tool-timeouts.ts`)\n- Transcript code/output preview: 10 lines by default (`EVAL_DEFAULT_PREVIEW_LINES` in `packages/coding-agent/src/tools/eval.ts`)\n- Output truncation window: 50KB default (`DEFAULT_MAX_BYTES` in `packages/coding-agent/src/session/streaming-output.ts`)\n- Output line cap inside truncation helpers: 3000 lines (`DEFAULT_MAX_LINES` in `packages/coding-agent/src/session/streaming-output.ts`)\n- Streaming tail buffer for live updates: `DEFAULT_MAX_BYTES * 2` = 100KB (`packages/coding-agent/src/tools/eval.ts`)\n- Python retained kernel idle timeout: 5 minutes (`IDLE_TIMEOUT_MS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python retained kernel cap: 4 sessions (`MAX_KERNEL_SESSIONS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python retained kernel cleanup sweep: every 30s (`CLEANUP_INTERVAL_MS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python owner-cleanup shutdown wait: 2000ms (`OWNER_CLEANUP_KERNEL_SHUTDOWN_TIMEOUT_MS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python heartbeat interval: 5s (`ensureKernelHeartbeat()` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python external gateway availability check timeout: 5s (`AbortSignal.timeout(5000)` in `packages/coding-agent/src/eval/py/kernel.ts`)\n- Python auto-restart budget: one restart per retained session before hard failure (`restartCount > 1` in `packages/coding-agent/src/eval/py/executor.ts`)\n\n## Errors\n\n- Parse errors from `parseEvalInput()` throw immediately, for example invalid timeout strings.\n- Missing session without proxy executor throws `ToolError(\"Eval tool requires a session when not using proxy executor\")`.\n- Disabled/unavailable backends throw `ToolError` from `resolveBackend()`:\n - `eval.py = false`\n - `eval.js = false`\n - Python kernel unavailable\n - no backend available\n- JS runtime exceptions are converted into text output plus `exitCode: 1`; cancellations return `cancelled: true` and may append `Command timed out`.\n- Python execution errors from the kernel become text output and `exitCode: 1`; later cells are skipped.\n- Python stdin requests are treated as errors with the message `Kernel requested stdin; interactive input is not supported.`\n- Cancellation is returned, not thrown, once backend execution has started. The tool formats it as a cell failure and sets `details.isError = true`.\n- If parsing encountered `*** Abort`, the final text appends `ABORT_WARNING`, explicitly telling the model that earlier cells ran and state persists.\n- If output truncates, the tool still succeeds; truncation is surfaced through `details.meta` and artifact-backed full output when available.\n\n## Notes\n\n- The runtime parser is intentionally more permissive than `packages/coding-agent/src/eval/eval.lark`; maintain both when changing syntax.\n- Cell language in `ParsedEvalCell` is not the last word: `EvalTool.execute()` may override backend selection for cells without an explicit header by inheriting the previous runtime language.\n- `tool.<name>()` exists only in JS. Python prelude helpers do not call back into the full tool registry.\n- JS helper paths reject protocol URIs (`://`) in `resolvePath()`; the JS prelude is filesystem-only unless the code calls `tool.read(...)` or another tool explicitly.\n- Python helper `output(...)` depends on `PI_SESSION_FILE`; it fails outside a session-backed run.\n- `display()` can produce text and structured outputs from the same value; the renderer prefers markdown over `text/plain` when both exist.\n- JS static imports are rewritten only at top level. Nested imports stay invalid and surface normal JS syntax/runtime errors.\n- `EvalTool` is `concurrency = \"exclusive\"`, so eval calls do not overlap within a session.\n- The tool description shown to the model is templated by backend availability (`getEvalToolDescription()`); if Python is unavailable, the prompt omits Python-specific instructions.\n",
71
71
  "tools/exit_plan_mode.md": "# exit_plan_mode\n\n> Submits the current plan-mode plan for user approval.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/exit-plan-mode.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/exit-plan-mode.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/plan-mode-guard.ts` — resolves canonical plan paths during plan mode\n - `packages/coding-agent/src/plan-mode/approved-plan.ts` — renames approved plan artifact after user approval\n - `packages/coding-agent/src/modes/interactive-mode.ts` — approval popup, plan preview, mode exit, tool restoration\n - `packages/coding-agent/src/plan-mode/state.ts` — plan-mode state shape\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `title` | `string` | Yes | Final plan title. `.md` is optional; the runtime normalizes to `local://<title>.md`. Allowed characters: letters, numbers, `_`, `-`. |\n\n## Outputs\n- Single-shot success result with `content[0].text = \"Plan ready for approval.\"`.\n- `details` contains:\n - `planFilePath` — current plan artifact path from plan-mode state, typically `local://PLAN.md`\n - `planExists` — whether that file existed at call time\n - `title` — normalized title without `.md`\n - `finalPlanFilePath` — normalized destination, always `local://<title>.md`\n- The actual rename and mode transition happen later in the interactive controller after the user chooses an approval action.\n\n## Flow\n1. `execute()` reads `session.getPlanModeState()` and rejects the call unless `state.enabled` is true.\n2. `normalizePlanTitle()` trims whitespace, rejects empty values, rejects `/`, `\\\\`, and `..`, appends `.md` if missing, and enforces `^[A-Za-z0-9_-]+\\.md$`.\n3. The tool computes `finalPlanFilePath = local://<normalized>.md` and resolves both source and destination through `resolvePlanPath(...)` to validate them against plan-mode path rules.\n4. It `stat`s the current plan file path; if the plan artifact does not exist it throws a `ToolError` telling the caller to write the finalized plan first.\n5. On success it returns the approval-ready payload; it does not mutate files itself.\n6. `packages/coding-agent/src/modes/controllers/event-controller.ts` watches successful `exit_plan_mode` results and forwards `details` to `InteractiveMode.handleExitPlanModeTool(...)`.\n7. The interactive controller aborts the agent, renders the current plan, and shows four choices: `Approve and execute`, `Approve and keep context`, `Refine plan`, `Stay in plan mode`.\n8. If the user approves, `#approvePlan(...)` renames `local://PLAN.md` to `local://<title>.md`, exits plan mode, restores the previous tool set, optionally clears session context, writes the approved plan into the new local root when context is reset, and injects a synthetic system prompt instructing execution from the finalized artifact.\n\n## Side Effects\n- Filesystem\n - Tool itself only `stat`s the current plan file.\n - Approval path later renames the plan artifact via `fs.rename(...)` and may rewrite the approved plan into a fresh local root with `Bun.write(...)`.\n- Session state\n - Requires active plan-mode state.\n - Approval flow aborts the current agent loop, exits plan mode, restores previous active tools, clears or preserves context depending on the user choice, and records the approved plan reference path.\n- User-visible prompts / interactive UI\n - Successful calls trigger a plan preview and an approval/refinement selector in interactive mode.\n- Background work / cancellation\n - The controller aborts the running agent before showing the popup to prevent repeated `exit_plan_mode` calls.\n\n## Limits & Caps\n- `title` accepts only `[A-Za-z0-9_-]` plus optional `.md` (`packages/coding-agent/src/tools/exit-plan-mode.ts`).\n- Destination must be under the `local:` scheme; approval rename rejects non-`local:` source or destination paths (`packages/coding-agent/src/plan-mode/approved-plan.ts`).\n- In plan mode, only the plan file may be edited; other writes are blocked by `enforcePlanModeWrite(...)` in `packages/coding-agent/src/tools/plan-mode-guard.ts`.\n\n## Errors\n- Plan mode inactive: throws `ToolError(\"Plan mode is not active.\")`.\n- Empty title: throws `ToolError(\"Title is required and must not be empty.\")`.\n- Path traversal / separators: throws `ToolError(\"Title must not contain path separators or '..'.\")`.\n- Invalid characters: throws `ToolError(\"Title may only contain letters, numbers, underscores, or hyphens.\")`.\n- Missing plan artifact: throws `ToolError(\"Plan file not found at ... Write the finalized plan ... before calling exit_plan_mode.\")`.\n- Approval-time failures surface in the UI from `InteractiveMode.handleExitPlanModeTool(...)`, including destination already exists and rename failures from `renameApprovedPlanFile(...)`.\n\n## Notes\n- This tool is hidden/internal: it is injected when `plan.enabled` is on and is not part of normal discoverable built-ins (`packages/coding-agent/src/tools/index.ts`, `packages/coding-agent/src/session/agent-session.ts`).\n- The tool returning success does not mean plan mode has ended; it only means the request was handed off to the approval UI.\n- `resolvePlanPath(...)` special-cases bare filenames matching the plan basename so `PLAN.md` maps back to the canonical session-scoped `local://PLAN.md` artifact.\n- `Approve and keep context` skips the full conversation reset; `Approve and execute` clears context, then copies the approved plan into the new session-local artifact root before execution resumes.\n",
72
72
  "tools/find.md": "# find\n\n> Find filesystem paths by glob; use `search` when you need content matches instead of path matches.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/find.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/find.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/path-utils.ts` — normalize inputs; split base path vs glob.\n - `packages/coding-agent/src/tools/list-limit.ts` — apply result-count caps.\n - `packages/coding-agent/src/session/streaming-output.ts` — truncate text output at byte cap.\n - `packages/coding-agent/src/tools/tool-result.ts` — build `content` and `details.meta`.\n - `packages/coding-agent/src/tools/output-meta.ts` — encode limit / truncation metadata.\n - `packages/coding-agent/src/tools/tool-errors.ts` — map user-facing tool errors.\n - `packages/coding-agent/src/tools/index.ts` — register the built-in local implementation.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `paths` | `string[]` | Yes | One or more globs, files, or directories. Empty strings are rejected. Multiple entries may be merged into one brace-union search when their base paths can be resolved together. |\n| `hidden` | `boolean` | No | Whether hidden files are included. Defaults to `true` (`hidden ?? true`). |\n| `limit` | `number` | No | Max returned paths. Defaults to `1000`. Must be a finite positive number; non-integers are floored. |\n\n## Outputs\nThe tool returns a single text block plus structured `details`.\n\n- Success text: newline-delimited paths, one per line, relative to the session cwd when possible; absolute when outside cwd. Exact file inputs return that file path as one line.\n- Empty result text: `No files found matching pattern`.\n- Multi-path partial miss: appends `Skipped missing paths: ...` after the result block, or after the empty-result line.\n- `details` may include:\n - `scopePath`: display form of the searched root or merged roots.\n - `fileCount`: number of paths returned after result limiting.\n - `files`: returned paths as an array.\n - `truncated`: whether result count or byte truncation occurred.\n - `resultLimitReached`: reached result limit.\n - `missingPaths`: skipped missing inputs in multi-path calls.\n - `truncation` / `meta.limits`: structured truncation and limit metadata for renderers.\n- Streaming: when the runtime supplies `onUpdate`, the local implementation emits incremental newline-delimited text snapshots during globbing, throttled to 200 ms.\n\n## Flow\n1. `FindTool.execute()` normalizes each `paths` entry with `normalizePathLikeInput()` and `/\\\\/g -> \"/\"` (`packages/coding-agent/src/tools/find.ts`). Empty normalized entries fail with `` `paths` must contain non-empty globs or paths ``.\n2. For multi-path local calls, `partitionExistingPaths(..., parseFindPattern)` (`packages/coding-agent/src/tools/path-utils.ts`) stats each base path. Missing entries are skipped; if all are missing, the tool throws `Path not found: ...`. Single missing paths still hard-fail.\n3. The tool tries `resolveExplicitFindPatterns()` to merge multiple inputs into one search rooted at a common base path. If that does not apply, it parses one input with `parseFindPattern()`.\n4. `parseFindPattern()` determines `(basePath, globPattern, hasGlob)`:\n - no glob chars (`*`, `?`, `[`, `{`) => search that path with implicit `**/*`.\n - glob in the first segment => search from `.` and, unless the pattern already starts with `**/`, prefix it with `**/`.\n - glob later in the path => split at the first glob-bearing segment.\n5. `resolveToCwd()` converts the base path to an absolute path under the session cwd. A resolved `/` is rejected with `Searching from root directory '/' is not allowed`.\n6. `limit` is defaulted to `DEFAULT_LIMIT` (`1000`) and validated as a positive finite integer. `hidden` defaults to `true`. The tool also creates a 5 s timeout via `AbortSignal.timeout(GLOB_TIMEOUT_MS)`.\n7. Execution then branches:\n - **Custom operations branch**: if `FindToolOptions.operations.glob` exists, the tool checks existence with `operations.exists()`, short-circuits exact-file inputs via `operations.stat()` when available, then calls `operations.glob(globPattern, searchPath, { ignore: [\"**/node_modules/**\", \"**/.git/**\"], limit })`.\n - **Built-in local branch**: the tool stats `searchPath`. Exact-file inputs return immediately. Directory inputs call `natives.glob()` with `fileType: File`, `hidden`, `maxResults: limit`, `sortByMtime: true`, `gitignore: true`, and the combined abort signal.\n8. In the local branch, optional `onMatch` callbacks convert each match to a cwd-relative display path and emit throttled progress updates.\n9. After native glob returns, JS sorts `result.matches` by `mtime` descending (`(b.mtime ?? 0) - (a.mtime ?? 0)`) before formatting paths.\n10. `buildResult()` applies `applyListLimit()` to cap the array again at `limit`, joins paths with `\\n`, then runs `truncateHead()` with `maxLines: Number.MAX_SAFE_INTEGER`. In practice this leaves the 50 KB byte cap in place while disabling the default 3000-line cap.\n11. `toolResult()` packages text plus `details`, and records result-limit / truncation metadata for renderers.\n\n## Modes / Variants\n- **Exact file path**: if the parsed input has no glob and the resolved path stats as a file, output is that one path.\n- **Directory path**: if the parsed input has no glob and stats as a directory, the tool searches it with implicit `**/*`.\n- **Single glob path**: one input parsed by `parseFindPattern()`.\n- **Merged multi-path search**: multiple inputs resolved by `resolveExplicitFindPatterns()` into one brace-union glob rooted at a common base path.\n- **Partial multi-path search with missing inputs**: local multi-path calls skip missing base paths and surface them as `missingPaths` / `Skipped missing paths: ...`.\n- **Custom delegated search**: uses injected `FindOperations` instead of local fs + native glob.\n\n## Side Effects\n- Filesystem\n - Stats the resolved base path, and in local multi-path mode stats every candidate base path up front.\n - Does not write files.\n- Subprocesses / native bindings\n - Built-in local mode calls the native `@oh-my-pi/pi-natives` glob implementation.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Emits structured progress updates when `onUpdate` is provided.\n - Adds truncation / limit metadata to the tool result.\n- Background work / cancellation\n - Local globbing is cancellable through the caller abort signal plus an internal 5 s timeout.\n\n## Limits & Caps\n- Default result limit: `1000` (`DEFAULT_LIMIT` in `packages/coding-agent/src/tools/find.ts`).\n- Local glob timeout: `5000` ms (`GLOB_TIMEOUT_MS` in `packages/coding-agent/src/tools/find.ts`).\n- Output byte cap: `50 * 1024` bytes (`DEFAULT_MAX_BYTES` in `packages/coding-agent/src/session/streaming-output.ts`).\n- Default generic line cap in `truncateHead()` is `3000`, but `find` overrides `maxLines` to `Number.MAX_SAFE_INTEGER`, so byte size — not line count — is the practical output truncation cap.\n- Streaming update throttle: `200` ms between `onUpdate` emissions.\n- Sort order: most recent `mtime` first in the built-in local branch and promised in the prompt. The tool re-sorts in JS even though native glob receives `sortByMtime: true` so native code can still stop early at `maxResults`.\n\n## Errors\n- User-facing `ToolError`s from `FindTool.execute()` include:\n - `` `paths` must contain non-empty globs or paths ``\n - `Path not found: ...`\n - `Searching from root directory '/' is not allowed`\n - `Limit must be a positive number`\n - `Path is not a directory: ...`\n - `find timed out after 5s`\n- If the caller aborts, the local branch converts `AbortError` into `ToolAbortError`.\n- Non-`ENOENT` stat failures and other unexpected errors are rethrown.\n- Empty matches are not errors; they return the no-files text result.\n\n## Notes\n- Reach for `find` for filename / path discovery. Reach for `search` when the selection criterion is file contents or regex matches; `search` takes a `pattern` and returns anchored content matches, while `find` only returns matching paths (`packages/coding-agent/src/prompts/tools/find.md`, `packages/coding-agent/src/prompts/tools/search.md`).\n- Bare top-level globs are made recursive. `*.ts` is parsed as base `.` plus glob `**/*.ts`; `src/*.ts` stays rooted at `src` with a non-recursive `*.ts` segment; `src/**/*.ts` preserves explicit recursion.\n- `.gitignore` is always enabled in the built-in local branch (`gitignore: true`). There is no model-facing flag to disable it.\n- `hidden` defaults to `true`; hidden-file exclusion is opt-out, not opt-in.\n- Multi-path missing-input tolerance only applies in the built-in local branch. The custom-operations branch hard-fails the first missing `searchPath` it checks.\n- The custom `FindOperations.glob()` hook receives `ignore` and `limit`, but not the `hidden` flag or an explicit `.gitignore` toggle. A remote delegate must account for that itself if it wants parity with the local branch.\n- Built-in local globbing asks the native layer for `fileType: File`, so recursive directory searches yield files, not directories. Directory outputs are only possible through exact-path passthrough or custom delegates that return them.\n",
73
- "tools/github.md": "# github\n\n> Dispatch GitHub CLI operations for repositories, issues, pull requests, search, and Actions run watching.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/gh.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/github.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/gh-format.ts` — shorten commit SHAs for summaries.\n - `packages/coding-agent/src/tools/gh-renderer.ts` — TUI rendering, especially `run_watch` live/result views.\n - `packages/coding-agent/src/utils/git.ts` — `gh`/`git` process wrappers, repo locking, branch config writes.\n - `packages/utils/src/dirs.ts` — base directory for dedicated PR worktrees.\n - `packages/coding-agent/src/sdk.ts` — session artifact allocation hook.\n - `packages/coding-agent/src/session/artifacts.ts` — artifact filename format `<id>.<toolType>.log`.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `op` | `\"repo_view\" \\| \"issue_view\" \\| \"pr_create\" \\| \"pr_view\" \\| \"pr_diff\" \\| \"pr_checkout\" \\| \"pr_push\" \\| \"search_issues\" \\| \"search_prs\" \\| \"search_code\" \\| \"search_commits\" \\| \"search_repos\" \\| \"run_watch\"` | Yes | Dispatch selector. `GithubTool.execute()` switches only on this field. |\n| `repo` | `string` | No | `owner/repo` override. Ignored when the identifier argument is already a full GitHub URL. Required in practice when `gh` cannot infer repo context from the current checkout. |\n| `branch` | `string` | No | Used by `repo_view`, `pr_push`, and `run_watch`. `run_watch` falls back to current git branch when `run` is omitted; `pr_push` falls back to current branch. |\n| `issue` | `string` | No | Used only by `issue_view`. Required there; accepts an issue number or GitHub issue URL. |\n| `pr` | `string \\| string[]` | No | Used by `pr_view`, `pr_diff`, `pr_checkout`. Each item may be a PR number, branch name, or GitHub PR URL. Array form enables batching. Omitted means current branch PR. |\n| `comments` | `boolean` | No | Used by `issue_view` and `pr_view`. Defaults to `true`. |\n| `nameOnly` | `boolean` | No | Used only by `pr_diff`; adds `--name-only`. |\n| `exclude` | `string[]` | No | Used only by `pr_diff`; each entry becomes `--exclude <pattern>`. Empty strings are rejected. |\n| `force` | `boolean` | No | Used only by `pr_checkout`. Defaults to `false`; allows resetting an existing `pr-<number>` local branch to the PR head commit. |\n| `forceWithLease` | `boolean` | No | Used only by `pr_push`; passed through to git push. |\n| `title` | `string` | No | Used only by `pr_create`. Required unless `fill` is `true`. |\n| `body` | `string` | No | Used only by `pr_create`. Mutually exclusive with `fill`. Empty/omitted body becomes `--body \"\"` to suppress the interactive editor. Non-empty body is written to a temp file and passed as `--body-file`. |\n| `base` | `string` | No | Used only by `pr_create`; passed as `--base`. |\n| `head` | `string` | No | Used only by `pr_create`; passed as `--head`. |\n| `draft` | `boolean` | No | Used only by `pr_create`. Defaults to `false`. |\n| `fill` | `boolean` | No | Used only by `pr_create`. Defaults to `false`. Mutually exclusive with `title` and `body`. |\n| `reviewer` | `string[]` | No | Used only by `pr_create`; each entry becomes `--reviewer`. |\n| `assignee` | `string[]` | No | Used only by `pr_create`; each entry becomes `--assignee`. |\n| `label` | `string[]` | No | Used only by `pr_create`; each entry becomes `--label`. |\n| `query` | `string` | No | Used by all `search_*` ops. Required there. |\n| `limit` | `number` | No | Used by all `search_*` ops. Defaults to `10`, floored, clamped to `50`, and must be `> 0`. |\n| `run` | `string` | No | Used only by `run_watch`. Must be a numeric run ID or full GitHub Actions run URL. |\n| `tail` | `number` | No | Used only by `run_watch`. Defaults to `15`, floored, clamped to `200`, and must be `> 0`. |\n\n## Outputs\nThe tool returns a single text result built by `buildTextResult()` in `packages/coding-agent/src/tools/gh.ts`.\n\n- `content`: one text block. Multi-item ops join sections with blank lines and `---` separators.\n- `sourceUrl`: set for single repo/issue/PR/run results when a canonical URL is known.\n- `details`: optional structured metadata used by the TUI renderer.\n - Common fields: `artifactId`, `repo`, `branch`, `worktreePath`, `remote`, `remoteBranch`, `headSha`, `runId`, `runIds`, `status`, `conclusion`, `failedJobs`.\n - `pr_checkout` adds `checkouts: GhPrCheckoutSummary[]`.\n - `run_watch` adds `watch: GhRunWatchViewDetails`, which drives the custom live/result renderer in `packages/coding-agent/src/tools/gh-renderer.ts`.\n- Artifact trailer: when `artifactId` is present, the text body gets an appended line like `Full failed-job logs: artifact://<id>`.\n - `run_watch` allocates artifacts with `session.allocateOutputArtifact(\"github\")`; persistent sessions therefore save failed-log bodies as `<artifact-dir>/<id>.github.log`.\n\n`run_watch` is the only streaming op. It emits `onUpdate` snapshots while polling, then returns one final text result.\n\n## Flow\n1. `GithubTool.createIf()` exposes the tool only when `git.github.available()` finds `gh` on `PATH`.\n2. `GithubTool.execute()` wraps dispatch in `untilAborted()` and switches on `params.op`.\n3. Each op normalizes optional strings, arrays, booleans, and numeric caps locally in `packages/coding-agent/src/tools/gh.ts`.\n4. CLI execution goes through `git.github.run/json/text()` in `packages/coding-agent/src/utils/git.ts`:\n - spawns `gh ...` with `Bun.spawn()`;\n - trims stdout/stderr unless `trimOutput: false`;\n - maps common auth/repo-context failures into tool-facing `ToolError` messages;\n - `json()` rejects empty or invalid JSON.\n5. Read-style ops (`repo_view`, `issue_view`, `pr_view`, `search_*`) fetch JSON and format Markdown-like text summaries.\n6. `pr_view` optionally makes an extra REST call to `/repos/<repo>/pulls/<n>/comments` so inline review comments are included; this is separate from `gh pr view --json`.\n7. `pr_diff` fetches raw text from `gh pr diff`; multi-PR batches are handled with `Promise.all()`.\n8. `pr_checkout` resolves PR metadata first, then enters `git.withRepoLock()` before any git mutation so parallel checkout calls for the same primary repo do not race on shared `.git` state.\n9. `pr_push` reads PR head metadata back from git branch config, derives a refspec, then pushes with `git.push()`.\n10. `pr_create` shells out once, then best-effort re-reads the created PR with `gh pr view` for a richer summary.\n11. `run_watch` chooses either run mode (`run` supplied) or commit mode (`run` omitted), polls GitHub Actions APIs every 3 seconds, emits streaming updates, and may save a full failed-log artifact before returning.\n12. Final text goes through `toolResult().text(...)`; if `session.allocateOutputArtifact()` returns a slot, failed-log text is persisted with `Bun.write()`.\n\n## Modes / Variants\n\n### `repo_view`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `branch` |\n| `gh` command | `gh repo view [<repo>] [--branch <branch>] --json <GH_REPO_FIELDS>` |\n| Batching | None |\n| Output | `# <owner/repo>` header, description, URL, default branch, requested branch, visibility, permission, primary language, stars, forks, archive/fork flags, updated timestamp, homepage, topics. `sourceUrl = data.url`. |\n\nIf `repo` is omitted, `gh` repository resolution is used.\n\n### `issue_view`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `issue` |\n| Optional fields | `repo`, `comments` |\n| `gh` command | `gh issue view <issue> [--repo <repo>] --json <GH_ISSUE_FIELDS or GH_ISSUE_FIELDS_NO_COMMENTS>` |\n| Batching | None |\n| Output | Single issue summary with metadata, `## Body`, and optional `## Comments`. `sourceUrl = data.url`. |\n\n`comments: false` switches the requested JSON field set and suppresses comment rendering.\n\n### `pr_create`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` plus either `fill=true` or `title` |\n| Optional fields | `repo`, `title`, `body`, `base`, `head`, `draft`, `fill`, `reviewer[]`, `assignee[]`, `label[]` |\n| `gh` command | `gh pr create ...` with flags assembled from provided fields |\n| Batching | None |\n| Output | `# Created Pull Request ...` summary with URL, state, draft flag, base/head, author, created time, labels, optional body. `sourceUrl` is the created PR URL. |\n\nBranches:\n- `fill && (title || body !== undefined)` throws.\n- Non-empty `body` is written under a temp dir `gh-pr-body-*` in `os.tmpdir()`, passed as `--body-file`, then removed in `finally`.\n- After creation, the tool parses the returned URL and best-effort runs `gh pr view <number> --repo <repo> --json <GH_PR_FIELDS_NO_COMMENTS>`; failures there are swallowed.\n\n### `pr_view`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `pr`, `comments` |\n| `gh` command | For each requested PR: `gh pr view [<pr>] [--repo <repo>] --json <GH_PR_FIELDS or GH_PR_FIELDS_NO_COMMENTS>` |\n| Batching | Yes. `pr` may be `string[]`; each entry is fetched independently with `Promise.all()`. Omitted `pr` means one fetch for current branch PR. |\n| Output | Single PR: one PR summary with body, up to 50 files, optional reviews, inline review comments, and issue comments. Batched: `# <n> Pull Requests` plus per-PR sections. |\n\nWhen comments are enabled and both repo + PR number are known, the tool paginates `/repos/<repo>/pulls/<n>/comments` with `per_page=100` to supplement `gh pr view` output.\n\n### `pr_diff`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `pr`, `nameOnly`, `exclude[]` |\n| `gh` command | For each requested PR: `gh pr diff [<pr>] [--repo <repo>] --color never [--name-only] [--exclude <glob> ...]` |\n| Batching | Yes. Same `pr` normalization as `pr_view`; fetched with `Promise.all()`. |\n| Output | Single PR: `# Pull Request Diff` or `# Pull Request Files` followed by raw CLI output. Batched: `# <n> Pull Request Diffs` / `File Lists` plus labeled sections. |\n\nDiff stdout is preserved without trimming. Empty output becomes `No diff output.` or `No changed files.`.\n\n### `pr_checkout`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `pr`, `force` |\n| `gh` command | For each requested PR: `gh pr view [<pr>] [--repo <repo>] --json <GH_PR_CHECKOUT_FIELDS>`; cross-repo PRs may also call `gh repo view <headRepository> --json <GH_REPO_CLONE_FIELDS>`. |\n| Batching | Yes. `pr` may be `string[]`; each PR is resolved in parallel, but git mutations are serialized per primary repo by `git.withRepoLock()`. |\n| Output | Single PR: checkout/worktree summary plus `details.repo`, `details.branch`, `details.worktreePath`, `details.remote`, `details.remoteBranch`, `details.checkouts`. Batched: `# <n> Pull Request Worktrees (...)` plus one section per PR and aggregated `details.checkouts`. |\n\nWorktree and metadata behavior:\n- Local branch name is always `pr-<number>`.\n- Worktree path is `path.join(getWorktreesDir(), encodeRepoPathForFilesystem(primaryRepoRoot), localBranch)`, where `getWorktreesDir()` is `~/.omp/wt`; effective path is `~/.omp/wt/<encoded-primary-repo-root>/pr-<number>`.\n- Existing worktree detection is by branch ref `refs/heads/pr-<number>` from `git.worktree.list()`.\n- New worktree creation calls `git.worktree.add(repoRoot, finalWorktreePath, localBranch, { signal })` after verifying the path is neither already registered nor already present on disk.\n- For same-repo PRs, remote is `origin`. For cross-repo PRs, the tool resolves a clone URL for the head repo, reuses an existing remote with the same URL when possible, or creates `fork-<owner>` / `fork-<owner>-<n>`.\n- The branch push metadata is persisted with `git config` under the repository's shared `.git/config` as:\n - `branch.pr-<number>.remote`\n - `branch.pr-<number>.merge`\n - `branch.pr-<number>.pushRemote`\n - `branch.pr-<number>.ompPrHeadRef`\n - `branch.pr-<number>.ompPrUrl`\n - `branch.pr-<number>.ompPrIsCrossRepository`\n - `branch.pr-<number>.ompPrMaintainerCanModify`\n- If `refs/heads/pr-<number>` already exists at a different commit, checkout fails unless `force=true`, in which case `git branch --force` resets it to the fetched PR head.\n- If a matching worktree already exists, the tool reuses it and reports `reused: true`.\n\n### `pr_push`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `branch`, `forceWithLease` |\n| `gh` command | None. This path uses git, not `gh`. |\n| Batching | None |\n| Output | `# Pushed Pull Request Branch` summary with local branch, remote, remote branch, remote URL, PR URL, and force-with-lease flag. `sourceUrl = prUrl` when known. |\n\nPush target resolution reads the `branch.<name>.ompPrHeadRef`, `pushRemote`/`remote`, `ompPrUrl`, `ompPrMaintainerCanModify`, and `ompPrIsCrossRepository` git-config keys written by `pr_checkout`. If the current checked-out branch matches the target branch, the source ref is `HEAD`; otherwise it pushes `refs/heads/<branch>`. The refspec is `HEAD:refs/heads/<headRef>` or `refs/heads/<branch>:refs/heads/<headRef>`.\n\n### `search_issues`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `repo`, `limit` |\n| `gh` command | `gh search issues --limit <limit> --json <GH_SEARCH_FIELDS> [--repo <repo>] -- <query>` |\n| Batching | None |\n| Output | `# GitHub issues search`, echoed query, optional repo, result count, then one bullet per issue with repo/state/author/labels/timestamps/URL. |\n\n### `search_prs`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `repo`, `limit` |\n| `gh` command | `gh search prs --limit <limit> --json <GH_SEARCH_FIELDS> [--repo <repo>] -- <query>` |\n| Batching | None |\n| Output | Same shape as `search_issues`, labeled as pull requests. |\n\n### `search_code`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `repo`, `limit` |\n| `gh` command | `gh search code --limit <limit> --json <GH_SEARCH_CODE_FIELDS> [--repo <repo>] -- <query>` |\n| Batching | None |\n| Output | `# GitHub code search`, result count, then one bullet per match with path, repo, short commit SHA, URL, and first normalized text-match fragment line when present. |\n\n### `search_commits`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `repo`, `limit` |\n| `gh` command | `gh search commits --limit <limit> --json <GH_SEARCH_COMMITS_FIELDS> [--repo <repo>] -- <query>` |\n| Batching | None |\n| Output | `# GitHub commits search`, result count, then one bullet per commit: short SHA + first commit-message line, repo, author, date, URL. |\n\n### `search_repos`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `limit` |\n| `gh` command | `gh search repos --limit <limit> --json <GH_SEARCH_REPOS_FIELDS> -- <query>` |\n| Batching | None |\n| Output | `# GitHub repositories search`, result count, then one bullet per repo with first description line, language, stars, forks, open issues, visibility, archive/fork flags, updated time, URL. |\n\n`repo` is intentionally not used for this op.\n\n### `run_watch`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `branch`, `run`, `tail` |\n| `gh` command | Repo resolution: `gh repo view --json nameWithOwner -q .nameWithOwner` when `repo` and run URL repo are both absent. Single-run mode uses `gh api --method GET /repos/<repo>/actions/runs/<runId>` and `gh api --method GET /repos/<repo>/actions/runs/<runId>/jobs`. Commit mode uses `gh api --method GET /repos/<repo>/branches/<branch>`, `gh api --method GET /repos/<repo>/actions/runs`, `gh api --method GET /repos/<repo>/actions/runs/<runId>/jobs`, and `gh api /repos/<repo>/actions/jobs/<jobId>/logs` for failed jobs. |\n| Batching | Implicit batching only in commit mode: all workflow runs for one commit are tracked together. |\n| Output | Streaming watch snapshots via `onUpdate`, then a final text report. On failure, appends `Full failed-job logs: artifact://<id>` and sets `details.artifactId`. |\n\nWatch flow:\n- `run` parsing accepts either a decimal run ID or a full run URL. URL repo must match explicit `repo` when both are given.\n- Poll interval is fixed at 3 seconds (`RUN_WATCH_INTERVAL_DEFAULT`).\n- Failure grace period is fixed at 5 seconds (`RUN_WATCH_GRACE_DEFAULT`). When any failed job appears before completion, the tool emits a note, waits once, re-fetches state, then collects logs so concurrent failures are included.\n- Failed-job logs are fetched with `gh api /repos/<repo>/actions/jobs/<jobId>/logs` via `git.github.run()`, not `json()`. Non-zero exit leaves `available: false` instead of failing the whole watch.\n- Inline result includes only the last `tail` lines per failed job. The saved artifact contains full logs (`mode: \"full\"`).\n- In commit mode, success is intentionally double-checked: once all known runs are successful, the tool waits one more poll interval and succeeds only if the set of run IDs is unchanged. This avoids returning before late workflow runs appear for the same commit.\n- `details.watch` drives a specialized renderer in `packages/coding-agent/src/tools/gh-renderer.ts`; non-watch results fall back to generic text rendering.\n\n## Side Effects\n- Filesystem\n - `pr_create` may create a temp dir under `os.tmpdir()` named `gh-pr-body-*`, write `body.md`, then remove the dir in `finally`.\n - `pr_checkout` may create directories under `~/.omp/wt/<encoded-primary-repo-root>/` and add git worktrees there.\n - `run_watch` may write a session artifact with full failed-job logs.\n- Network\n - Every op shells out to `gh`, which then talks to GitHub APIs except `pr_push`.\n - `pr_push` uses git network transport to the configured remote.\n- Subprocesses / native bindings\n - All `gh` calls use `Bun.spawn([\"gh\", ...args])`.\n - `pr_checkout` and `pr_push` also invoke git helpers from `packages/coding-agent/src/utils/git.ts`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - `run_watch` consumes `session.allocateOutputArtifact()` when failed-job logs are persisted.\n - Returned `details` objects carry run/checkouts metadata for the renderer/UI.\n- User-visible prompts / interactive UI\n - `gh` interactive editor fallback is suppressed for `pr_create` by forcing either `--body-file` or `--body \"\"`.\n - `gh-renderer` provides compact headers for all ops and a custom live watch view for `run_watch`.\n- Background work / cancellation\n - `run_watch` loops until success/failure and uses `abortableSleep()` between polls.\n - `GithubTool.execute()` is wrapped in `untilAborted()`; `git.github.run()` forwards the abort signal into `Bun.spawn()`.\n\n## Limits & Caps\n- Search result default: `10` (`SEARCH_LIMIT_DEFAULT` in `packages/coding-agent/src/tools/gh.ts`).\n- Search result max: `50` (`SEARCH_LIMIT_MAX`).\n- PR file preview inside `pr_view`: first `50` files only (`FILE_PREVIEW_LIMIT`).\n- Run-watch poll interval: `3s` (`RUN_WATCH_INTERVAL_DEFAULT`).\n- Run-watch failure grace period: `5s` (`RUN_WATCH_GRACE_DEFAULT`).\n- Run-watch failed-log tail default: `15` lines (`RUN_WATCH_TAIL_DEFAULT`).\n- Run-watch failed-log tail max: `200` lines (`RUN_WATCH_TAIL_MAX`).\n- PR review comments page size: `100` (`REVIEW_COMMENTS_PAGE_SIZE`).\n- Actions jobs page size: `100` (`RUN_JOBS_PAGE_SIZE`).\n- Search and tail numeric inputs are floored with `Math.floor()`, clamped to the max, and rejected when non-finite or `<= 0`.\n- `pr_view`/`pr_diff`/`pr_checkout` batch fan-out is unbounded in tool code; all requested PRs are launched with `Promise.all()`.\n\n## Errors\n- Tool creation is skipped entirely when `gh` is not installed.\n- `git.github.run()` throws `ToolError(\"GitHub CLI (gh) is not installed...\")` if `gh` is missing at execution time.\n- `git.github.text/json()` map common failures to model-facing messages:\n - not authenticated → `GitHub CLI is not authenticated. Run \\`gh auth login\\`.`\n - missing repo context without explicit `repo` → `GitHub repository context is unavailable. Pass \\`repo\\` explicitly or run the tool inside a GitHub checkout.`\n - otherwise stderr/stdout text, or fallback `GitHub CLI command failed: gh ...`\n- `json()` also throws on empty stdout or invalid JSON.\n- Local validation errors throw `ToolError`, including:\n - missing required per-op fields (`issue`, `query`, `title unless fill=true`)\n - invalid numeric `limit` / `tail`\n - invalid `run` format\n - `fill` combined with `title` or `body`\n - empty exclude patterns\n - missing git repo / branch / HEAD context for checkout, push, or watch\n - `pr_push` on a branch without `ompPrHeadRef` metadata\n - conflicting existing worktree path or branch without `force`\n- `run_watch` treats failed-job log fetches specially: missing log content does not fail the watch; it marks that log `available: false` and prints `Log tail unavailable.` / `Full log unavailable.`.\n- `pr_create` swallows only the post-create best-effort `gh pr view` refresh; the create step itself still fails normally.\n\n## Notes\n- `appendRepoFlag()` intentionally skips `--repo` when the identifier argument is already a full GitHub URL; that lets `gh` derive repo/number from the URL.\n- `normalizePrIdentifierList()` accepts `reviewer`, `assignee`, and `label` arrays too; the helper name is broader than its callers.\n- `pr_push` depends on `pr_checkout` having run first for that local branch; there is no alternate metadata source.\n- `pr_checkout` stores push metadata in branch config, not in the worktree directory. Reusing the same `pr-<number>` branch reuses those config keys.\n- Worktree write serialization is keyed by the primary repo root, not the current worktree path, because git worktrees share `.git/config`, `packed-refs`, commit-graph, and worktree metadata files.\n- `search_repos` is the only search op that never forwards `repo`; repository scoping must be expressed in the query itself.\n- `run_watch` success on commit mode means “all observed runs succeeded and no additional runs appeared one poll later”, not merely “latest poll looked green”.\n- The TUI renderer collapses failed log previews unless the result view is expanded; the underlying text result still contains the same tailed lines plus any artifact reference.\n",
73
+ "tools/github.md": "# github\n\n> Dispatch GitHub CLI operations for repositories, issues, pull requests, search, and Actions run watching.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/gh.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/github.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/gh-format.ts` — shorten commit SHAs for summaries.\n - `packages/coding-agent/src/tools/gh-renderer.ts` — TUI rendering, especially `run_watch` live/result views.\n - `packages/coding-agent/src/utils/git.ts` — `gh`/`git` process wrappers, repo locking, branch config writes.\n - `packages/utils/src/dirs.ts` — base directory for dedicated PR worktrees.\n - `packages/coding-agent/src/sdk.ts` — session artifact allocation hook.\n - `packages/coding-agent/src/session/artifacts.ts` — artifact filename format `<id>.<toolType>.log`.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `op` | `\"repo_view\" \\| \"pr_create\" \\| \"pr_checkout\" \\| \"pr_push\" \\| \"search_issues\" \\| \"search_prs\" \\| \"search_code\" \\| \"search_commits\" \\| \"search_repos\" \\| \"run_watch\"` | Yes | Dispatch selector. `GithubTool.execute()` switches only on this field. |\n| `repo` | `string` | No | `owner/repo` override. Ignored when the identifier argument is already a full GitHub URL. Required in practice when `gh` cannot infer repo context from the current checkout. |\n| `branch` | `string` | No | Used by `repo_view`, `pr_push`, and `run_watch`. `run_watch` falls back to current git branch when `run` is omitted; `pr_push` falls back to current branch. |\n| `pr` | `string \\| string[]` | No | Used by `pr_checkout`. Each item may be a PR number, branch name, or GitHub PR URL. Array form enables batching. Omitted means current branch PR. |\n| `force` | `boolean` | No | Used only by `pr_checkout`. Defaults to `false`; allows resetting an existing `pr-<number>` local branch to the PR head commit. |\n| `forceWithLease` | `boolean` | No | Used only by `pr_push`; passed through to git push. |\n| `title` | `string` | No | Used only by `pr_create`. Required unless `fill` is `true`. |\n| `body` | `string` | No | Used only by `pr_create`. Mutually exclusive with `fill`. Empty/omitted body becomes `--body \"\"` to suppress the interactive editor. Non-empty body is written to a temp file and passed as `--body-file`. |\n| `base` | `string` | No | Used only by `pr_create`; passed as `--base`. |\n| `head` | `string` | No | Used only by `pr_create`; passed as `--head`. |\n| `draft` | `boolean` | No | Used only by `pr_create`. Defaults to `false`. |\n| `fill` | `boolean` | No | Used only by `pr_create`. Defaults to `false`. Mutually exclusive with `title` and `body`. |\n| `reviewer` | `string[]` | No | Used only by `pr_create`; each entry becomes `--reviewer`. |\n| `assignee` | `string[]` | No | Used only by `pr_create`; each entry becomes `--assignee`. |\n| `label` | `string[]` | No | Used only by `pr_create`; each entry becomes `--label`. |\n| `query` | `string` | No | Used by all `search_*` ops. Required there. |\n| `limit` | `number` | No | Used by all `search_*` ops. Defaults to `10`, floored, clamped to `50`, and must be `> 0`. |\n| `run` | `string` | No | Used only by `run_watch`. Must be a numeric run ID or full GitHub Actions run URL. |\n| `tail` | `number` | No | Used only by `run_watch`. Defaults to `15`, floored, clamped to `200`, and must be `> 0`. |\n\n## Outputs\nThe tool returns a single text result built by `buildTextResult()` in `packages/coding-agent/src/tools/gh.ts`.\n\n- `content`: one text block. Multi-item ops join sections with blank lines and `---` separators.\n- `sourceUrl`: set for single repo/PR/run results when a canonical URL is known.\n- `details`: optional structured metadata used by the TUI renderer.\n - Common fields: `artifactId`, `repo`, `branch`, `worktreePath`, `remote`, `remoteBranch`, `headSha`, `runId`, `runIds`, `status`, `conclusion`, `failedJobs`.\n - `pr_checkout` adds `checkouts: GhPrCheckoutSummary[]`.\n - `run_watch` adds `watch: GhRunWatchViewDetails`, which drives the custom live/result renderer in `packages/coding-agent/src/tools/gh-renderer.ts`.\n- Artifact trailer: when `artifactId` is present, the text body gets an appended line like `Full failed-job logs: artifact://<id>`.\n - `run_watch` allocates artifacts with `session.allocateOutputArtifact(\"github\")`; persistent sessions therefore save failed-log bodies as `<artifact-dir>/<id>.github.log`.\n\n`run_watch` is the only streaming op. It emits `onUpdate` snapshots while polling, then returns one final text result.\n\n## Flow\n1. `GithubTool.createIf()` exposes the tool only when `git.github.available()` finds `gh` on `PATH`.\n2. `GithubTool.execute()` wraps dispatch in `untilAborted()` and switches on `params.op`.\n3. Each op normalizes optional strings, arrays, booleans, and numeric caps locally in `packages/coding-agent/src/tools/gh.ts`.\n4. CLI execution goes through `git.github.run/json/text()` in `packages/coding-agent/src/utils/git.ts`:\n - spawns `gh ...` with `Bun.spawn()`;\n - trims stdout/stderr unless `trimOutput: false`;\n - maps common auth/repo-context failures into tool-facing `ToolError` messages;\n - `json()` rejects empty or invalid JSON.\n5. Read-style ops (`repo_view`, `search_*`) fetch JSON and format Markdown-like text summaries. Single-issue and single-PR views were moved out of the tool and now resolve through the `issue://` / `pr://` internal URL schemes, which share the same SQLite cache.\n7. PR diffs moved out of the tool. `pr://<N>/diff` lists changed files, `pr://<N>/diff/<i>` slices a single file, and `pr://<N>/diff/all` returns the full unified diff — see `docs/tools/read.md`. All three variants share one `gh pr diff` invocation through the `pr-diff` cache row.\n8. `pr_checkout` resolves PR metadata first, then enters `git.withRepoLock()` before any git mutation so parallel checkout calls for the same primary repo do not race on shared `.git` state.\n9. `pr_push` reads PR head metadata back from git branch config, derives a refspec, then pushes with `git.push()`.\n10. `pr_create` shells out once, then best-effort re-reads the created PR for a richer summary.\n11. `run_watch` chooses either run mode (`run` supplied) or commit mode (`run` omitted), polls GitHub Actions APIs every 3 seconds, emits streaming updates, and may save a full failed-log artifact before returning.\n12. Final text goes through `toolResult().text(...)`; if `session.allocateOutputArtifact()` returns a slot, failed-log text is persisted with `Bun.write()`.\n\n## Modes / Variants\n\n### `repo_view`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `branch` |\n| `gh` command | `gh repo view [<repo>] [--branch <branch>] --json <GH_REPO_FIELDS>` |\n| Batching | None |\n| Output | `# <owner/repo>` header, description, URL, default branch, requested branch, visibility, permission, primary language, stars, forks, archive/fork flags, updated timestamp, homepage, topics. `sourceUrl = data.url`. |\n\nIf `repo` is omitted, `gh` repository resolution is used.\n\nSingle-issue and single-PR reads live in the `issue://<N>` / `pr://<N>` URL schemes (see `docs/tools/read.md`). They share `~/.omp/cache/github-cache.db` (override via `OMP_GITHUB_CACHE_DB`) and the `github.cache.softTtlSec` / `github.cache.hardTtlSec` / `github.cache.enabled` settings. The cache retains rendered Markdown plus the raw JSON payload returned by `gh`, including private bodies, comments, reviews, and review comments when comments are enabled; rows are scoped by the local GitHub credential fingerprint. Root and repo-scoped reads (`issue://`, `pr://owner/repo`) issue a live `gh issue list` / `gh pr list` for browsing; query params `state`, `limit`, `author`, `label` pass through to `gh` (`issue://` accepts `state=open|closed|all`; `pr://` also accepts `merged`). PR diffs ride the same cache under `pr://<N>/diff[/…]`: the listing, full diff, and per-file slices all share one `pr-diff` row keyed by repo and PR number.\n\n### `pr_create`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` plus either `fill=true` or `title` |\n| Optional fields | `repo`, `title`, `body`, `base`, `head`, `draft`, `fill`, `reviewer[]`, `assignee[]`, `label[]` |\n| `gh` command | `gh pr create ...` with flags assembled from provided fields |\n| Batching | None |\n| Output | `# Created Pull Request ...` summary with URL, state, draft flag, base/head, author, created time, labels, optional body. `sourceUrl` is the created PR URL. |\n\nBranches:\n- `fill && (title || body !== undefined)` throws.\n- Non-empty `body` is written under a temp dir `gh-pr-body-*` in `os.tmpdir()`, passed as `--body-file`, then removed in `finally`.\n- After creation, the tool parses the returned URL and best-effort runs `gh pr view <number> --repo <repo> --json <GH_PR_FIELDS_NO_COMMENTS>`; failures there are swallowed.\n\n### `pr_checkout`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `pr`, `force` |\n| `gh` command | For each requested PR: `gh pr view [<pr>] [--repo <repo>] --json <GH_PR_CHECKOUT_FIELDS>`; cross-repo PRs may also call `gh repo view <headRepository> --json <GH_REPO_CLONE_FIELDS>`. |\n| Batching | Yes. `pr` may be `string[]`; each PR is resolved in parallel, but git mutations are serialized per primary repo by `git.withRepoLock()`. |\n| Output | Single PR: checkout/worktree summary plus `details.repo`, `details.branch`, `details.worktreePath`, `details.remote`, `details.remoteBranch`, `details.checkouts`. Batched: `# <n> Pull Request Worktrees (...)` plus one section per PR and aggregated `details.checkouts`. |\n\nWorktree and metadata behavior:\n- Local branch name is always `pr-<number>`.\n- Worktree path is `path.join(getWorktreesDir(), encodeRepoPathForFilesystem(primaryRepoRoot), localBranch)`, where `getWorktreesDir()` is `~/.omp/wt`; effective path is `~/.omp/wt/<encoded-primary-repo-root>/pr-<number>`.\n- Existing worktree detection is by branch ref `refs/heads/pr-<number>` from `git.worktree.list()`.\n- New worktree creation calls `git.worktree.add(repoRoot, finalWorktreePath, localBranch, { signal })` after verifying the path is neither already registered nor already present on disk.\n- For same-repo PRs, remote is `origin`. For cross-repo PRs, the tool resolves a clone URL for the head repo, reuses an existing remote with the same URL when possible, or creates `fork-<owner>` / `fork-<owner>-<n>`.\n- The branch push metadata is persisted with `git config` under the repository's shared `.git/config` as:\n - `branch.pr-<number>.remote`\n - `branch.pr-<number>.merge`\n - `branch.pr-<number>.pushRemote`\n - `branch.pr-<number>.ompPrHeadRef`\n - `branch.pr-<number>.ompPrUrl`\n - `branch.pr-<number>.ompPrIsCrossRepository`\n - `branch.pr-<number>.ompPrMaintainerCanModify`\n- If `refs/heads/pr-<number>` already exists at a different commit, checkout fails unless `force=true`, in which case `git branch --force` resets it to the fetched PR head.\n- If a matching worktree already exists, the tool reuses it and reports `reused: true`.\n\n### `pr_push`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `branch`, `forceWithLease` |\n| `gh` command | None. This path uses git, not `gh`. |\n| Batching | None |\n| Output | `# Pushed Pull Request Branch` summary with local branch, remote, remote branch, remote URL, PR URL, and force-with-lease flag. `sourceUrl = prUrl` when known. |\n\nPush target resolution reads the `branch.<name>.ompPrHeadRef`, `pushRemote`/`remote`, `ompPrUrl`, `ompPrMaintainerCanModify`, and `ompPrIsCrossRepository` git-config keys written by `pr_checkout`. If the current checked-out branch matches the target branch, the source ref is `HEAD`; otherwise it pushes `refs/heads/<branch>`. The refspec is `HEAD:refs/heads/<headRef>` or `refs/heads/<branch>:refs/heads/<headRef>`.\n\n### `search_issues`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `repo`, `limit` |\n| `gh` command | `gh api -X GET /search/issues -f q=\"<query> [repo:<repo>] is:issue\" -F per_page=<limit>` |\n| Batching | None |\n| Output | `# GitHub issues search`, echoed query, optional repo, result count, then one bullet per issue with repo/state/author/labels/timestamps/URL. |\n\n### `search_prs`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `repo`, `limit` |\n| `gh` command | `gh api -X GET /search/issues -f q=\"<query> [repo:<repo>] is:pr\" -F per_page=<limit>` |\n| Batching | None |\n| Output | Same shape as `search_issues`, labeled as pull requests. |\n\n### `search_code`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `repo`, `limit` |\n| `gh` command | `gh api -X GET /search/code -f q=\"<query> [repo:<repo>]\" -F per_page=<limit> -H \"Accept: application/vnd.github.text-match+json\"` |\n| Batching | None |\n| Output | `# GitHub code search`, result count, then one bullet per match with path, repo, short commit SHA, URL, and first normalized text-match fragment line when present. |\n\n### `search_commits`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `repo`, `limit` |\n| `gh` command | `gh api -X GET /search/commits -f q=\"<query> [repo:<repo>]\" -F per_page=<limit>` |\n| Batching | None |\n| Output | `# GitHub commits search`, result count, then one bullet per commit: short SHA + first commit-message line, repo, author, date, URL. |\n\n### `search_repos`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `limit` |\n| `gh` command | `gh api -X GET /search/repositories -f q=\"<query>\" -F per_page=<limit>` |\n| Batching | None |\n| Output | `# GitHub repositories search`, result count, then one bullet per repo with first description line, language, stars, forks, open issues, visibility, archive/fork flags, updated time, URL. |\n\n`repo` is intentionally not used for this op.\n\n### `run_watch`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `branch`, `run`, `tail` |\n| `gh` command | Repo resolution: `gh repo view --json nameWithOwner -q .nameWithOwner` when `repo` and run URL repo are both absent. Single-run mode uses `gh api --method GET /repos/<repo>/actions/runs/<runId>` and `gh api --method GET /repos/<repo>/actions/runs/<runId>/jobs`. Commit mode uses `gh api --method GET /repos/<repo>/branches/<branch>`, `gh api --method GET /repos/<repo>/actions/runs`, `gh api --method GET /repos/<repo>/actions/runs/<runId>/jobs`, and `gh api /repos/<repo>/actions/jobs/<jobId>/logs` for failed jobs. |\n| Batching | Implicit batching only in commit mode: all workflow runs for one commit are tracked together. |\n| Output | Streaming watch snapshots via `onUpdate`, then a final text report. On failure, appends `Full failed-job logs: artifact://<id>` and sets `details.artifactId`. |\n\nWatch flow:\n- `run` parsing accepts either a decimal run ID or a full run URL. URL repo must match explicit `repo` when both are given.\n- Poll interval is fixed at 3 seconds (`RUN_WATCH_INTERVAL_DEFAULT`).\n- Failure grace period is fixed at 5 seconds (`RUN_WATCH_GRACE_DEFAULT`). When any failed job appears before completion, the tool emits a note, waits once, re-fetches state, then collects logs so concurrent failures are included.\n- Failed-job logs are fetched with `gh api /repos/<repo>/actions/jobs/<jobId>/logs` via `git.github.run()`, not `json()`. Non-zero exit leaves `available: false` instead of failing the whole watch.\n- Inline result includes only the last `tail` lines per failed job. The saved artifact contains full logs (`mode: \"full\"`).\n- In commit mode, success is intentionally double-checked: once all known runs are successful, the tool waits one more poll interval and succeeds only if the set of run IDs is unchanged. This avoids returning before late workflow runs appear for the same commit.\n- `details.watch` drives a specialized renderer in `packages/coding-agent/src/tools/gh-renderer.ts`; non-watch results fall back to generic text rendering.\n\n## Side Effects\n- Filesystem\n - `pr_create` may create a temp dir under `os.tmpdir()` named `gh-pr-body-*`, write `body.md`, then remove the dir in `finally`.\n - `pr_checkout` may create directories under `~/.omp/wt/<encoded-primary-repo-root>/` and add git worktrees there.\n - `run_watch` may write a session artifact with full failed-job logs.\n- Network\n - Every op shells out to `gh`, which then talks to GitHub APIs except `pr_push`.\n - `pr_push` uses git network transport to the configured remote.\n- Subprocesses / native bindings\n - All `gh` calls use `Bun.spawn([\"gh\", ...args])`.\n - `pr_checkout` and `pr_push` also invoke git helpers from `packages/coding-agent/src/utils/git.ts`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - `run_watch` consumes `session.allocateOutputArtifact()` when failed-job logs are persisted.\n - Returned `details` objects carry run/checkouts metadata for the renderer/UI.\n- User-visible prompts / interactive UI\n - `gh` interactive editor fallback is suppressed for `pr_create` by forcing either `--body-file` or `--body \"\"`.\n - `gh-renderer` provides compact headers for all ops and a custom live watch view for `run_watch`.\n- Background work / cancellation\n - `run_watch` loops until success/failure and uses `abortableSleep()` between polls.\n - `GithubTool.execute()` is wrapped in `untilAborted()`; `git.github.run()` forwards the abort signal into `Bun.spawn()`.\n\n## Limits & Caps\n- Search result default: `10` (`SEARCH_LIMIT_DEFAULT` in `packages/coding-agent/src/tools/gh.ts`).\n- Search result max: `50` (`SEARCH_LIMIT_MAX`).\n- PR file preview inside the `pr://` view: first `50` files only (`FILE_PREVIEW_LIMIT` in `gh.ts`).\n- Run-watch poll interval: `3s` (`RUN_WATCH_INTERVAL_DEFAULT`).\n- Run-watch failure grace period: `5s` (`RUN_WATCH_GRACE_DEFAULT`).\n- Run-watch failed-log tail default: `15` lines (`RUN_WATCH_TAIL_DEFAULT`).\n- Run-watch failed-log tail max: `200` lines (`RUN_WATCH_TAIL_MAX`).\n- PR review comments page size: `100` (`REVIEW_COMMENTS_PAGE_SIZE`).\n- Actions jobs page size: `100` (`RUN_JOBS_PAGE_SIZE`).\n- Search and tail numeric inputs are floored with `Math.floor()`, clamped to the max, and rejected when non-finite or `<= 0`.\n- `pr_checkout` batch fan-out is unbounded in tool code; all requested PRs are launched with `Promise.all()`.\n\n## Errors\n- Tool creation is skipped entirely when `gh` is not installed.\n- `git.github.run()` throws `ToolError(\"GitHub CLI (gh) is not installed...\")` if `gh` is missing at execution time.\n- `git.github.text/json()` map common failures to model-facing messages:\n - not authenticated → `GitHub CLI is not authenticated. Run \\`gh auth login\\`.`\n - missing repo context without explicit `repo` → `GitHub repository context is unavailable. Pass \\`repo\\` explicitly or run the tool inside a GitHub checkout.`\n - otherwise stderr/stdout text, or fallback `GitHub CLI command failed: gh ...`\n- `json()` also throws on empty stdout or invalid JSON.\n- Local validation errors throw `ToolError`, including:\n - missing required per-op fields (`query`, `title unless fill=true`)\n - invalid numeric `limit` / `tail`\n - invalid `run` format\n - `fill` combined with `title` or `body`\n - missing git repo / branch / HEAD context for checkout, push, or watch\n - `pr_push` on a branch without `ompPrHeadRef` metadata\n - conflicting existing worktree path or branch without `force`\n- `run_watch` treats failed-job log fetches specially: missing log content does not fail the watch; it marks that log `available: false` and prints `Log tail unavailable.` / `Full log unavailable.`.\n- `pr_create` swallows only the post-create best-effort `gh pr view` refresh; the create step itself still fails normally.\n\n## Notes\n- `appendRepoFlag()` intentionally skips `--repo` when the identifier argument is already a full GitHub URL; that lets `gh` derive repo/number from the URL.\n- `normalizePrIdentifierList()` accepts `reviewer`, `assignee`, and `label` arrays too; the helper name is broader than its callers.\n- `pr_push` depends on `pr_checkout` having run first for that local branch; there is no alternate metadata source.\n- `pr_checkout` stores push metadata in branch config, not in the worktree directory. Reusing the same `pr-<number>` branch reuses those config keys.\n- Worktree write serialization is keyed by the primary repo root, not the current worktree path, because git worktrees share `.git/config`, `packed-refs`, commit-graph, and worktree metadata files.\n- `search_repos` is the only search op that never forwards `repo`; repository scoping must be expressed in the query itself.\n- `run_watch` success on commit mode means “all observed runs succeeded and no additional runs appeared one poll later”, not merely “latest poll looked green”.\n- The TUI renderer collapses failed log previews unless the result view is expanded; the underlying text result still contains the same tailed lines plus any artifact reference.\n",
74
74
  "tools/inspect_image.md": "# inspect_image\n\n> Send a local image file to a vision-capable model and return text analysis.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/inspect-image.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/inspect-image.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/inspect-image-renderer.ts` — TUI call/result rendering.\n - `packages/coding-agent/src/utils/image-loading.ts` — path resolution, type detection, size gate, optional resize.\n - `packages/coding-agent/src/utils/image-resize.ts` — downscale and recompress oversized images.\n - `packages/coding-agent/src/tools/path-utils.ts` — resolve input path relative to session cwd.\n - `packages/utils/src/mime.ts` — detect supported image formats from file bytes.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `path` | `string` | Yes | Image path passed to `loadImageInput`; resolved relative to `session.cwd` by `resolveReadPath(...)`. |\n| `question` | `string` | Yes | User prompt sent as a text content block alongside the image. |\n\n## Outputs\nThe tool returns a single `AgentToolResult`:\n\n- `content`: one text block, `[{ type: \"text\", text }]`, where `text` is the concatenated assistant text content from the model response.\n- `details`:\n - `model`: `<provider>/<id>` of the selected model.\n - `imagePath`: resolved filesystem path returned by `loadImageInput(...)`.\n - `mimeType`: MIME type actually sent to the model after optional resize/re-encode.\n\nModel-visible output is single-shot, not streamed by this tool.\n\nTUI rendering adds presentation-only truncation from `packages/coding-agent/src/tools/inspect-image-renderer.ts`:\n\n- call preview truncates `question` to 100 columns,\n- result view shows 4 lines collapsed or 16 lines expanded,\n- each rendered output line is truncated to 120 columns,\n- footer metadata shows `model · mimeType` when present.\n\n## Flow\n1. `InspectImageTool.execute(...)` rejects immediately if `images.blockImages` is enabled in session settings.\n2. It reads `session.modelRegistry`; missing registry, empty registry, missing API key, or unresolved model each raise `ToolError` from `packages/coding-agent/src/tools/inspect-image.ts`.\n3. Model selection tries, in order, `pi/vision`, `pi/default`, the active model string from the session, then `availableModels[0]`. `expandRoleAlias(...)` and `resolveModelFromString(...)` handle each lookup.\n4. The chosen model must advertise `input.includes(\"image\")`; otherwise execution fails before reading the file.\n5. `loadImageInput(...)` in `packages/coding-agent/src/utils/image-loading.ts` resolves the path with `resolveReadPath(...)`, detects MIME type with `readImageMetadata(...)`, and rejects files larger than `MAX_IMAGE_INPUT_BYTES` (`20 * 1024 * 1024`, 20 MiB) using `ImageInputTooLargeError`.\n6. `readImageMetadata(...)` in `packages/utils/src/mime.ts` inspects file headers only. Supported detected MIME types are `image/png`, `image/jpeg`, `image/gif`, and `image/webp`.\n7. If `images.autoResize` is true, `loadImageInput(...)` calls `resizeImage(...)`. Resize failures are swallowed there and the original bytes are kept.\n8. If MIME detection returned no supported image type, `execute(...)` throws `ToolError(\"inspect_image only supports PNG, JPEG, GIF, and WEBP files detected by file content.\")`.\n9. The tool calls `completeSimple(...)` with one user message containing two content parts in order:\n - `{ type: \"image\", data: imageInput.data, mimeType: imageInput.mimeType }`\n - `{ type: \"text\", text: params.question }`\n10. `systemPrompt` is a one-element array rendered from `packages/coding-agent/src/prompts/tools/inspect-image-system.md`.\n11. If the model response stop reason is `error` or `aborted`, the tool maps that to `ToolError`.\n12. `extractResponseText(...)` concatenates only `text` content blocks from the assistant message, trims the result, and fails if nothing remains.\n13. Success returns the text plus `details`; `inspectImageToolRenderer` formats the result for the TUI.\n\n## Modes / Variants\n- **Original image path**: `images.autoResize` disabled. The original file bytes are base64-encoded and sent with the detected MIME type.\n- **Auto-resized path**: `images.autoResize` enabled. `resizeImage(...)` may downscale and re-encode the image before upload.\n- **Unsupported image path**: file exists but header sniffing does not identify PNG/JPEG/GIF/WEBP. The tool returns a `ToolError` before any model call.\n- **Oversize image path**: file size exceeds 20 MiB before upload. The tool returns a `ToolError` before any model call.\n\n## Side Effects\n- Filesystem\n - Resolves and reads the target image from disk.\n - Stats the file once with `Bun.file(...).stat()` and reads it fully with `fs.readFile(...)`.\n- Network\n - Sends the final base64 image payload plus question text to the selected model through `completeSimple(...)`.\n- Session state\n - Reads session settings, active model preferences, cwd, and model registry.\n- Background work / cancellation\n - Passes the caller `AbortSignal` into `completeSimple(...)`.\n - Image preprocessing is local and not cancellation-aware in these helpers.\n\n## Limits & Caps\n- Supported detected input formats: `image/png`, `image/jpeg`, `image/gif`, `image/webp` (`SUPPORTED_IMAGE_MIME_TYPES` in `packages/utils/src/mime.ts`).\n- Metadata sniff cap: `DEFAULT_IMAGE_METADATA_HEADER_BYTES = 256 * 1024` bytes. Format detection only reads up to 256 KiB from the file header.\n- Upload input cap: `MAX_IMAGE_INPUT_BYTES = 20 * 1024 * 1024` bytes (20 MiB) in `packages/coding-agent/src/utils/image-loading.ts`.\n- Auto-resize defaults in `packages/coding-agent/src/utils/image-resize.ts`:\n - `maxWidth: 1568`\n - `maxHeight: 1568`\n - `maxBytes: 500 * 1024` bytes (500 KiB target)\n - `jpegQuality: 75`\n- Resize fast path: if the original image is already within `1568x1568` and within `maxBytes / 4` (125 KiB by default), `resizeImage(...)` returns the original bytes unchanged.\n- Resize quality ladder: after the first encode pass, lossy retries use qualities `[70, 60, 50, 40]`.\n- Resize dimension ladder: if quality reduction still misses the byte target, retries scale dimensions by `[1.0, 0.75, 0.5, 0.35, 0.25]` and stop if either dimension would fall below `100` pixels.\n- First resize pass encodes PNG, JPEG, and WebP, then keeps the smallest encoded buffer. Fallback passes encode JPEG and WebP only, again keeping the smaller output.\n- Renderer caps:\n - `INSPECT_QUESTION_PREVIEW_WIDTH = 100`\n - `INSPECT_OUTPUT_COLLAPSED_LINES = 4`\n - `INSPECT_OUTPUT_EXPANDED_LINES = 16`\n - `INSPECT_OUTPUT_LINE_WIDTH = 120`\n\n## Errors\n- Settings gate:\n - `Image submission is disabled by settings (images.blockImages=true). Disable it to use inspect_image.`\n- Model resolution / capability:\n - `Model registry is unavailable for inspect_image.`\n - `No models available for inspect_image.`\n - `Unable to resolve a model for inspect_image.`\n - `Resolved model <provider>/<id> does not support image input. Configure a vision-capable model for modelRoles.vision.`\n - `No API key available for <provider>/<id>. Configure credentials for this provider or choose another vision-capable model.`\n- Input file:\n - `Image file too large: <size> exceeds <limit> limit.` from `ImageInputTooLargeError`, remapped to `ToolError`.\n - `inspect_image only supports PNG, JPEG, GIF, and WEBP files detected by file content.` when header sniffing fails.\n- Model call:\n - `inspect_image request failed.` if the response stop reason is `error` without a provider message.\n - Provider `errorMessage` is passed through when present.\n - `inspect_image request aborted.` on aborted responses.\n - `inspect_image model returned no text output.` when the assistant message contains no text blocks after filtering.\n\nFailures surface as thrown `ToolError`s from `execute(...)`; the normal success return shape is not used for error reporting.\n\n## Notes\n- The model-facing prompt path on disk is `packages/coding-agent/src/prompts/tools/inspect-image.md`; the assignment's underscore form does not exist.\n- Format support is based on file content, not filename extension. Renaming a non-image file to `.png` does not make it valid.\n- `resolveReadPath(...)` tries macOS-specific path variants: shell-unescaped spaces, AM/PM narrow no-break-space filenames, NFD normalization, and curly-quote variants.\n- `loadImageInput(...)` also computes `textNote`, `dimensionNote`, and final `bytes`, but `inspect_image` does not include those in tool output.\n- Auto-resize can change the MIME type sent to the model. A JPEG or GIF input may be uploaded as PNG, JPEG, or WebP depending on which encoder output is smallest.\n- If `resizeImage(...)` throws or cannot decode the image, `loadImageInput(...)` silently keeps the original base64 payload instead of failing.\n",
75
75
  "tools/irc.md": "# irc\n\n> Send short prose messages to other live agents in the current process.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/irc.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/irc.md`\n- Key collaborators:\n - `packages/coding-agent/src/registry/agent-registry.ts` — process-global live agent directory.\n - `packages/coding-agent/src/session/agent-session.ts` — side-channel reply generation and history injection.\n - `packages/coding-agent/src/prompts/system/irc-incoming.md` — no-tools auto-reply prompt.\n - `packages/coding-agent/src/tools/index.ts` — tool availability gating.\n - `packages/coding-agent/src/config/settings-schema.ts` — `irc.enabled` default.\n - `packages/coding-agent/src/modes/controllers/event-controller.ts` — renders IRC events into chat UI.\n - `packages/coding-agent/src/modes/utils/ui-helpers.ts` — formats `[IRC]` transcript lines.\n - `packages/coding-agent/src/task/executor.ts` — carries `irc.enabled` into subagents.\n\n## Inputs\n\n### `op: \"list\"`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `op` | `\"list\"` | Yes | Lists peers visible to the caller. |\n\n### `op: \"send\"`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `op` | `\"send\"` | Yes | Sends one message to one peer or to `\"all\"`. |\n| `to` | `string` | Yes | Peer id such as `0-Main`, or `\"all\"` for broadcast. Whitespace is trimmed. |\n| `message` | `string` | Yes | Message body. Whitespace is trimmed; empty-after-trim is rejected. |\n| `awaitReply` | `boolean` | No | Wait for prose replies. Defaults to `true` for direct messages and `false` for `to: \"all\"`. |\n\n## Outputs\n- Single-shot `AgentToolResult`; no streaming updates.\n- `content` is one text block.\n - `list` returns either `No other live agents.` or a bullet list headed by `<n> peer(s):`.\n - `send` returns delivery summary text, then optional `## Replies`, `## Failed`, and `Unknown / unavailable peers:` sections.\n- `details` is structured metadata:\n - `list`: `{ op, from, peers, channels }`\n - `send`: `{ op, from, to, delivered, replies?, failed?, notFound? }`\n- The tool does not return raw IRC frames, message ids, or a transcript object.\n\n## Flow\n1. `IrcTool.createIf` only constructs the tool when `irc.enabled` is on and the session has both an `AgentRegistry` and `getAgentId` (`packages/coding-agent/src/tools/irc.ts`).\n2. Tool discovery adds another gate in `packages/coding-agent/src/tools/index.ts`: if the caller is `0-Main` and `async.enabled` is off, `irc` is hidden because the main agent cannot talk to concurrent peers in sync mode.\n3. `execute` resolves the process-global registry and sender id. Missing either returns a text error result instead of throwing.\n4. `op: \"list\"` calls `registry.listVisibleTo(senderId)`, which exposes every other agent in flat namespace whose status is `running` or `idle` (`packages/coding-agent/src/registry/agent-registry.ts`).\n5. `list` formats human-readable lines and returns `channels` as `['all', ...peerIds]`. These are logical targets only; there is no channel join state.\n6. `op: \"send\"` trims `to` and `message`; missing values produce text errors.\n7. `send` resolves targets:\n - `to === \"all\"`: all visible peers.\n - otherwise: one exact registry id, excluding self and excluding peers not in `running`/`idle`.\n8. `send` chooses `awaitReply = params.awaitReply ?? !isBroadcast`.\n9. Each target is dispatched in parallel via `target.session.respondAsBackground(...)`. One slow or failing peer does not block dispatch to the others.\n10. `respondAsBackground` emits an `irc_message` session event, forwards a display-only relay to the main session UI, and either:\n - queues just the incoming message for later history injection when `awaitReply === false`, or\n - renders `packages/coding-agent/src/prompts/system/irc-incoming.md`, runs `runEphemeralTurn` with `toolChoice: \"none\"`, emits an auto-reply event, then queues both incoming and reply messages for history injection.\n11. Deferred injection waits until the recipient is no longer streaming; `#flushPendingBackgroundExchanges` appends the custom messages through normal `message_start`/`message_end` external events so persistence and listeners see them.\n12. `send` aggregates `delivered`, `replies`, `failed`, and `notFound`, then returns one text summary plus matching `details`.\n\n## Modes / Variants\n- `list`: enumerate visible peers and logical channels.\n- `send` direct message: one exact peer id, default synchronous auto-reply.\n- `send` broadcast: `to: \"all\"`, default fire-and-forget (`awaitReply: false`) to every visible peer.\n- `send` with `awaitReply: false`: recipient records the incoming message but does not generate a reply.\n- `send` with `awaitReply: true`: recipient performs a no-tools ephemeral LLM turn and returns prose.\n\n## Side Effects\n- Session state\n - Reads from the process-global `AgentRegistry`.\n - Emits `irc_message` session events on recipient sessions.\n - Queues IRC custom messages into recipient persisted history after the current stream finishes.\n - For non-main recipients, forwards display-only relay observations into the main session UI; these relays are not persisted to the main agent history.\n - Subagents inherit `irc.enabled` from task executor settings.\n- User-visible prompts / interactive UI\n - IRC events render as `[IRC]` transcript lines in the TUI.\n - Auto-replies are generated from `packages/coding-agent/src/prompts/system/irc-incoming.md` and explicitly forbid tool use.\n- Background work / cancellation\n - `send` starts one background `respondAsBackground` call per target.\n - The caller's `AbortSignal` is forwarded into each background reply turn.\n- Network\n - No IRC server connection.\n - When `awaitReply: true`, the recipient may make model-provider API calls through `runEphemeralTurn`.\n- Filesystem\n - No direct filesystem writes in the tool itself.\n\n## Limits & Caps\n- Availability gates:\n - `irc.enabled` defaults to `true` in `packages/coding-agent/src/config/settings-schema.ts`.\n - Main agent tool discovery suppresses `irc` when `async.enabled` is off (`packages/coding-agent/src/tools/index.ts`).\n- Visibility scope: only peers in status `running` or `idle` are addressable via `listVisibleTo`.\n- Reply execution:\n - No tools are available in auto-reply turns (`toolChoice: \"none\"` in `runEphemeralTurn`).\n - No internal timeout, retry, backoff, rate limit, or reply length cap is defined in `irc.ts`; behavior relies on the underlying model stream and any upstream API limits.\n- Flush scheduling: deferred history injection polls every `50` ms while the recipient is still streaming (`#scheduleBackgroundExchangeFlush` in `packages/coding-agent/src/session/agent-session.ts`).\n\n## Errors\n- The tool returns text errors, not thrown exceptions, for:\n - missing registry: `IRC is unavailable in this session.`\n - missing sender id: `IRC is unavailable: caller has no agent id.`\n - missing `to`: `` `to` is required for op=\"send\". ``\n - missing `message`: `` `message` is required for op=\"send\". ``\n - unknown op: `Unknown irc op.`\n- Unknown, self-addressed, non-running, and non-idle direct targets are reported under `details.notFound` and in the text footer `Unknown / unavailable peers:`.\n- If a target has no attached session, it is treated as not found.\n- Exceptions thrown by `respondAsBackground` or `runEphemeralTurn` are caught per-target and surfaced under `details.failed` as `{ id, error }`; other recipients still complete.\n- If no target succeeds, `send` still returns normally with `No recipients received the message.` and optional `failed`/`notFound` metadata.\n\n## Notes\n- This is IRC-like naming only. There are no servers, sockets, nick registration, auth handshakes, channels beyond `all`, or commands such as join/part/topic.\n- Addressing is by exact agent id from the registry; there is no fuzzy lookup or aliasing.\n- `channels` in `list` is synthetic output: `all` plus visible peer ids. Nothing is persisted across calls as channel membership.\n- Persistence is per recipient history, not per sender history. The sender gets the tool result; the recipient later sees injected custom messages on its next turn.\n- The main UI may show IRC relays for conversations it was not part of, but those relay records are explicitly display-only.\n- Because reply generation snapshots in-flight assistant text, a recipient can answer based on partially streamed context.\n- Direct self-messaging is rejected by resolving the target as unavailable.",
76
- "tools/job.md": "# job\n\n> Wait for or cancel background jobs managed by the session async runtime.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/job.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/job.md`\n- Key collaborators:\n - `packages/coding-agent/src/async/job-manager.ts` — job registry, cancellation, delivery suppression.\n - `packages/coding-agent/src/async/support.ts` — feature gating for background jobs.\n - `packages/coding-agent/src/internal-urls/jobs-protocol.ts` — `jobs://` listing and per-job detail.\n - `packages/coding-agent/src/tools/bash.ts` — explicit async bash and auto-backgrounded bash jobs.\n - `packages/coding-agent/src/task/index.ts` — async task-job scheduling.\n - `packages/coding-agent/src/sdk.ts` — automatic follow-up delivery for unsuppressed completions.\n - `packages/coding-agent/src/config/settings-schema.ts` — `async.pollWaitDuration` options.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `poll` | `string[]` | No | Job ids to watch. If omitted and `cancel` is also omitted, the tool watches all running jobs. If provided, missing ids are silently filtered out before waiting. |\n| `cancel` | `string[]` | No | Job ids to cancel before any polling. Missing ids are reported as `not_found`; non-running ids as `already_completed`. |\n\n## Outputs\nThe tool returns one text block plus `details`.\n\n- `content[0].text`: markdown-like plain text sections assembled by `#buildResult(...)`:\n - `## Cancelled (N)` for cancel outcomes.\n - `## Completed (N)` for non-running jobs, including stored `resultText` and `errorText`.\n - `## Still Running (N)` for jobs still in `running`.\n- `details.jobs`: array of snapshots:\n - `id: string`\n - `type: \"bash\" | \"task\"`\n - `status: \"running\" | \"completed\" | \"failed\" | \"cancelled\"`\n - `label: string`\n - `durationMs: number`\n - optional `resultText`, `errorText`\n- `details.cancelled` appears only when `cancel` was passed; each item is `{ id, status }` where status is `\"cancelled\" | \"not_found\" | \"already_completed\"`.\n\nStreaming behavior:\n- During a polling wait, `execute(...)` emits `onUpdate(...)` every 500 ms with an empty text block and fresh `details.jobs` snapshots.\n- Final return is single-shot after a completion, timeout, abort, or immediate fast path.\n\nRelated read path:\n- `read jobs://` lists all current jobs.\n- `read jobs://<id>` renders one job with status, label, start time, duration, and stored result/error text.\n\n## Flow\n1. `JobTool.createIf(...)` in `packages/coding-agent/src/tools/job.ts` only exposes the tool when `isBackgroundJobSupportEnabled(...)` returns true for either `async.enabled` or `bash.autoBackground.enabled`.\n2. `execute(...)` fetches `session.asyncJobManager`. If absent, it returns `Async execution is disabled; no background jobs are available.`\n3. `cancel` ids are processed first:\n - `manager.getJob(id)` missing → `not_found`.\n - existing job with `status !== \"running\"` → `already_completed`.\n - running job → `manager.cancel(id)`, which sets `job.status = \"cancelled\"`, aborts the controller, and schedules eviction.\n4. Polling mode is chosen with `const shouldPoll = requestedPollIds !== undefined || cancelIds.length === 0`:\n - only `cancel` present → return immediately, no wait.\n - explicit `poll`, or no args at all → proceed to watch jobs.\n5. Watch set resolution:\n - explicit `poll` → map ids through `manager.getJob(...)` and drop missing ones.\n - no `poll` and no `cancel` → `manager.getRunningJobs()`.\n6. Empty watch set returns immediately:\n - if cancellations happened, return snapshots for the cancelled ids that still exist.\n - else return either `No matching jobs found for IDs: ...` or `No running background jobs to wait for.`\n7. If every watched job is already non-running, `#buildResult(...)` returns immediately without waiting.\n8. Otherwise the tool waits on `Promise.race(...)` across:\n - every watched running job's `job.promise`,\n - a timeout promise for `async.pollWaitDuration`,\n - the tool-call abort signal when present.\n9. Before waiting, it calls `manager.watchJobs(watchedJobIds)`. This suppresses automatic completion delivery for those ids while they are being watched.\n10. If `onUpdate` exists, a 500 ms interval sends progress snapshots from `#snapshotJobs(...)`; one snapshot is emitted immediately before entering the race.\n11. In `finally`, the tool always calls `manager.unwatchJobs(...)`, clears the timeout, and stops the progress interval.\n12. `#buildResult(...)` deduplicates jobs, snapshots current manager state, then calls `manager.acknowledgeDeliveries(...)` for every non-running job in the result. That suppresses later automatic follow-up delivery for the same completions and removes queued deliveries for those ids.\n13. The final text groups jobs by non-running vs still-running state. A timeout is not an error path; it simply returns the current snapshot.\n\n## Modes / Variants\n- Poll all running jobs: call with neither `poll` nor `cancel`.\n- Poll explicit ids: call with `poll` only.\n- Cancel only: call with `cancel` only; cancellations happen and the tool returns immediately.\n- Cancel then poll: call with both. Cancellations are applied first, then the tool watches the remaining resolved `poll` ids.\n- Read-only inspection outside the tool: `jobs://` and `jobs://<id>` expose the same manager state without waiting.\n\nSpawn paths that produce jobs:\n- `packages/coding-agent/src/tools/bash.ts`\n - `async: true` always registers a `type: \"bash\"` job with `AsyncJobManager.register(...)` and returns a start message.\n - auto-background mode (`bash.autoBackground.enabled`) starts the same managed job path for non-PTY commands, waits up to `min(bash.autoBackground.thresholdMs, timeoutMs - 1000)`, and if the command is still running returns a background-job start result instead of inline command output.\n- `packages/coding-agent/src/task/index.ts`\n - when `async.enabled` is on, the chosen agent is not blocking, and `tasks.length > 0`, each task item is registered as a `type: \"task\"` job.\n\nLifecycle and exact state names:\n- Conceptual scheduling path: `pending` (only task-progress bookkeeping before work starts) → `running` → `completed` / `failed`; cancellation changes a running async job to `cancelled`.\n- Exact `AsyncJob.status` values in `packages/coding-agent/src/async/job-manager.ts`: `\"running\" | \"completed\" | \"failed\" | \"cancelled\"`.\n- Exact per-task progress values in `packages/coding-agent/src/task/types.ts`: `\"pending\" | \"running\" | \"completed\" | \"failed\" | \"aborted\"`.\n\n## Side Effects\n- Filesystem\n - None in `job.ts` itself.\n - Jobs being observed may already have written artifacts/results through their own tool runtimes.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Reads and mutates `session.asyncJobManager` state.\n - `watchJobs(...)` / `unwatchJobs(...)` toggle delivery suppression for the watched ids.\n - `acknowledgeDeliveries(...)` marks completed ids as suppressed and removes queued deliveries for them.\n - `cancel(...)` aborts running jobs through each job's `AbortController`.\n- User-visible prompts / interactive UI\n - Polling emits periodic `onUpdate` snapshots every 500 ms.\n - Automatic job completion follow-ups are generated by `packages/coding-agent/src/sdk.ts` only for unsuppressed deliveries.\n- Background work / cancellation\n - Waiting uses a timeout plus optional tool-call abort signal.\n - Cancelling a job does not synchronously await teardown; it flips state, aborts, and returns control to the manager/job promise.\n\n## Limits & Caps\n- Poll wait duration comes from `async.pollWaitDuration` in `packages/coding-agent/src/config/settings-schema.ts`:\n - allowed values: `5s`, `10s`, `30s`, `1m`, `5m`\n - default: `30s`\n- Progress update cadence while polling: `PROGRESS_INTERVAL_MS = 500` in `packages/coding-agent/src/tools/job.ts`.\n- Async job retention default: `DEFAULT_RETENTION_MS = 5 * 60 * 1000` in `packages/coding-agent/src/async/job-manager.ts`.\n- Manager fallback max-running limit: `DEFAULT_MAX_RUNNING_JOBS = 15` in `packages/coding-agent/src/async/job-manager.ts`.\n- Session wiring clamps `async.maxJobs` to `1..100` before constructing the manager in `packages/coding-agent/src/sdk.ts`; settings default is `100` in `packages/coding-agent/src/config/settings-schema.ts`.\n- Async completion delivery retry backoff in `packages/coding-agent/src/async/job-manager.ts`:\n - base `500` ms\n - max `30_000` ms\n - jitter `< 200` ms\n - exponent capped at 8 doublings\n\n## Errors\n- Tool-disabled path is returned as normal text, not thrown: `Async execution is disabled; no background jobs are available.`\n- Polling a nonexistent id is not an exception:\n - with `poll` only, missing ids are dropped; if none remain the tool returns `No matching jobs found for IDs: ...`.\n - with `cancel`, each missing id is reported as `not_found` in `details.cancelled` and text.\n- Cancelling a non-running job is not an exception; it reports `already_completed` even if the actual status is `completed`, `failed`, or `cancelled`.\n- Tool-call abort during polling stops waiting and returns a final snapshot through `#buildResult(...)`; it does not cancel watched jobs.\n- Failures inside the underlying async work are stored on the job (`status: \"failed\"`, `errorText`) and reported in normal tool output, not rethrown by `job`.\n- `read jobs://<id>` missing job returns markdown content headed `# Job Not Found` rather than throwing.\n\n## Notes\n- `job` waits for the first watched running job to settle, not for all watched jobs. If others remain `running`, they are reported under `## Still Running`; the caller must invoke `job` again to continue waiting.\n- Delivery suppression is the key difference between snapshot and automatic delivery:\n - snapshots (`job`, `read jobs://`) read current manager state;\n - follow-up delivery comes from `AsyncJobManager.#enqueueDelivery(...)` and `sdk.ts` `onJobComplete`;\n - watched or acknowledged ids are suppressed via `isDeliverySuppressed(...)`.\n- `manager.cancel(id)` sets `status = \"cancelled\"` before the underlying promise settles. The job function may later populate `resultText` or `errorText`; `job-manager.ts` preserves that text but does not transition the status away from `cancelled`.\n- `jobs://` is implemented by `JobsProtocolHandler` with `immutable = true`, but each resolve call reads live manager state at access time.\n- `jobs://<id>` shows a cancellation section only when a cancelled job has `errorText`; cancelled jobs with `resultText` are not rendered with a result section there.\n- Retention eviction removes the job record, suppression flags, and watch flag together. After eviction, both `job` and `read jobs://<id>` behave as if the id never existed.\n",
76
+ "tools/job.md": "# job\n\n> Wait for or cancel background jobs managed by the session async runtime.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/job.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/job.md`\n- Key collaborators:\n - `packages/coding-agent/src/async/job-manager.ts` — job registry, cancellation, delivery suppression.\n - `packages/coding-agent/src/async/support.ts` — feature gating for background jobs.\n - `packages/coding-agent/src/internal-urls/jobs-protocol.ts` — `jobs://` listing and per-job detail.\n - `packages/coding-agent/src/tools/bash.ts` — explicit async bash and auto-backgrounded bash jobs.\n - `packages/coding-agent/src/task/index.ts` — async task-job scheduling.\n - `packages/coding-agent/src/sdk.ts` — automatic follow-up delivery for unsuppressed completions.\n - `packages/coding-agent/src/config/settings-schema.ts` — `async.pollWaitDuration` options.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `poll` | `string[]` | No | Job ids to watch. If omitted and `cancel` is also omitted, the tool watches all running jobs. If provided, missing ids are silently filtered out before waiting. |\n| `cancel` | `string[]` | No | Job ids to cancel before any polling. Missing ids are reported as `not_found`; non-running ids as `already_completed`. |\n\n## Outputs\nThe tool returns one text block plus `details`.\n\n- `content[0].text`: markdown-like plain text sections assembled by `#buildResult(...)`:\n - `## Cancelled (N)` for cancel outcomes.\n - `## Completed (N)` for non-running jobs, including stored `resultText` and `errorText`.\n - `## Still Running (N)` for jobs still in `running`.\n- `details.jobs`: array of snapshots:\n - `id: string`\n - `type: \"bash\" | \"task\"`\n - `status: \"running\" | \"completed\" | \"failed\" | \"cancelled\"`\n - `label: string`\n - `durationMs: number`\n - optional `resultText`, `errorText`\n- `details.cancelled` appears only when `cancel` was passed; each item is `{ id, status }` where status is `\"cancelled\" | \"not_found\" | \"already_completed\"`.\n\nStreaming behavior:\n- During a polling wait, `execute(...)` emits `onUpdate(...)` every 500 ms with an empty text block and fresh `details.jobs` snapshots.\n- Final return is single-shot after a completion, timeout, abort, or immediate fast path.\n\nRelated read path:\n- Reading `jobs://` lists all current jobs.\n- Reading `jobs://<id>` renders one job with status, label, start time, duration, and stored result/error text.\n\n## Flow\n1. `JobTool.createIf(...)` in `packages/coding-agent/src/tools/job.ts` only exposes the tool when `isBackgroundJobSupportEnabled(...)` returns true for either `async.enabled` or `bash.autoBackground.enabled`.\n2. `execute(...)` fetches `session.asyncJobManager`. If absent, it returns `Async execution is disabled; no background jobs are available.`\n3. `cancel` ids are processed first:\n - `manager.getJob(id)` missing → `not_found`.\n - existing job with `status !== \"running\"` → `already_completed`.\n - running job → `manager.cancel(id)`, which sets `job.status = \"cancelled\"`, aborts the controller, and schedules eviction.\n4. Polling mode is chosen with `const shouldPoll = requestedPollIds !== undefined || cancelIds.length === 0`:\n - only `cancel` present → return immediately, no wait.\n - explicit `poll`, or no args at all → proceed to watch jobs.\n5. Watch set resolution:\n - explicit `poll` → map ids through `manager.getJob(...)` and drop missing ones.\n - no `poll` and no `cancel` → `manager.getRunningJobs()`.\n6. Empty watch set returns immediately:\n - if cancellations happened, return snapshots for the cancelled ids that still exist.\n - else return either `No matching jobs found for IDs: ...` or `No running background jobs to wait for.`\n7. If every watched job is already non-running, `#buildResult(...)` returns immediately without waiting.\n8. Otherwise the tool waits on `Promise.race(...)` across:\n - every watched running job's `job.promise`,\n - a timeout promise for `async.pollWaitDuration`,\n - the tool-call abort signal when present.\n9. Before waiting, it calls `manager.watchJobs(watchedJobIds)`. This suppresses automatic completion delivery for those ids while they are being watched.\n10. If `onUpdate` exists, a 500 ms interval sends progress snapshots from `#snapshotJobs(...)`; one snapshot is emitted immediately before entering the race.\n11. In `finally`, the tool always calls `manager.unwatchJobs(...)`, clears the timeout, and stops the progress interval.\n12. `#buildResult(...)` deduplicates jobs, snapshots current manager state, then calls `manager.acknowledgeDeliveries(...)` for every non-running job in the result. That suppresses later automatic follow-up delivery for the same completions and removes queued deliveries for those ids.\n13. The final text groups jobs by non-running vs still-running state. A timeout is not an error path; it simply returns the current snapshot.\n\n## Modes / Variants\n- Poll all running jobs: call with neither `poll` nor `cancel`.\n- Poll explicit ids: call with `poll` only.\n- Cancel only: call with `cancel` only; cancellations happen and the tool returns immediately.\n- Cancel then poll: call with both. Cancellations are applied first, then the tool watches the remaining resolved `poll` ids.\n- Read-only inspection outside the tool: `jobs://` and `jobs://<id>` expose the same manager state without waiting.\n\nSpawn paths that produce jobs:\n- `packages/coding-agent/src/tools/bash.ts`\n - `async: true` always registers a `type: \"bash\"` job with `AsyncJobManager.register(...)` and returns a start message.\n - auto-background mode (`bash.autoBackground.enabled`) starts the same managed job path for non-PTY commands, waits up to `min(bash.autoBackground.thresholdMs, timeoutMs - 1000)`, and if the command is still running returns a background-job start result instead of inline command output.\n- `packages/coding-agent/src/task/index.ts`\n - when `async.enabled` is on, the chosen agent is not blocking, and `tasks.length > 0`, each task item is registered as a `type: \"task\"` job.\n\nLifecycle and exact state names:\n- Conceptual scheduling path: `pending` (only task-progress bookkeeping before work starts) → `running` → `completed` / `failed`; cancellation changes a running async job to `cancelled`.\n- Exact `AsyncJob.status` values in `packages/coding-agent/src/async/job-manager.ts`: `\"running\" | \"completed\" | \"failed\" | \"cancelled\"`.\n- Exact per-task progress values in `packages/coding-agent/src/task/types.ts`: `\"pending\" | \"running\" | \"completed\" | \"failed\" | \"aborted\"`.\n\n## Side Effects\n- Filesystem\n - None in `job.ts` itself.\n - Jobs being observed may already have written artifacts/results through their own tool runtimes.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Reads and mutates `session.asyncJobManager` state.\n - `watchJobs(...)` / `unwatchJobs(...)` toggle delivery suppression for the watched ids.\n - `acknowledgeDeliveries(...)` marks completed ids as suppressed and removes queued deliveries for them.\n - `cancel(...)` aborts running jobs through each job's `AbortController`.\n- User-visible prompts / interactive UI\n - Polling emits periodic `onUpdate` snapshots every 500 ms.\n - Automatic job completion follow-ups are generated by `packages/coding-agent/src/sdk.ts` only for unsuppressed deliveries.\n- Background work / cancellation\n - Waiting uses a timeout plus optional tool-call abort signal.\n - Cancelling a job does not synchronously await teardown; it flips state, aborts, and returns control to the manager/job promise.\n\n## Limits & Caps\n- Poll wait duration comes from `async.pollWaitDuration` in `packages/coding-agent/src/config/settings-schema.ts`:\n - allowed values: `5s`, `10s`, `30s`, `1m`, `5m`\n - default: `30s`\n- Progress update cadence while polling: `PROGRESS_INTERVAL_MS = 500` in `packages/coding-agent/src/tools/job.ts`.\n- Async job retention default: `DEFAULT_RETENTION_MS = 5 * 60 * 1000` in `packages/coding-agent/src/async/job-manager.ts`.\n- Manager fallback max-running limit: `DEFAULT_MAX_RUNNING_JOBS = 15` in `packages/coding-agent/src/async/job-manager.ts`.\n- Session wiring clamps `async.maxJobs` to `1..100` before constructing the manager in `packages/coding-agent/src/sdk.ts`; settings default is `100` in `packages/coding-agent/src/config/settings-schema.ts`.\n- Async completion delivery retry backoff in `packages/coding-agent/src/async/job-manager.ts`:\n - base `500` ms\n - max `30_000` ms\n - jitter `< 200` ms\n - exponent capped at 8 doublings\n\n## Errors\n- Tool-disabled path is returned as normal text, not thrown: `Async execution is disabled; no background jobs are available.`\n- Polling a nonexistent id is not an exception:\n - with `poll` only, missing ids are dropped; if none remain the tool returns `No matching jobs found for IDs: ...`.\n - with `cancel`, each missing id is reported as `not_found` in `details.cancelled` and text.\n- Cancelling a non-running job is not an exception; it reports `already_completed` even if the actual status is `completed`, `failed`, or `cancelled`.\n- Tool-call abort during polling stops waiting and returns a final snapshot through `#buildResult(...)`; it does not cancel watched jobs.\n- Failures inside the underlying async work are stored on the job (`status: \"failed\"`, `errorText`) and reported in normal tool output, not rethrown by `job`.\n- Reading `jobs://<id>` for a missing job returns markdown content headed `# Job Not Found` rather than throwing.\n\n## Notes\n- `job` waits for the first watched running job to settle, not for all watched jobs. If others remain `running`, they are reported under `## Still Running`; the caller must invoke `job` again to continue waiting.\n- Delivery suppression is the key difference between snapshot and automatic delivery:\n - snapshots (`job`, reads of `jobs://`) read current manager state;\n - follow-up delivery comes from `AsyncJobManager.#enqueueDelivery(...)` and `sdk.ts` `onJobComplete`;\n - watched or acknowledged ids are suppressed via `isDeliverySuppressed(...)`.\n- `manager.cancel(id)` sets `status = \"cancelled\"` before the underlying promise settles. The job function may later populate `resultText` or `errorText`; `job-manager.ts` preserves that text but does not transition the status away from `cancelled`.\n- `jobs://` is implemented by `JobsProtocolHandler` with `immutable = true`, but each resolve call reads live manager state at access time.\n- `jobs://<id>` shows a cancellation section only when a cancelled job has `errorText`; cancelled jobs with `resultText` are not rendered with a result section there.\n- Retention eviction removes the job record, suppression flags, and watch flag together. After eviction, both `job` and reads of `jobs://<id>` behave as if the id never existed.\n",
77
77
  "tools/lsp.md": "# lsp\n\n> Query language servers for diagnostics, navigation, symbols, renames, code actions, capabilities, and raw requests.\n\n## Source\n- Entry: `packages/coding-agent/src/lsp/index.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/lsp.md`\n- Key collaborators:\n - `packages/coding-agent/src/lsp/client.ts` — client process lifecycle and JSON-RPC\n - `packages/coding-agent/src/lsp/config.ts` — config loading, auto-detect, server selection\n - `packages/coding-agent/src/lsp/lspmux.ts` — optional `lspmux` command wrapping\n - `packages/coding-agent/src/lsp/edits.ts` — apply `WorkspaceEdit` and text edits\n - `packages/coding-agent/src/lsp/utils.ts` — URI conversion, symbol resolution, formatting, glob expansion\n - `packages/coding-agent/src/lsp/types.ts` — tool schema and protocol types\n - `packages/coding-agent/src/lsp/clients/index.ts` — custom linter client cache/factory\n - `packages/coding-agent/src/lsp/clients/lsp-linter-client.ts` — LSP-backed linter adapter\n - `packages/coding-agent/src/lsp/clients/biome-client.ts` — Biome CLI diagnostics/formatting adapter\n - `packages/coding-agent/src/lsp/clients/swiftlint-client.ts` — SwiftLint CLI diagnostics adapter\n - `packages/coding-agent/src/tools/index.ts` — tool registration and `lsp.enabled` gating\n - `packages/coding-agent/src/tools/tool-timeouts.ts` — timeout defaults and clamping\n - `packages/coding-agent/src/lsp/defaults.json` — built-in server definitions for auto-detect\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `action` | string enum | Yes | One of `diagnostics`, `definition`, `references`, `hover`, `symbols`, `rename`, `rename_file`, `code_actions`, `type_definition`, `implementation`, `status`, `reload`, `capabilities`, `request`. |\n| `file` | string | No | File path; for `diagnostics` also a glob; for workspace forms use `\"*\"`; for `rename_file` this is the source path. |\n| `line` | number | No | 1-indexed line number for position-based actions. Defaults to `1` on the single-file action path. |\n| `symbol` | string | No | Substring used to resolve the column on `line`. Supports `name#N` occurrence selectors; `N` is 1-indexed and defaults to `1`. |\n| `query` | string | No | Workspace symbol query, code-action selector/filter, or LSP method name for `action=request`. |\n| `new_name` | string | No | Required for `rename` and `rename_file`. |\n| `apply` | boolean | No | For `rename`/`rename_file`, apply unless explicitly `false`. For `code_actions`, list unless explicitly `true`. |\n| `timeout` | number | No | Seconds, clamped by `clampTimeout(\"lsp\", ...)` to `5..60`, default `20`. |\n| `payload` | string | No | JSON string for `action=request`; overrides auto-built params. |\n\n## Outputs\n- Single-shot `AgentToolResult`.\n- `content` is always one text block: `[{ type: \"text\", text: string }]`.\n- `details` is `LspToolDetails`: `action`, `success`, optional `serverName`, optional original `request`.\n- No streaming updates.\n- No artifact URIs or background jobs.\n- Many validation failures are returned as ordinary text results with `details.success: false`; aborts throw `ToolAbortError` instead.\n\n## Flow\n1. `packages/coding-agent/src/tools/index.ts` registers `lsp: LspTool.createIf`; session creation also gates it behind `session.enableLsp !== false` and `settings.get(\"lsp.enabled\")`.\n2. `LspTool.execute()` in `packages/coding-agent/src/lsp/index.ts` clamps `timeout` with `clampTimeout(\"lsp\", ...)`, builds an `AbortSignal.timeout(...)`, and combines it with the caller signal.\n3. `getConfig()` loads and caches `LspConfig` per cwd, applies idle-timeout config via `setIdleTimeout()`, and reuses the cached config on later calls.\n4. Config loading in `packages/coding-agent/src/lsp/config.ts` merges `defaults.json` with JSON/YAML overrides from project, project config dirs, user config dirs, plugin roots, and home; if there are no overrides it auto-detects servers from root markers plus executable discovery.\n5. Server routing uses `getServersForFile()` / `getServerForFile()` from `config.ts`: extension or basename match, then sort primary servers before linters. `index.ts` further filters custom linter clients out of navigation/refactor paths with `getLspServersForFile()` / `getLspServerForFile()`.\n6. `getOrCreateClient()` in `client.ts` creates one process per `command:cwd`, optionally wraps supported commands with `lspmux`, spawns the server, starts the background message reader, sends `initialize`, stores server capabilities, then sends `initialized`.\n7. The message reader in `client.ts` parses LSP frames, resolves pending requests, caches `publishDiagnostics`, tracks `$/progress` tokens for project-load completion, answers `workspace/configuration`, and applies `workspace/applyEdit` requests through `applyWorkspaceEdit()`.\n8. File-scoped actions call `ensureFileOpen()` before requests. Column resolution uses `resolveSymbolColumn()` from `utils.ts`: read the target file, pick first non-whitespace when `symbol` is omitted, otherwise find the exact or case-insensitive match on the target line and honor `#N` occurrence selectors.\n9. Actions dispatch in `LspTool.execute()` through dedicated branches: workspace-only branches (`status`, some `diagnostics`, workspace `symbols`, workspace `reload`, `capabilities`, `request`) run before the single-file switch; all other single-file actions share one client lookup and `switch(action)`.\n10. Requests go through `sendRequest()` in `client.ts`, which allocates an incrementing JSON-RPC id, installs abort and timeout handling, sends `$/cancelRequest` on abort, and rejects on timeout or process exit.\n11. Actions that return edits either preview with `formatWorkspaceEdit()` or apply with `applyWorkspaceEdit()` from `edits.ts`; `rename_file` also performs the filesystem rename and then sends `workspace/didRenameFiles`.\n12. Non-abort failures inside the single-file action block are converted to `LSP error: ...`; many precondition failures return explicit text without throwing.\n\n## Modes / Variants\n### Routing and workspace scope\n- `file: \"*\"` is only special for `diagnostics`, `symbols`, and `reload`.\n- `status` ignores `file`.\n- `capabilities` with omitted `file` or `\"*\"` inspects all non-custom LSP servers; with a concrete file it scopes to matching non-custom servers.\n- `request` with omitted `file` or `\"*\"` chooses the first available non-custom LSP server; with a concrete file it chooses that file's primary non-linter server.\n- `rename_file` sends `workspace/willRenameFiles` and `workspace/didRenameFiles` to every non-custom LSP server from `getLspServers(config)`, not just one file-scoped server.\n- Diagnostics are the only tool action that queries both normal LSP servers and custom linter clients (`BiomeClient`, `SwiftLintClient`, or `LspLinterClient`).\n\n### `diagnostics`\n**Inputs**\n- Required: `file`, unless using workspace mode with `file: \"*\"`.\n- Optional: `timeout`.\n\n**Execution**\n- `file: \"*\"`: `runWorkspaceDiagnostics()` detects project type from root markers and runs one subprocess command: Rust `cargo check --message-format=short`, TypeScript `npx tsc --noEmit`, Go `go build ./...`, Python `pyright`.\n- Concrete file or glob: `resolveDiagnosticTargets()` treats non-globs as one target, otherwise expands a `Bun.Glob` up to `MAX_GLOB_DIAGNOSTIC_TARGETS`.\n- Per file, every matching server runs: custom clients call `lint(file)`; real LSP servers optionally wait for project load, capture `diagnosticsVersion`, `refreshFile()`, then `waitForDiagnostics()` for fresh `publishDiagnostics`.\n- Results are deduplicated by range+message and severity-sorted.\n\n**Output text**\n- Single target with no issues: `OK`.\n- Single target with issues: `<summary>:\\n<grouped diagnostics>`.\n- Batch/glob target: one section per file, plus an initial truncation warning when the glob exceeds the file cap.\n- Workspace mode: `Workspace diagnostics (<detected description>):\\n<command output>`.\n\n### `definition`\n**Inputs**\n- Required: `file`.\n- Optional: `line`, `symbol`, `timeout`.\n\n**Execution**\n- Sends `textDocument/definition` with `{ textDocument, position }`.\n- Accepts `Location`, `Location[]`, `LocationLink`, or `LocationLink[]`; `normalizeLocationResult()` converts `LocationLink` to `targetSelectionRange ?? targetRange`.\n- Waits for project load before the request.\n\n**Output text**\n- `No definition found` or `Found N definition(s):` followed by `file:line:col` and one context line above/below each location.\n\n### `type_definition`\nSame as `definition`, but sends `textDocument/typeDefinition` and reports `type definition(s)`.\n\n### `implementation`\nSame as `definition`, but sends `textDocument/implementation` and reports `implementation(s)`.\n\n### `references`\n**Inputs**\n- Required: `file`.\n- Optional: `line`, `symbol`, `timeout`.\n\n**Execution**\n- Sends `textDocument/references` with `includeDeclaration: true`.\n- For project-aware servers, retries up to `REFERENCES_RETRY_COUNT` times when the only hit is the queried declaration; between retries it waits for project load and sleeps `REFERENCES_RETRY_DELAY_MS`.\n- First `REFERENCE_CONTEXT_LIMIT` references include surrounding context; the rest are location-only.\n\n**Output text**\n- `No references found` or `Found N reference(s):` with contextual entries first, then `... M additional reference(s) shown without context` when truncated.\n\n### `hover`\n**Inputs**\n- Required: `file`.\n- Optional: `line`, `symbol`, `timeout`.\n\n**Execution**\n- Sends `textDocument/hover`.\n- `extractHoverText()` flattens strings, markup content, marked-string objects, or arrays into plain text.\n\n**Output text**\n- `No hover information` or the extracted hover text.\n\n### `symbols`\n**Inputs**\n- Workspace mode: `file: \"*\"` or omitted file on the early workspace branch, plus required `query`.\n- Document mode: required `file`.\n- Optional: `timeout`.\n\n**Execution**\n- Workspace mode sends `workspace/symbol` to every non-custom LSP server, post-filters matches with `filterWorkspaceSymbols()`, deduplicates with `dedupeWorkspaceSymbols()`, then truncates to `WORKSPACE_SYMBOL_LIMIT`.\n- Document mode sends `textDocument/documentSymbol` to the primary server. If the first item has `selectionRange`, it formats hierarchical `DocumentSymbol`s; otherwise it formats flat `SymbolInformation`s.\n\n**Output text**\n- Workspace mode: `Found N symbol(s) matching \"query\":` plus formatted `name @ file:line:col`, with an omission line when over the limit.\n- Document mode: `Symbols in <file>:` plus hierarchical or flat symbol lines.\n\n### `rename`\n**Inputs**\n- Required: `file`, `new_name`.\n- Optional: `line`, `symbol`, `apply`, `timeout`.\n\n**Execution**\n- Waits for project load, sends `textDocument/rename`, receives a `WorkspaceEdit`.\n- `apply !== false` applies edits immediately with `applyWorkspaceEdit()`.\n- `apply === false` renders a preview with `formatWorkspaceEdit()`.\n\n**Output text**\n- `Rename returned no edits`, `Applied rename:` plus applied change lines, or `Rename preview:` plus summarized edits.\n\n### `rename_file`\n**Inputs**\n- Required: `file` source path, `new_name` destination path.\n- Optional: `apply`, `timeout`.\n\n**Execution**\n- Resolves absolute source and destination, rejects identical paths, missing source, existing destination, empty rename set, or directories with more than `MAX_RENAME_PAIRS` files.\n- `enumerateRenamePairs()` returns one `{oldUri,newUri}` pair for a file or walks every regular file in a directory tree.\n- Sends `workspace/willRenameFiles` with `{ files: pairs }` to every non-custom LSP server; collects returned `WorkspaceEdit`s and server notes.\n- Preview mode (`apply === false`) only formats those edits.\n- Apply mode runs each returned `WorkspaceEdit`, renames the source path on disk, sends `textDocument/didClose` for every renamed open file, deletes those `openFiles` entries, then sends `workspace/didRenameFiles`.\n\n**Output text**\n- Preview: `Rename preview: <file-count label> → <dest>` plus per-server edit summaries and optional server notes.\n- Apply: `Renamed <file-count label> → <dest>` plus applied edit summaries, filesystem rename line, and optional server notes.\n\n### `code_actions`\n**Inputs**\n- Required: `file`.\n- Optional: `line`, `symbol`, `query`, `apply`, `timeout`.\n\n**Execution**\n- Reads cached diagnostics for the open URI from `client.diagnostics` and sends `textDocument/codeAction` for a zero-width range at the resolved position.\n- When `apply !== true`, `query` is passed as `context.only: [query]`; this is a server-side kind filter.\n- When `apply === true`, `query` becomes a required client-side selector: either a zero-based numeric index or a case-insensitive substring of the action title.\n- Applying a `CodeAction` uses `applyCodeAction()`: optionally `codeAction/resolve`, then `applyWorkspaceEdit(edit)`, then optional `workspace/executeCommand`.\n- Applying a bare `Command` only runs `workspace/executeCommand`.\n\n**Output text**\n- List mode: `N code action(s):` plus `index: [kind] title` lines.\n- Apply mode success: `Applied \"title\":` plus `Workspace edit:` and/or `Executed command(s):` sections.\n- Apply mode miss: `No code action matches \"query\". Available actions:`.\n- Apply mode with no edit/command: `Action \"title\" has no workspace edit or command to apply`.\n\n### `status`\n**Inputs**\n- None.\n\n**Execution**\n- Reads configured servers from cached `LspConfig`, not `getActiveClients()`.\n- Calls `detectLspmux()` and appends status text when `lspmux` is installed.\n\n**Output text**\n- `Active language servers: ...` or `No language servers configured for this project`, optionally followed by `lspmux: active (multiplexing enabled)` or `lspmux: installed but server not running`.\n\n### `reload`\n**Inputs**\n- Workspace mode: `file: \"*\"` or omitted `file`.\n- Single-file mode: required `file`.\n- Optional: `timeout`.\n\n**Execution**\n- Workspace mode reloads every non-custom LSP server.\n- Single-file mode reloads the primary server for that file.\n- `reloadServer()` tries `rust-analyzer/reloadWorkspace`, then `workspace/didChangeConfiguration` with `{ settings: {} }`; if neither works it kills the process so the next request cold-starts a new client.\n\n**Output text**\n- One line per server: `Reloaded <server>`, `Restarted <server>`, or `Failed to reload <server>: ...`.\n\n### `capabilities`\n**Inputs**\n- Optional: `file`, `timeout`.\n\n**Execution**\n- With a concrete `file`, inspects matching non-custom servers for that file.\n- With omitted `file` or `\"*\"`, inspects every non-custom configured server.\n- Starts servers as needed and dumps `client.serverCapabilities ?? {}` as pretty JSON.\n\n**Output text**\n- Per server: `<server>:` followed by indented `capabilities: { ... }`, or `<server>: failed to start (...)`.\n\n### `request`\n**Inputs**\n- Required: `query` method name.\n- Optional: `file`, `line`, `symbol`, `payload`, `timeout`.\n\n**Execution**\n- Chooses one non-custom server: file-scoped primary server, otherwise the first configured non-custom server.\n- Param building precedence:\n 1. If `payload` is present, parse JSON and use it verbatim.\n 2. Else if `file` is concrete and `line` is present, build `{ textDocument: { uri }, position: { line: line - 1, character } }` using `resolveSymbolColumn()`.\n 3. Else if `file` is concrete, build `{ textDocument: { uri } }`.\n 4. Else use `{}`.\n- Opens the file first when `file` is concrete.\n\n**Output text**\n- Success: `<server> ← <method>:\\n<formatted result>`, where non-string results are `JSON.stringify(..., null, 2)` and nullish values become `null`.\n- Failure: `LSP error from <server> on <method>: ...`.\n\n## Side Effects\n- Filesystem\n - Reads config files, target files, and root markers.\n - `rename` and `code_actions` may edit/create/delete/rename files via `applyWorkspaceEdit()`.\n - `rename_file` always renames the source path on disk in apply mode.\n - Server-initiated `workspace/applyEdit` requests also mutate files through `applyWorkspaceEdit()`.\n- Network\n - None directly; communication is local stdio JSON-RPC to subprocesses.\n- Subprocesses / native bindings\n - Spawns language servers with `ptree.spawn()`.\n - Workspace diagnostics spawns `cargo`, `npx`, `go`, or `pyright`.\n - `BiomeClient` and `SwiftLintClient` spawn CLI tools.\n - Optional `lspmux` detection spawns `lspmux status`; supported servers may be wrapped through `lspmux client`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Caches config per cwd in `configCache`.\n - Caches LSP clients per `command:cwd`, with `pendingRequests`, `diagnostics`, `openFiles`, `serverCapabilities`, and project-load state.\n - Caches custom linter clients by `serverName:cwd`.\n - Updates client `lastActivity`; optional idle-timeout cleanup is driven by `setIdleTimeout()`.\n- Background work / cancellation\n - Every request has an abortable timeout signal.\n - Aborting an in-flight LSP request sends `$/cancelRequest`.\n - Background message readers persist for each live client until process exit/shutdown.\n\n## Limits & Caps\n- Tool timeout clamp: default `20`, min `5`, max `60` seconds — `TOOL_TIMEOUTS.lsp` in `packages/coding-agent/src/tools/tool-timeouts.ts`.\n- LSP request default timeout inside `sendRequest()`: `30_000ms` — `DEFAULT_REQUEST_TIMEOUT_MS` in `packages/coding-agent/src/lsp/client.ts`.\n- Warmup initialize timeout default: `5_000ms` — `WARMUP_TIMEOUT_MS` in `packages/coding-agent/src/lsp/client.ts`.\n- Project-load wait fallback: `15_000ms` — `PROJECT_LOAD_TIMEOUT_MS` in `packages/coding-agent/src/lsp/client.ts`.\n- Idle-client sweep interval when enabled: `60_000ms` — `IDLE_CHECK_INTERVAL_MS` in `packages/coding-agent/src/lsp/client.ts`.\n- Diagnostic message output cap: first `50` messages — `DIAGNOSTIC_MESSAGE_LIMIT` in `packages/coding-agent/src/lsp/index.ts`.\n- Single-file diagnostics wait: `3_000ms` — `SINGLE_DIAGNOSTICS_WAIT_TIMEOUT_MS`.\n- Batch/glob diagnostics wait per file: `400ms` — `BATCH_DIAGNOSTICS_WAIT_TIMEOUT_MS`.\n- Glob diagnostic target cap: first `20` matches — `MAX_GLOB_DIAGNOSTIC_TARGETS`.\n- Workspace symbol cap: first `200` entries — `WORKSPACE_SYMBOL_LIMIT`.\n- Reference context cap: first `50` references include source context — `REFERENCE_CONTEXT_LIMIT`.\n- References retry count: `2` retries, `250ms` backoff — `REFERENCES_RETRY_COUNT`, `REFERENCES_RETRY_DELAY_MS`.\n- Directory rename cap: `1_000` file pairs — `MAX_RENAME_PAIRS`.\n- `detectLspmux()` state cache TTL: `5 * 60 * 1000ms`; liveness check timeout: `1_000ms` — `STATE_CACHE_TTL_MS`, `LIVENESS_TIMEOUT_MS` in `packages/coding-agent/src/lsp/lspmux.ts`.\n- Workspace diagnostics output cap: first `50` lines from the subprocess.\n\n## Errors\n- Missing or invalid inputs are usually returned as text with `details.success: false`, not thrown:\n - missing `file`/`query`/`new_name`\n - invalid JSON in `payload`\n - no matching server\n - invalid `rename_file` source/destination conditions\n- `resolveSymbolColumn()` throws explicit errors for missing files, missing symbols, and out-of-bounds `#N` selectors; these surface as `LSP error: ...` or request-specific error text.\n- `sendRequest()` rejects on timeout with `LSP request <method> timed out after <ms>ms`.\n- Client process exit rejects all pending requests with an exit-code/stderr error assembled in `getOrCreateClient()`.\n- Single-file action failures inside the main `try` become `LSP error: <message>`.\n- `request` has its own error envelope: `LSP error from <server> on <method>: <message>`.\n- Some server failures are intentionally softened:\n - diagnostics continue when one server fails\n - `rename_file` suppresses `workspace/willRenameFiles` “method not found” errors and records other server errors as notes\n - `code_actions` ignores `codeAction/resolve` failures and applies unresolved actions when possible\n- Aborts are not converted to text: `ToolAbortError` is rethrown.\n\n## Notes\n- `status` reports configured/available servers from `LspConfig`, not currently active client processes from `getActiveClients()`.\n- `getLspServerForFile()` excludes `createClient` adapters and linter-only servers; navigation/refactor actions never target Biome/SwiftLint custom clients.\n- `getServersForFile()` matches both file extensions and exact basenames from `fileTypes`; config can target names like `Dockerfile` if present.\n- `symbol` matching is exact first, then case-insensitive, and falls back to the Nth occurrence on the specified line only; it never scans other lines.\n- `code_actions` uses `query` in two different ways: server-side `context.only` filter in list mode, client-side title/index selector in apply mode.\n- `rename` and `rename_file` default to apply. Preview requires `apply: false`.\n- `request` with `file: \"*\"` is treated the same as omitted `file`: it does not build workspace-specific params.\n- `reload` does not recreate a client immediately after killing it; the next request triggers reinitialization.\n- `workspace/applyEdit` can apply edits initiated by the server outside the direct tool action result path.\n- `detectLspmux()` can be disabled with `PI_DISABLE_LSPMUX=1`; only `rust-analyzer` is in `DEFAULT_SUPPORTED_SERVERS`.\n- `configCache` is per-process and never auto-invalidated; config changes require a fresh process to be observed by `getConfig()` callers.",
78
- "tools/read.md": "# read\n\n> Read files, directories, archives, SQLite databases, internal resources, images, documents, and URLs through one `path` string.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/read.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/read.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/path-utils.ts` — split `path` from trailing selectors; normalize local paths.\n - `packages/coding-agent/src/tools/archive-reader.ts` — detect `archive.ext:inner/path`, index archives, list/read entries.\n - `packages/coding-agent/src/tools/sqlite-reader.ts` — detect SQLite targets, parse selectors, render tables.\n - `packages/coding-agent/src/tools/fetch.ts` — URL parsing, fetch/render pipeline, URL cache/artifacts.\n - `packages/coding-agent/src/internal-urls/router.ts` — resolve `agent://`, `artifact://`, `jobs://`, `local://`, `mcp://`, `memory://`, `pi://`, `rule://`, `skill://`.\n - `packages/coding-agent/src/edit/notebook.ts` — convert `.ipynb` to editable `# %% [...] cell:N` text.\n - `packages/coding-agent/src/utils/file-display-mode.ts` — decide hashline vs line-number vs raw display.\n - `packages/coding-agent/src/workspace-tree.ts` — render directory trees.\n - `packages/coding-agent/src/edit/file-read-cache.ts` — cache read lines for later hashline edit recovery.\n - `packages/coding-agent/src/tools/index.ts` — registers `read: s => new ReadTool(s)`.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `path` | `string` | Yes | Filesystem path, internal URL, or web URL. May end with a trailing selector such as `:50-100` or `:raw`. |\n\n### Selector grammar\n\nFor normal file-like reads, `splitPathAndSel()` in `packages/coding-agent/src/tools/path-utils.ts` recognizes the final suffix only when it matches one of these forms:\n\n| Suffix | Meaning |\n| --- | --- |\n| `:raw` | Raw/verbatim mode. Disables structural summaries and line prefixes. |\n| `:N` / `:LN` | Start at 1-indexed line `N`, open-ended. |\n| `:A-B` / `:LA-LB` | Inclusive 1-indexed line range. |\n| `:A+C` / `:LA+LC` | `C` lines starting at `A`; tool converts this to end line `A + C - 1`. |\n| `:range:raw` or `:raw:range` | Same line selection, but raw output. |\n\nValidation in `parseLineRangeChunk()`:\n- line numbers are 1-indexed; `:0` throws.\n- `+` counts must be `>= 1`.\n- `-` end must be `>= start`.\n\nSelector parsing intentionally falls through for unrecognized trailing `:...`; archive and SQLite paths consume their own colon syntax.\n\nURL selectors are parsed separately in `packages/coding-agent/src/tools/fetch.ts` and support only `:raw`, `:N`, `:A-B`, and `:A+C` — no optional `L` prefix there.\n\n## Outputs\n- Single-shot `AgentToolResult` built through `toolResult()` in `packages/coding-agent/src/tools/tool-result.ts`.\n- `content` is usually one text block. Image reads may return `[text, image]`.\n- `details` is path-dependent. `ReadToolDetails` may include:\n - `kind: \"file\" | \"url\"` (URL path uses `kind: \"url\"`; file reads usually omit `kind`)\n - `isDirectory`\n - `resolvedPath`\n - `suffixResolution`\n - URL fields: `url`, `finalUrl`, `contentType`, `method`, `notes`\n - `truncation`\n - `displayContent` (unprefixed text + starting line for TUI rendering)\n - `summary` (`lines`, `elidedSpans`) for structural summaries\n - `meta` from `packages/coding-agent/src/tools/output-meta.ts`\n- `details.meta.source` is set to the backing path, URL, or internal URL.\n- `details.meta.truncation` carries shown range, total lines/bytes, next offset, and optional `artifactId` for cached URL output.\n- Directory/archive listings and SQLite table lists also set `details.meta.limits` when list limits trigger.\n\n## Flow\n1. `ReadTool.execute()` accepts `{ path }`. `file://...` inputs are expanded first with `expandPath()`.\n2. It tries URL handling first via `parseReadUrlTarget()` from `packages/coding-agent/src/tools/fetch.ts`.\n - Plain URL reads call `executeReadUrl()`.\n - URL reads with line selectors load or refresh the URL cache with `loadReadUrlCacheEntry()` and paginate the cached text locally with `#buildInMemoryTextResult()`.\n3. If not a web URL, it checks `session.internalRouter.canHandle(...)`.\n - Internal URLs are resolved with `internalRouter.resolve()`.\n - `agent://` query extraction (`/path` or `?q=`) bypasses pagination and returns the extracted content directly.\n - Other internal resources are paginated in-memory by `#buildInMemoryTextResult()`.\n4. It tries archive resolution next with `#resolveArchiveReadPath()`.\n - `parseArchivePathCandidates()` scans for `.tar`, `.tar.gz`, `.tgz`, or `.zip` anywhere before `:sub/path`.\n - On success, `#readArchive()` either lists a directory or decodes an entry as UTF-8 text.\n5. It tries SQLite resolution with `#resolveSqliteReadPath()`.\n - `parseSqlitePathCandidates()` scans for `.sqlite`, `.sqlite3`, `.db`, `.db3` before any `:table`, `:key`, or `?query` suffix.\n - `#readSqlite()` dispatches on `parseSqliteSelector()`.\n6. Otherwise it treats the input as a local filesystem path.\n - `resolveReadPath()` expands `~`, resolves relative to session cwd, treats bare `/` as session cwd, and retries macOS screenshot/NFD/curly-quote variants.\n - If the path does not exist, `findUniqueSuffixMatch()` does a workspace glob-based unique suffix lookup (skipped for remote mounts).\n7. Directories go through `#readDirectory()`.\n8. Non-directories branch by content type:\n - image metadata / inline image\n - editable notebook text\n - markit-converted document\n - structural summary for parseable code/prose\n - streamed text/line-range read\n9. Local text reads are streamed by `streamLinesFromFile()` rather than loading the whole file. The tool adds up to 3 lines of context before/after explicit bounded ranges.\n10. Non-empty contiguous local reads are recorded into `getFileReadCache(session)` for later hashline edit recovery.\n11. If suffix resolution happened, the first text block is prefixed with `[Path '...' not found; resolved to '...' via suffix match]`.\n\n## Modes / Variants\n\n### Local text files\n- No selector: if summarization is enabled and the file is small enough, `#trySummarize()` calls `summarizeCode()`.\n - Guards: file size `<= 2 MiB` (`MAX_SUMMARY_BYTES`), line count `<= 20_000` (`MAX_SUMMARY_LINES`).\n - Summary output keeps selected declarations and replaces elided spans with `...`.\n - When an elided block sits between matching brace lines, `#renderSummary()` may merge them into one anchored line rather than emitting separate opener/closer lines.\n- Explicit selector or summarization miss: streamed text read.\n - Default open-ended limit is `min(session setting read.defaultLimit, DEFAULT_MAX_LINES)`.\n - Explicit ranges expand by `RANGE_CONTEXT_LINES = 3` on the constrained sides only.\n - Non-raw output uses `resolveFileDisplayMode()`:\n - hashline anchors when edit mode is hashline, read is not raw, source is mutable, edit tool exists, and `readHashLines !== false`\n - otherwise optional line numbers when `readLineNumbers === true`\n - raw mode suppresses both\n- Prefix format in hashline mode is `lineNumber + 2-char line hash + \"|\"`, e.g. `41th|def alpha():`, from `formatHashLine()` in `packages/coding-agent/src/hashline/hash.ts`.\n- Those anchors are what the `edit`/hashline path consumes later; immutable sources and `:raw` intentionally suppress them.\n\n### Directory listings\n- `#readDirectory()` calls `buildDirectoryTree()` with:\n - `maxDepth = 2`\n - `perDirLimit = 12`\n - `rootLimit = null`\n - `lineCap = limit` when a line selector was present, else unlimited at this layer\n- `buildDirectoryTree()` sorts siblings by recency, shows file sizes and relative ages, and may mark `limits.resultLimit` when the tree truncates.\n- Empty directories render as `(empty directory)`.\n\n### Archives\n- Supported archive containers: `.tar`, `.tar.gz`, `.tgz`, `.zip`.\n- Syntax: `archive.ext`, `archive.ext:path/inside`, `archive.ext:path/inside:50-60`.\n- `openArchive()` reads the whole archive into memory, then:\n - tar/tgz uses `new Bun.Archive(bytes)`\n - zip uses `fflate.unzipSync()`\n- Archive paths normalize `/`, drop `.` segments, and reject `..`.\n- Directory reads list immediate children; files show `name` plus ` (size)` when size > 0.\n- Directory listing default limit is `500` entries in `#readArchiveDirectory()`.\n- File entries are UTF-8 decoded. Non-UTF-8 entries return `[Cannot read binary archive entry '...' (...)]` instead of bytes.\n- Text archive entries reuse the normal in-memory pagination/anchoring path.\n\n### SQLite databases\n- Database detection requires both a matching extension and a valid SQLite file header (`isSqliteFile()`).\n- Selector forms from `parseSqliteSelector()`:\n\n#### `db.sqlite`\n- `kind: \"list\"`\n- Lists non-`sqlite_%` tables with row counts.\n- `#readSqlite()` caps the rendered list to `500` tables via `applyListLimit()`.\n\n#### `db.sqlite:table`\n- `kind: \"schema\"`\n- Returns `sqlite_master.sql` plus sample rows.\n- Sample size is `DEFAULT_SCHEMA_SAMPLE_LIMIT = 5`.\n\n#### `db.sqlite:table:key`\n- `kind: \"row\"`\n- Resolves by primary key when the table has exactly one PK column; otherwise falls back to `rowid` lookup.\n- No query parameters allowed on row lookups.\n\n#### `db.sqlite:table?limit=...&offset=...&order=...&where=...`\n- `kind: \"query\"`\n- Defaults: `limit = 20`, `offset = 0`.\n- `limit` is capped at `500`.\n- `order` accepts `column` or `column:asc|desc` and must name an existing column.\n- `where` is accepted only after `validateWhereClause()` rejects comments, semicolons, and control keywords like `LIMIT`, `OFFSET`, `UNION`, `ATTACH`, `PRAGMA`.\n- Unknown query parameters throw.\n\n#### `db.sqlite?q=SELECT ...`\n- `kind: \"raw\"`\n- Cannot be combined with table selectors or any other query param.\n- Empty `q` throws.\n- `executeReadQuery()` runs `db.prepare(sql).all()` and rejects bound parameters; it does not verify that the SQL starts with `SELECT`.\n\n- Rendering caps in `packages/coding-agent/src/tools/sqlite-reader.ts`:\n - ASCII table width `120` (`MAX_RENDER_WIDTH`)\n - per-column width `40` (`MAX_COLUMN_WIDTH`)\n- `#readSqlite()` opens Bun SQLite in `{ readonly: true, strict: true }` and sets `PRAGMA busy_timeout = 3000`.\n\n### Documents\n- `CONVERTIBLE_EXTENSIONS` in `packages/coding-agent/src/tools/read.ts` covers `.pdf`, `.doc`, `.docx`, `.ppt`, `.pptx`, `.xls`, `.xlsx`, `.rtf`, `.epub`.\n- `convertFileWithMarkit()` converts the file to text/markdown.\n- Converted output is then head-truncated with normal shared limits; there is no line selector support inside the source document before conversion.\n- Conversion failures return a text block like `[Cannot read .pdf file: ...]`.\n\n### Jupyter notebooks\n- `.ipynb` goes through `readEditableNotebookText()` unless `:raw` was requested.\n- Output is editable plain text with markers like:\n\n```text\n# %% [code] cell:0\n...\n```\n\n- Raw mode bypasses that conversion and falls back to file-text reading.\n\n### Images\n- Image detection is metadata-based (`readImageMetadata()`).\n- Max accepted image size is `20 MiB` (`MAX_IMAGE_INPUT_BYTES`, re-exported as `MAX_IMAGE_SIZE`). Larger files throw.\n- If `inspect_image.enabled` is true, `read` returns metadata only (MIME, bytes, dimensions, channels, alpha) plus a suggestion to call `inspect_image`.\n- Otherwise it calls `loadImageInput()` and returns:\n - a text note from the image loader\n - an inline image block\n- Unsupported/undecodable image formats throw a `ToolError`.\n\n### Internal URLs\n- `read` does not resolve these itself; it delegates to `session.internalRouter.resolve()`.\n- Registered protocols are outside this file, but the router in `packages/coding-agent/src/internal-urls/router.ts` is built for `agent://`, `artifact://`, `jobs://`, `local://`, `mcp://`, `memory://`, `pi://`, `rule://`, and `skill://`.\n- `#handleInternalUrl()` behavior:\n - parses the URL with `parseInternalUrl()` so colons inside the host segment are legal\n - for `agent://`, treats non-root path extraction or `?q=` extraction as a special no-pagination mode\n - otherwise paginates the resolved text in memory\n - passes `immutable` through to `resolveFileDisplayMode()` so anchors are suppressed for immutable resources such as artifacts, skills, memory, and agent outputs\n - sets `ignoreResultLimits: true` for `skill://` so the full skill text is paginated only by explicit selectors, not by the normal default line limit\n\n### Web URLs\n- `parseReadUrlTarget()` accepts `http://`, `https://`, or `www.` targets.\n- Plain URL reads call `executeReadUrl()` in `packages/coding-agent/src/tools/fetch.ts`.\n- `:raw` means raw HTML/body fallback path; plain URL reads prefer rendered/reader-friendly output.\n- `:N`, `:A-B`, `:A+C` do not refetch. They page over cached output from the prior or current URL render.\n- URL render pipeline in `renderUrl()`:\n 1. normalize scheme (`https://` added for bare `www.`)\n 2. try special handlers for known sites unless raw\n 3. fetch with `loadPage()`\n 4. if content is image/PDF/DOCX/etc., try binary fetch + markit/image handling\n 5. handle JSON directly, feeds via feed parser, plain text directly\n 6. for HTML and non-raw mode, try markdown alternates, `URL.md`, content negotiation, feed alternates, HTML-to-text renderers, extracted linked documents, then `llms.txt`\n 7. fall back to raw body text/html\n- URL output is wrapped with a small header:\n\n```text\nURL: ...\nContent-Type: ...\nMethod: ...\nNotes: ...\n\n---\n```\n\n- `method` records the winning path (`json`, `feed`, `text`, `alternate-markdown`, `md-suffix`, `content-negotiation`, `image`, `markit`, `llms.txt`, `raw`, `raw-html`, etc.).\n- URL reads may return an inline image block when the fetched resource is a supported image and survives resizing.\n\n## Side Effects\n- Filesystem\n - Opens and streams local files.\n - Reads entire archives into memory before indexing.\n - May read URL-cache artifact files from the session artifacts directory.\n - Writes URL output artifacts when URL output is truncated or when line-range pagination needs a persisted cache body.\n- Network\n - URL mode performs HTTP fetches, binary refetches, and alternate-endpoint probes.\n- Subprocesses / native bindings\n - Uses Bun SQLite for `.db`/`.sqlite*`.\n - Uses `Bun.Archive` for tar/tgz and `fflate` for zip.\n - URL HTML rendering can delegate into site handlers and HTML-to-text backends from `packages/coding-agent/src/tools/fetch.ts`.\n- Session state\n - Records local text lines into `session.fileReadCache` for later stale-anchor recovery.\n - Uses `session.internalRouter` for internal URLs.\n - Uses `session.allocateOutputArtifact()` for cached/truncated URL output.\n- Background work / cancellation\n - Most branches honor `AbortSignal`; the tool itself is marked `nonAbortable = true`, but helper paths still call `throwIfAborted(signal)`.\n\n## Limits & Caps\n- Shared text truncation defaults from `packages/coding-agent/src/session/streaming-output.ts`:\n - `DEFAULT_MAX_LINES = 3000`\n - `DEFAULT_MAX_BYTES = 50 * 1024`\n- Local text open-ended default line limit: `read.defaultLimit`, clamped to `[1, DEFAULT_MAX_LINES]`.\n- Explicit line ranges add `3` context lines on each constrained side (`RANGE_CONTEXT_LINES`).\n- File streaming chunk size: `8 * 1024` bytes (`READ_CHUNK_SIZE`).\n- Local streamed byte budget for line reads: `max(DEFAULT_MAX_BYTES, maxLinesToCollect * 512)`.\n- Structural summaries only run when file size `<= 2 MiB` and line count `<= 20_000`.\n- Image input max: `20 MiB`.\n- Directory tree caps for local directories: depth `2`, per-directory children `12`.\n- Archive directory default list cap: `500` entries.\n- SQLite:\n - default row query limit `20`\n - schema sample limit `5`\n - max query limit `500`\n - table list cap `500`\n - render width `120`, column width `40`\n - busy timeout `3000` ms\n- URL read result shown to the model is truncated to `300` lines and `50 KiB` in `executeReadUrl()`; full cached output can be attached as an artifact.\n- Inline fetched URL images:\n - source bytes cap `20 MiB`\n - post-resize inline output cap `300 KiB`\n- Unique suffix auto-resolution glob timeout: `5000` ms.\n- File-read cache holds `30` paths per session.\n\n## Errors\n- Validation and operational failures surface as `ToolError`.\n- Selector errors include:\n - `Line selector 0 is invalid; lines are 1-indexed. Use :1.`\n - invalid `A+B` / `A-B` shapes\n - `Cannot combine query extraction with offset/limit` for `agent://.../path:50`\n- Missing local/archive/sqlite paths first attempt unique suffix resolution; if no unique match exists they error.\n- Out-of-bounds line reads do not throw. They return explanatory text with a suggestion such as `Use :1 ...` or `Use :<last line> ...`.\n- Binary archive entries do not throw; they return a text notice.\n- Document conversion failure returns a text notice.\n- Image oversize/unsupported/invalid cases throw.\n- SQLite parser rejects unsupported parameter combinations early; DB/runtime errors are caught and rethrown as `ToolError(message)`.\n- URL fetch failure does not throw when HTTP fetch succeeds but `response.ok === false`; it returns a failed URL read with `method: \"failed\"` and explanatory notes.\n\n## Notes\n- `readSchema` examples include `https://example.com:L1-L40`, but URL selector parsing in `packages/coding-agent/src/tools/fetch.ts` does not accept `L` prefixes.\n- Hashline anchors are suppressed for raw reads and immutable internal resources because there is no editable backing target for later `edit` consumption.\n- `splitPathAndSel()` intentionally treats unknown trailing `:...` as part of the path so `archive.zip:inner/file` and `db.sqlite:table:key` still work.\n- `resolveReadPath()` contains macOS-specific filename fallbacks for screenshot timestamps, NFD Unicode normalization, and curly apostrophes.\n- A bare `/` resolves to the session cwd, not the filesystem root.\n- URL cache keys are session-scoped and normalized by requested URL + raw/rendered mode; both requested URL and final redirected URL are cached.\n- URL line-range reads request `ensureArtifact: true, preferCached: true` so a later paginated read can reopen the same rendered body from artifact storage.\n- Raw SQLite `q=` execution is not keyword-restricted beyond “no bound parameters”; the read tool relies on the surrounding contract to keep it read-only.\n- The file-read cache is not a read acceleration cache. It exists to recover hashline edits when the file changed after the read.",
78
+ "tools/read.md": "# read\n\n> Read files, directories, archives, SQLite databases, internal resources, images, documents, and URLs through one `path` string.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/read.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/read.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/path-utils.ts` — split `path` from trailing selectors; normalize local paths.\n - `packages/coding-agent/src/tools/archive-reader.ts` — detect `archive.ext:inner/path`, index archives, list/read entries.\n - `packages/coding-agent/src/tools/sqlite-reader.ts` — detect SQLite targets, parse selectors, render tables.\n - `packages/coding-agent/src/tools/fetch.ts` — URL parsing, fetch/render pipeline, URL cache/artifacts.\n - `packages/coding-agent/src/internal-urls/router.ts` — resolve `agent://`, `artifact://`, `jobs://`, `local://`, `mcp://`, `memory://`, `pi://`, `rule://`, `skill://`.\n - `packages/coding-agent/src/edit/notebook.ts` — convert `.ipynb` to editable `# %% [...] cell:N` text.\n - `packages/coding-agent/src/utils/file-display-mode.ts` — decide hashline vs line-number vs raw display.\n - `packages/coding-agent/src/workspace-tree.ts` — render directory trees.\n - `packages/coding-agent/src/edit/file-read-cache.ts` — cache read lines for later hashline edit recovery.\n - `packages/coding-agent/src/tools/index.ts` — registers `read: s => new ReadTool(s)`.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `path` | `string` | Yes | Filesystem path, internal URL, or web URL. May end with a trailing selector such as `:50-100` or `:raw`. |\n\n### Selector grammar\n\nFor normal file-like reads, `splitPathAndSel()` in `packages/coding-agent/src/tools/path-utils.ts` recognizes the final suffix only when it matches one of these forms:\n\n| Suffix | Meaning |\n| --- | --- |\n| `:raw` | Raw/verbatim mode. Disables structural summaries and line prefixes. |\n| `:N` / `:LN` | Start at 1-indexed line `N`, open-ended. |\n| `:A-B` / `:LA-LB` | Inclusive 1-indexed line range. |\n| `:A+C` / `:LA+LC` | `C` lines starting at `A`; tool converts this to end line `A + C - 1`. |\n| `:range:raw` or `:raw:range` | Same line selection, but raw output. |\n\nValidation in `parseLineRangeChunk()`:\n- line numbers are 1-indexed; `:0` throws.\n- `+` counts must be `>= 1`.\n- `-` end must be `>= start`.\n\nSelector parsing intentionally falls through for unrecognized trailing `:...`; archive and SQLite paths consume their own colon syntax.\n\nURL selectors are parsed separately in `packages/coding-agent/src/tools/fetch.ts` and support only `:raw`, `:N`, `:A-B`, and `:A+C` — no optional `L` prefix there.\n\n## Outputs\n- Single-shot `AgentToolResult` built through `toolResult()` in `packages/coding-agent/src/tools/tool-result.ts`.\n- `content` is usually one text block. Image reads may return `[text, image]`.\n- `details` is path-dependent. `ReadToolDetails` may include:\n - `kind: \"file\" | \"url\"` (URL path uses `kind: \"url\"`; file reads usually omit `kind`)\n - `isDirectory`\n - `resolvedPath`\n - `suffixResolution`\n - URL fields: `url`, `finalUrl`, `contentType`, `method`, `notes`\n - `truncation`\n - `displayContent` (unprefixed text + starting line for TUI rendering)\n - `summary` (`lines`, `elidedSpans`) for structural summaries\n - `meta` from `packages/coding-agent/src/tools/output-meta.ts`\n- `details.meta.source` is set to the backing path, URL, or internal URL.\n- `details.meta.truncation` carries shown range, total lines/bytes, next offset, and optional `artifactId` for cached URL output.\n- Directory/archive listings and SQLite table lists also set `details.meta.limits` when list limits trigger.\n\n## Flow\n1. `ReadTool.execute()` accepts `{ path }`. `file://...` inputs are expanded first with `expandPath()`.\n2. It tries URL handling first via `parseReadUrlTarget()` from `packages/coding-agent/src/tools/fetch.ts`.\n - Plain URL reads call `executeReadUrl()`.\n - URL reads with line selectors load or refresh the URL cache with `loadReadUrlCacheEntry()` and paginate the cached text locally with `#buildInMemoryTextResult()`.\n3. If not a web URL, it checks `session.internalRouter.canHandle(...)`.\n - Internal URLs are resolved with `internalRouter.resolve()`.\n - `agent://` query extraction (`/path` or `?q=`) bypasses pagination and returns the extracted content directly.\n - Other internal resources are paginated in-memory by `#buildInMemoryTextResult()`.\n4. It tries archive resolution next with `#resolveArchiveReadPath()`.\n - `parseArchivePathCandidates()` scans for `.tar`, `.tar.gz`, `.tgz`, or `.zip` anywhere before `:sub/path`.\n - On success, `#readArchive()` either lists a directory or decodes an entry as UTF-8 text.\n5. It tries SQLite resolution with `#resolveSqliteReadPath()`.\n - `parseSqlitePathCandidates()` scans for `.sqlite`, `.sqlite3`, `.db`, `.db3` before any `:table`, `:key`, or `?query` suffix.\n - `#readSqlite()` dispatches on `parseSqliteSelector()`.\n6. Otherwise it treats the input as a local filesystem path.\n - `resolveReadPath()` expands `~`, resolves relative to session cwd, treats bare `/` as session cwd, and retries macOS screenshot/NFD/curly-quote variants.\n - If the path does not exist, `findUniqueSuffixMatch()` does a workspace glob-based unique suffix lookup (skipped for remote mounts).\n7. Directories go through `#readDirectory()`.\n8. Non-directories branch by content type:\n - image metadata / inline image\n - editable notebook text\n - markit-converted document\n - structural summary for parseable code/prose\n - streamed text/line-range read\n9. Local text reads are streamed by `streamLinesFromFile()` rather than loading the whole file. The tool adds up to 3 lines of context before/after explicit bounded ranges.\n10. Non-empty contiguous local reads are recorded into `getFileReadCache(session)` for later hashline edit recovery.\n11. If suffix resolution happened, the first text block is prefixed with `[Path '...' not found; resolved to '...' via suffix match]`.\n\n## Modes / Variants\n\n### Local text files\n- No selector: if summarization is enabled and the file is small enough, `#trySummarize()` calls `summarizeCode()`.\n - Guards: file size `<= 2 MiB` (`MAX_SUMMARY_BYTES`), line count `<= 20_000` (`MAX_SUMMARY_LINES`).\n - Summary output keeps selected declarations and replaces elided spans with `...`.\n - When an elided block sits between matching brace lines, `#renderSummary()` may merge them into one anchored line rather than emitting separate opener/closer lines.\n- Explicit selector or summarization miss: streamed text read.\n - Default open-ended limit is `min(session setting read.defaultLimit, DEFAULT_MAX_LINES)`.\n - Explicit ranges expand by `RANGE_CONTEXT_LINES = 3` on the constrained sides only.\n - Non-raw output uses `resolveFileDisplayMode()`:\n - hashline anchors when edit mode is hashline, read is not raw, source is mutable, edit tool exists, and `readHashLines !== false`\n - otherwise optional line numbers when `readLineNumbers === true`\n - raw mode suppresses both\n- Prefix format in hashline mode is `lineNumber + 2-char line hash + \"|\"`, e.g. `41th|def alpha():`, from `formatHashLine()` in `packages/coding-agent/src/hashline/hash.ts`.\n- Those anchors are what the `edit`/hashline path consumes later; immutable sources and `:raw` intentionally suppress them.\n\n### Directory listings\n- `#readDirectory()` calls `buildDirectoryTree()` with:\n - `maxDepth = 2`\n - `perDirLimit = 12`\n - `rootLimit = null`\n - `lineCap = limit` when a line selector was present, else unlimited at this layer\n- `buildDirectoryTree()` sorts siblings by recency, shows file sizes and relative ages, and may mark `limits.resultLimit` when the tree truncates.\n- Empty directories render as `(empty directory)`.\n\n### Archives\n- Supported archive containers: `.tar`, `.tar.gz`, `.tgz`, `.zip`.\n- Syntax: `archive.ext`, `archive.ext:path/inside`, `archive.ext:path/inside:50-60`.\n- `openArchive()` reads the whole archive into memory, then:\n - tar/tgz uses `new Bun.Archive(bytes)`\n - zip uses `fflate.unzipSync()`\n- Archive paths normalize `/`, drop `.` segments, and reject `..`.\n- Directory reads list immediate children; files show `name` plus ` (size)` when size > 0.\n- Directory listing default limit is `500` entries in `#readArchiveDirectory()`.\n- File entries are UTF-8 decoded. Non-UTF-8 entries return `[Cannot read binary archive entry '...' (...)]` instead of bytes.\n- Text archive entries reuse the normal in-memory pagination/anchoring path.\n\n### SQLite databases\n- Database detection requires both a matching extension and a valid SQLite file header (`isSqliteFile()`).\n- Selector forms from `parseSqliteSelector()`:\n\n#### `db.sqlite`\n- `kind: \"list\"`\n- Lists non-`sqlite_%` tables with row counts.\n- `#readSqlite()` caps the rendered list to `500` tables via `applyListLimit()`.\n\n#### `db.sqlite:table`\n- `kind: \"schema\"`\n- Returns `sqlite_master.sql` plus sample rows.\n- Sample size is `DEFAULT_SCHEMA_SAMPLE_LIMIT = 5`.\n\n#### `db.sqlite:table:key`\n- `kind: \"row\"`\n- Resolves by primary key when the table has exactly one PK column; otherwise falls back to `rowid` lookup.\n- No query parameters allowed on row lookups.\n\n#### `db.sqlite:table?limit=...&offset=...&order=...&where=...`\n- `kind: \"query\"`\n- Defaults: `limit = 20`, `offset = 0`.\n- `limit` is capped at `500`.\n- `order` accepts `column` or `column:asc|desc` and must name an existing column.\n- `where` is accepted only after `validateWhereClause()` rejects comments, semicolons, and control keywords like `LIMIT`, `OFFSET`, `UNION`, `ATTACH`, `PRAGMA`.\n- Unknown query parameters throw.\n\n#### `db.sqlite?q=SELECT ...`\n- `kind: \"raw\"`\n- Cannot be combined with table selectors or any other query param.\n- Empty `q` throws.\n- `executeReadQuery()` runs `db.prepare(sql).all()` and rejects bound parameters; it does not verify that the SQL starts with `SELECT`.\n\n- Rendering caps in `packages/coding-agent/src/tools/sqlite-reader.ts`:\n - ASCII table width `120` (`MAX_RENDER_WIDTH`)\n - per-column width `40` (`MAX_COLUMN_WIDTH`)\n- `#readSqlite()` opens Bun SQLite in `{ readonly: true, strict: true }` and sets `PRAGMA busy_timeout = 3000`.\n\n### Documents\n- `CONVERTIBLE_EXTENSIONS` in `packages/coding-agent/src/tools/read.ts` covers `.pdf`, `.doc`, `.docx`, `.ppt`, `.pptx`, `.xls`, `.xlsx`, `.rtf`, `.epub`.\n- `convertFileWithMarkit()` converts the file to text/markdown.\n- Converted output is then head-truncated with normal shared limits; there is no line selector support inside the source document before conversion.\n- Conversion failures return a text block like `[Cannot read .pdf file: ...]`.\n\n### Jupyter notebooks\n- `.ipynb` goes through `readEditableNotebookText()` unless `:raw` was requested.\n- Output is editable plain text with markers like:\n\n```text\n# %% [code] cell:0\n...\n```\n\n- Raw mode bypasses that conversion and falls back to file-text reading.\n\n### Images\n- Image detection is metadata-based (`readImageMetadata()`).\n- Max accepted image size is `20 MiB` (`MAX_IMAGE_INPUT_BYTES`, re-exported as `MAX_IMAGE_SIZE`). Larger files throw.\n- If `inspect_image.enabled` is true, `read` returns metadata only (MIME, bytes, dimensions, channels, alpha) plus a suggestion to call `inspect_image`.\n- Otherwise it calls `loadImageInput()` and returns:\n - a text note from the image loader\n - an inline image block\n- Unsupported/undecodable image formats throw a `ToolError`.\n\n### Internal URLs\n- `read` does not resolve these itself; it delegates to `session.internalRouter.resolve()`.\n- Registered protocols are outside this file, but the router in `packages/coding-agent/src/internal-urls/router.ts` is built for `agent://`, `artifact://`, `issue://`, `jobs://`, `local://`, `mcp://`, `memory://`, `pi://`, `pr://`, `rule://`, and `skill://`.\n- `#handleInternalUrl()` behavior:\n - parses the URL with `parseInternalUrl()` so colons inside the host segment are legal\n - for `agent://`, treats non-root path extraction or `?q=` extraction as a special no-pagination mode\n - otherwise paginates the resolved text in memory\n - passes `immutable` through to `resolveFileDisplayMode()` so anchors are suppressed for immutable resources such as artifacts, skills, memory, and agent outputs\n - sets `ignoreResultLimits: true` for `skill://` so the full skill text is paginated only by explicit selectors, not by the normal default line limit\n- `issue://<N>` / `pr://<N>` (and the long form `issue://<owner>/<repo>/<N>` / `pr://<owner>/<repo>/<N>`) route through the same SQLite cache the `github` tool writes to; `?comments=0` selects the no-comments rendering. Bare `issue://` / `pr://` (and `issue://<owner>/<repo>` / `pr://<owner>/<repo>`) issue a live `gh issue list` / `gh pr list` for browsing, accepting `?state=`, `?limit=`, `?author=`, `?label=`. PR diffs share the same cache through `pr://<N>/diff` (numbered file listing with per-file hints), `pr://<N>/diff/<i>` (single file slice; 1-indexed), and `pr://<N>/diff/all` (verbatim unified diff); the listing and per-file slices are reconstructed from the cached unified-diff payload, so all three variants share one `gh pr diff` invocation per PR. Diff content is served as `text/plain`. Soft TTL `github.cache.softTtlSec` (default 5 minutes), hard TTL `github.cache.hardTtlSec` (default 7 days). Stale-hit returns the cached row and schedules a background refresh.\n\n### Web URLs\n- `parseReadUrlTarget()` accepts `http://`, `https://`, or `www.` targets.\n- Plain URL reads call `executeReadUrl()` in `packages/coding-agent/src/tools/fetch.ts`.\n- `:raw` means raw HTML/body fallback path; plain URL reads prefer rendered/reader-friendly output.\n- `:N`, `:A-B`, `:A+C` do not refetch. They page over cached output from the prior or current URL render.\n- URL render pipeline in `renderUrl()`:\n 1. normalize scheme (`https://` added for bare `www.`)\n 2. try special handlers for known sites unless raw\n 3. fetch with `loadPage()`\n 4. if content is image/PDF/DOCX/etc., try binary fetch + markit/image handling\n 5. handle JSON directly, feeds via feed parser, plain text directly\n 6. for HTML and non-raw mode, try markdown alternates, `URL.md`, content negotiation, feed alternates, HTML-to-text renderers, extracted linked documents, then `llms.txt`\n 7. fall back to raw body text/html\n- URL output is wrapped with a small header:\n\n```text\nURL: ...\nContent-Type: ...\nMethod: ...\nNotes: ...\n\n---\n```\n\n- `method` records the winning path (`json`, `feed`, `text`, `alternate-markdown`, `md-suffix`, `content-negotiation`, `image`, `markit`, `llms.txt`, `raw`, `raw-html`, etc.).\n- URL reads may return an inline image block when the fetched resource is a supported image and survives resizing.\n\n## Side Effects\n- Filesystem\n - Opens and streams local files.\n - Reads entire archives into memory before indexing.\n - May read URL-cache artifact files from the session artifacts directory.\n - Writes URL output artifacts when URL output is truncated or when line-range pagination needs a persisted cache body.\n- Network\n - URL mode performs HTTP fetches, binary refetches, and alternate-endpoint probes.\n- Subprocesses / native bindings\n - Uses Bun SQLite for `.db`/`.sqlite*`.\n - Uses `Bun.Archive` for tar/tgz and `fflate` for zip.\n - URL HTML rendering can delegate into site handlers and HTML-to-text backends from `packages/coding-agent/src/tools/fetch.ts`.\n- Session state\n - Records local text lines into `session.fileReadCache` for later stale-anchor recovery.\n - Uses `session.internalRouter` for internal URLs.\n - Uses `session.allocateOutputArtifact()` for cached/truncated URL output.\n- Background work / cancellation\n - Most branches honor `AbortSignal`; the tool itself is marked `nonAbortable = true`, but helper paths still call `throwIfAborted(signal)`.\n\n## Limits & Caps\n- Shared text truncation defaults from `packages/coding-agent/src/session/streaming-output.ts`:\n - `DEFAULT_MAX_LINES = 3000`\n - `DEFAULT_MAX_BYTES = 50 * 1024`\n- Local text open-ended default line limit: `read.defaultLimit`, clamped to `[1, DEFAULT_MAX_LINES]`.\n- Explicit line ranges add `3` context lines on each constrained side (`RANGE_CONTEXT_LINES`).\n- File streaming chunk size: `8 * 1024` bytes (`READ_CHUNK_SIZE`).\n- Local streamed byte budget for line reads: `max(DEFAULT_MAX_BYTES, maxLinesToCollect * 512)`.\n- Structural summaries only run when file size `<= 2 MiB` and line count `<= 20_000`.\n- Image input max: `20 MiB`.\n- Directory tree caps for local directories: depth `2`, per-directory children `12`.\n- Archive directory default list cap: `500` entries.\n- SQLite:\n - default row query limit `20`\n - schema sample limit `5`\n - max query limit `500`\n - table list cap `500`\n - render width `120`, column width `40`\n - busy timeout `3000` ms\n- URL read result shown to the model is truncated to `300` lines and `50 KiB` in `executeReadUrl()`; full cached output can be attached as an artifact.\n- Inline fetched URL images:\n - source bytes cap `20 MiB`\n - post-resize inline output cap `300 KiB`\n- Unique suffix auto-resolution glob timeout: `5000` ms.\n- File-read cache holds `30` paths per session.\n\n## Errors\n- Validation and operational failures surface as `ToolError`.\n- Selector errors include:\n - `Line selector 0 is invalid; lines are 1-indexed. Use :1.`\n - invalid `A+B` / `A-B` shapes\n - `Cannot combine query extraction with offset/limit` for `agent://.../path:50`\n- Missing local/archive/sqlite paths first attempt unique suffix resolution; if no unique match exists they error.\n- Out-of-bounds line reads do not throw. They return explanatory text with a suggestion such as `Use :1 ...` or `Use :<last line> ...`.\n- Binary archive entries do not throw; they return a text notice.\n- Document conversion failure returns a text notice.\n- Image oversize/unsupported/invalid cases throw.\n- SQLite parser rejects unsupported parameter combinations early; DB/runtime errors are caught and rethrown as `ToolError(message)`.\n- URL fetch failure does not throw when HTTP fetch succeeds but `response.ok === false`; it returns a failed URL read with `method: \"failed\"` and explanatory notes.\n\n## Notes\n- `readSchema` examples include `https://example.com:L1-L40`, but URL selector parsing in `packages/coding-agent/src/tools/fetch.ts` does not accept `L` prefixes.\n- Hashline anchors are suppressed for raw reads and immutable internal resources because there is no editable backing target for later `edit` consumption.\n- `splitPathAndSel()` intentionally treats unknown trailing `:...` as part of the path so `archive.zip:inner/file` and `db.sqlite:table:key` still work.\n- `resolveReadPath()` contains macOS-specific filename fallbacks for screenshot timestamps, NFD Unicode normalization, and curly apostrophes.\n- A bare `/` resolves to the session cwd, not the filesystem root.\n- URL cache keys are session-scoped and normalized by requested URL + raw/rendered mode; both requested URL and final redirected URL are cached.\n- URL line-range reads request `ensureArtifact: true, preferCached: true` so a later paginated read can reopen the same rendered body from artifact storage.\n- Raw SQLite `q=` execution is not keyword-restricted beyond “no bound parameters”; the read tool relies on the surrounding contract to keep it read-only.\n- The file-read cache is not a read acceleration cache. It exists to recover hashline edits when the file changed after the read.",
79
79
  "tools/recall.md": "# recall\n\n> Search the active Hindsight bank and return raw matching memories.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/hindsight-recall.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/recall.md`\n- Key collaborators:\n - `packages/coding-agent/src/hindsight/state.ts` — session state, recall query defaults, prompt-side auto-recall.\n - `packages/coding-agent/src/hindsight/content.ts` — result formatting and UTC timestamp formatting.\n - `packages/coding-agent/src/hindsight/client.ts` — HTTP `recall` call and error mapping.\n - `packages/coding-agent/src/hindsight/bank.ts` — bank id and tag-filter scoping.\n - `docs/tools/retain.md` — shared backend, storage, seeding, and mental-model bootstrap.\n\n## Inputs\n\n| Field | Type | Required | Description |\n|---|---|---:|---|\n| `query` | `string` | Yes | Natural-language search query. The tool passes it through unchanged. |\n\n## Outputs\nReturns a single-shot tool result.\n\nWhen matches exist:\n- `content[0].type = \"text\"`\n- `content[0].text = \"Found <n> relevant memories (as of YYYY-MM-DD HH:MM UTC):\\n\\n<bullet list>\"`\n- each bullet is `- <text> [<type>] (<mentioned_at>)`; the type and timestamp suffixes appear only when those fields are present\n- `details = {}`\n\nWhen no matches exist:\n- `content[0].text = \"No relevant memories found.\"`\n- `details = {}`\n\n## Flow\n1. `HindsightRecallTool.createIf(...)` only exposes the tool when `memory.backend == \"hindsight\"`.\n2. `execute(...)` wraps the whole operation in `untilAborted(...)` from `@oh-my-pi/pi-utils`.\n3. It reads the active `HindsightSessionState`; missing state throws `Hindsight backend is not initialised for this session.`\n4. It calls `state.client.recall(...)` with:\n - `bankId` from session bootstrap,\n - the model-supplied `query`,\n - `budget`, `maxTokens`, and `types` from `HindsightConfig`,\n - tag filters from the bank scope (`recallTags`, `recallTagsMatch`).\n5. `HindsightApi.recall(...)` POSTs `/v1/default/banks/{bank_id}/memories/recall`.\n6. Results are formatted into a plain-text list with `formatMemories(...)`; empty results map to the fixed no-match string.\n7. Failures are logged with `logger.warn(\"recall failed\", ...)` and rethrown.\n\n## Modes / Variants\n- Tool path: explicit query-only recall. The tool does not compose context from recent turns; that richer path is reserved for backend auto-recall in `HindsightSessionState.beforeAgentStartPrompt(...)` / `maybeRecallOnAgentStart(...)`.\n- Bank scoping is inherited from the active `HindsightSessionState`:\n - `global` — no tag filter.\n - `per-project` — separate bank id per cwd basename.\n - `per-project-tagged` — shared bank id plus `project:<cwd basename>` filter with `tagsMatch = \"any\"`, so project-tagged and untagged global memories can both surface.\n- Session scope: reads cross-session server-side memories, but uses per-session cached config and scope.\n\n## Side Effects\n- Network\n - `POST /v1/default/banks/{bank_id}/memories/recall` via `packages/coding-agent/src/hindsight/client.ts`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - None on success. Unlike backend auto-recall, this tool does not update `lastRecallSnippet` or refresh the system prompt.\n- Background work / cancellation\n - Aborts through `untilAborted(...)` if the tool call signal is cancelled.\n\n## Limits & Caps\n- Client default budget for raw `HindsightApi.recall(...)` is `\"mid\"`; this tool overrides from config in `packages/coding-agent/src/hindsight/state.ts`.\n- Default recall settings from `packages/coding-agent/src/config/settings-schema.ts`:\n - `hindsight.recallBudget = \"mid\"`\n - `hindsight.recallMaxTokens = 1024`\n - `hindsight.recallTypes = [\"world\", \"experience\"]`\n- The explicit tool path does not apply `hindsight.recallContextTurns` or `hindsight.recallMaxQueryChars`; those caps only affect backend auto-recall query composition.\n\n## Errors\n- Throws `Hindsight backend is not initialised for this session.` when no state exists.\n- HTTP and fetch failures become `HindsightError` from `packages/coding-agent/src/hindsight/client.ts` with `statusCode` and parsed `details` when available.\n- Non-`Error` failures are normalized to `new Error(String(err))` before rethrow.\n\n## Notes\n- Shared backend details are in `docs/tools/retain.md`: server-side storage, subagent aliasing, bank scoping, mission setup, and mental-model bootstrap.\n- Mental models are not fetched by this tool. They may still already be present in the agent's developer instructions because the backend caches a `<mental_models>` block separately from recall results.\n- The tool returns raw memory hits; it does not synthesize across them. Use `reflect` for that path.\n",
80
80
  "tools/recipe.md": "# recipe\n\n> Run a task exposed by a detected project task runner.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/recipe/index.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/recipe.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/recipe/runner.ts` — op parsing, task resolution, prompt model.\n - `packages/coding-agent/src/tools/recipe/render.ts` — shell-style call/result rendering.\n - `packages/coding-agent/src/tools/recipe/runners/index.ts` — runner registration order.\n - `packages/coding-agent/src/tools/recipe/runners/just.ts` — detect `just` recipes from justfiles.\n - `packages/coding-agent/src/tools/recipe/runners/pkg.ts` — detect `package.json` scripts and workspaces.\n - `packages/coding-agent/src/tools/recipe/runners/cargo.ts` — detect Cargo run/test targets.\n - `packages/coding-agent/src/tools/recipe/runners/make.ts` — parse make targets from makefiles.\n - `packages/coding-agent/src/tools/recipe/runners/task.ts` — detect Taskfile tasks via `task --list-all`.\n - `packages/coding-agent/src/tools/bash.ts` — actual command execution, truncation, cwd/env handling.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `op` | `string` | Yes | Single string containing the task selector plus trailing arguments. The first whitespace-delimited token selects the task; the remainder is appended verbatim to the resolved runner command. Examples from schema/prompt: `test`, `build --release`, `pkg-a/test`, `crate/bin/server`, `pkg:test --watch`. |\n\n### `op` grammar\n\n```text\nop := S* head (S+ tail)?\nhead := explicit-runner / implicit-task\nexplicit-runner := runner-id \":\" task-token\nimplicit-task := task-token\nrunner-id := detected runner id (`just` | `pkg` | `cargo` | `make` | `task`)\ntask-token := first non-whitespace token; may contain `/`\ntail := remaining characters after the first whitespace run\n```\n\nResolution rules from `resolveRunnerAndTask()`:\n- Leading whitespace is ignored; an empty `op` throws `ToolError` with the available task list.\n- Only the first token is parsed structurally. Everything after the first whitespace run becomes `tail` and is appended to the command unchanged.\n- If `head` contains `:` and the prefix matches a detected runner id, the suffix must exactly match a task in that runner.\n- Otherwise `head` is treated as a task name and matched across all detected runners.\n- If exactly one runner has that task, it is used.\n- If multiple runners have that task, the call is rejected and the error tells the model to use `<runner-id>:<task>`.\n- Namespaced task names generated by runners use `/`, not `:`. `/` is part of the task name, not a parser separator.\n\n## Outputs\n- Delegates directly to `BashTool.execute()` and returns the same `AgentToolResult<BashToolDetails>` shape.\n- Success path: one text content block containing merged command output (`result.output` from bash execution, or `(no output)`), plus any timeout clamp notice appended after a blank line.\n- Recipe does not return separate `stdout`, `stderr`, or `exitCode` fields. `stdout`/`stderr` are already merged into the text block by bash execution; `exitCode` is only observed indirectly (success requires `0`, non-zero becomes an error).\n- Error path: throws `ToolError`; for non-zero exits the message is the merged output followed by `Command exited with code <n>`.\n- `details` may include:\n - `timeoutSeconds`: effective timeout used by bash.\n - `requestedTimeoutSeconds`: only when bash clamped a requested timeout; recipe never sets one itself.\n - `meta`: output truncation metadata from bash execution.\n - `async`: defined by bash background execution paths, but recipe does not expose an `async` input.\n- When bash output is truncated, the full text is stored in an artifact and referenced via bash truncation metadata.\n- Call/result rendering in the TUI uses bash shell rendering with a resolved title, command preview, and optional task cwd.\n\n## Flow\n1. `RecipeTool.createIf()` in `packages/coding-agent/src/tools/recipe/index.ts` checks `session.settings.get(\"recipe.enabled\")`; disabled returns `null`.\n2. It probes every runner in `RUNNERS` from `packages/coding-agent/src/tools/recipe/runners/index.ts` with `Promise.all(...)` in this order: `just`, `pkg`, `cargo`, `make`, `task`.\n3. Each runner returns either `null` or a `DetectedRunner { id, label, commandPrefix, tasks }`; runners with zero tasks are discarded.\n4. If no runners remain, the tool is not registered.\n5. Constructor stores detected runners, instantiates `BashTool`, renders the model-facing description by passing `buildPromptModel(runners)` into `packages/coding-agent/src/prompts/tools/recipe.md`, and builds shell renderers from `createRecipeToolRenderer()`.\n6. On execution, `RecipeTool.execute()` calls `resolveCommand(op, this.#runners)`.\n7. `resolveCommand()` in `packages/coding-agent/src/tools/recipe/runner.ts`:\n 1. `parseOp()` trims only leading whitespace, extracts the first non-whitespace token as `head`, and keeps the remainder as `tail`.\n 2. `resolveRunnerAndTask()` resolves `head` either as `runnerId:taskName` or as an unqualified task name.\n 3. It throws `ToolError` for empty ops, missing explicit tasks, ambiguous task names, or unknown tasks; all error variants include the available task list.\n 4. It builds the final shell command with `buildCommand(commandPrefix, commandName, tail)`, joining non-empty parts with spaces.\n 5. If the task defines `cwd`, that relative path is returned alongside the command.\n8. `RecipeTool.execute()` forwards `{ command, cwd }` into `BashTool.execute()`; recipe does not pass timeout, env, async, or pty options.\n9. `BashTool.execute()` resolves internal URLs, validates/normalizes cwd against `session.cwd`, clamps timeout, applies bash interception rules, runs the command, and formats the final result.\n\n## Modes / Variants\n- Tool enablement:\n - Disabled by `recipe.enabled` setting: tool is absent.\n - Enabled but no detected tasks: tool is absent.\n- Task selection:\n - Unqualified task name: succeeds only when exactly one detected runner owns that task.\n - Explicit runner-qualified task: `<runner-id>:<task>`.\n- Runner detection paths:\n - `just`: requires `just` on `PATH`, a justfile, and successful `just --dump --dump-format=json`.\n - `pkg`: requires a readable root `package.json`; picks a package manager command from lockfiles or `bun` availability; discovers root scripts and workspace package scripts.\n - `cargo`: requires `cargo` on `PATH`, `Cargo.toml`, and successful `cargo metadata --no-deps --format-version=1`.\n - `make`: requires `make` on `PATH` and a makefile; parses targets statically.\n - `task`: requires `task` on `PATH`, a Taskfile, and successful `task --list-all --json`.\n- Execution path:\n - Always the synchronous `bash` call surface from recipe inputs.\n - Bash may still auto-background long-running work if `bash.autoBackground.enabled` and session async job support are enabled.\n\n## Side Effects\n- Filesystem\n - Reads manifests from the session cwd during detection: justfiles, `package.json`, workspace `package.json` files, `Cargo.toml`, makefiles, `Taskfile.yml` / `Taskfile.yaml`.\n - Command execution runs in `session.cwd` or a task-specific relative cwd resolved under it.\n - Bash may allocate output artifacts for truncated command output.\n- Subprocesses / native bindings\n - Detection may spawn `just --dump --dump-format=json`, `cargo metadata --no-deps --format-version=1`, and `task --list-all --json`.\n - Execution spawns the resolved shell command through `BashTool` / `executeBash()`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Tool availability depends on session settings.\n - Constructor prompt text is specialized to detected runners/tasks.\n - Bash execution may create async job records and output artifacts if bash auto-background triggers.\n- User-visible prompts / interactive UI\n - The model-facing tool description lists detected runners and up to 20 tasks per runner.\n - TUI rendering shows a shell-style preview using the resolved title/command/cwd.\n- Background work / cancellation\n - Detection is parallelized across runners.\n - Runtime command execution honors the passed abort signal through `BashTool`.\n\n## Limits & Caps\n- Prompt task listing is capped at `PROMPT_TASK_LIMIT = 20` per runner in `packages/coding-agent/src/tools/recipe/runner.ts`; this affects the rendered tool description, not execution.\n- Recipe itself defines no timeout input; delegated bash execution therefore uses bash's default `timeout = 300` seconds from `packages/coding-agent/src/tools/bash.ts`.\n- Bash clamps timeouts to the configured bash range (`clampTimeout(\"bash\", ...)` in `packages/coding-agent/src/tools/bash.ts`), but recipe cannot request a custom value.\n- `pkg` workspace discovery normalizes workspace globs to `.../package.json` and sorts matched package files lexicographically before task generation.\n- `cargo` deduplicates generated task names with a `Set`, so duplicate targets collapse to one recipe task.\n\n## Errors\n- Detection failures in runner modules are mostly soft-failed:\n - Missing binaries, missing manifests, parse failures, or non-zero probe exits usually return `null` and log with `logger.debug(...)`.\n - Result: the affected runner disappears instead of surfacing an error to the model.\n- Invocation failures are hard errors from `resolveRunnerAndTask()`:\n - Empty `op`.\n - Explicit runner prefix with missing/empty task.\n - Ambiguous unqualified task name across runners.\n - Unknown task name.\n- Execution failures come from `BashTool.execute()`:\n - Invalid cwd.\n - Bash interceptor blocks.\n - Aborts/timeouts.\n - Non-zero exit codes.\n - Missing exit status.\n- All `resolveRunnerAndTask()` errors include the current available task list to help the model retry.\n\n## Notes\n- `RecipeTool` sets `concurrency = \"exclusive\"`; calls do not run concurrently with other exclusive tools.\n- Tool registration is all-or-nothing per runner: a detected runner with zero tasks is dropped.\n- Runner ids are fixed string literals from the runner modules: `just`, `pkg`, `cargo`, `make`, `task`.\n- `buildPromptModel()` includes each task's rendered command (`commandPrefix` + `commandName`) and relative cwd when present; the prompt therefore exposes the exact shell form recipe will run.\n- `pkg` task names:\n - Root `package.json` scripts keep bare names like `test`.\n - Workspace scripts are always namespaced as `<package-name-or-dir>/<script>` and set `cwd` to that package directory.\n - Script names are shell-quoted into `commandName`, so a task like `build` becomes `bun run 'build'` / `npm run 'build'` / similar.\n- `pkg` command prefix selection prefers lockfiles in this order: `bun.lock`/`bun.lockb`, `pnpm-lock.yaml`, `yarn.lock`, `package-lock.json`/`npm-shrinkwrap.json`; otherwise it falls back to `bun run` if `bun` exists, else `npm run`.\n- `cargo` task names are generated from metadata targets:\n - Single-package manifests: `bin/<name>`, `example/<name>`, `test/<name>`.\n - Multi-package workspaces: `<package>/bin/<name>`, `<package>/example/<name>`, `<package>/test/<name>`.\n - Each task overrides `commandPrefix` to the full `cargo run ... --bin|--example` or `cargo test ... --test` prefix, and `commandName` to the quoted target name.\n- `make` target parsing is static text parsing, not `make -qp` output:\n - Recognizes makefiles named `Makefile`, `makefile`, `GNUmakefile`.\n - Uses `.PHONY` lines to decide whether to include undocumented file targets; without any `.PHONY`, all parsed targets are exposed.\n - If `.PHONY` exists, documented non-phony targets are kept with ` (file target)` appended to `doc`.\n- `just` detection ignores private recipes and preserves declared parameter names only for prompt display; execution still accepts arbitrary `tail` text.\n- `task` detection uses `desc` first, then `summary`, for task documentation.\n- Recipe has no env input of its own. Commands inherit whatever environment `BashTool` supplies for normal bash execution in the session.",
81
81
  "tools/reflect.md": "# reflect\n\n> Ask the Hindsight server to synthesize an answer over the active memory bank.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/hindsight-reflect.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/reflect.md`\n- Key collaborators:\n - `packages/coding-agent/src/hindsight/bank.ts` — best-effort bank mission initialization.\n - `packages/coding-agent/src/hindsight/state.ts` — session state, shared bank scope, recall/reflect config.\n - `packages/coding-agent/src/hindsight/client.ts` — HTTP `reflect` call and error mapping.\n - `docs/tools/retain.md` — shared backend, storage, seeding, and mental-model bootstrap.\n\n## Inputs\n\n| Field | Type | Required | Description |\n|---|---|---:|---|\n| `query` | `string` | Yes | Question to answer from long-term memory. |\n| `context` | `string` | No | Extra guidance sent to the Hindsight reflect endpoint. |\n\n## Outputs\nReturns a single-shot tool result:\n\n- `content[0].type = \"text\"`\n- `content[0].text = response.text?.trim() || \"No relevant information found to reflect on.\"`\n- `details = {}`\n\nThe tool returns the Hindsight server's synthesized text directly; it does not expose raw recall hits.\n\n## Flow\n1. `HindsightReflectTool.createIf(...)` only exposes the tool when `memory.backend == \"hindsight\"`.\n2. `execute(...)` runs under `untilAborted(...)`.\n3. It reads the active `HindsightSessionState`; missing state throws `Hindsight backend is not initialised for this session.`\n4. Before reflecting, it calls `ensureBankMission(...)` with the current `bankId`, config, and process-local `missionsSet`.\n5. `ensureBankMission(...)` best-effort `PUT`s `/v1/default/banks/{bank_id}` with `reflect_mission` and optional `retain_mission` exactly once per bank/process; failures are swallowed.\n6. It calls `state.client.reflect(...)` with the model `query`, optional `context`, configured recall budget, and bank-scope tag filters.\n7. `HindsightApi.reflect(...)` POSTs `/v1/default/banks/{bank_id}/reflect` and defaults its own budget to `\"low\"` when callers omit one; this tool always passes the configured budget.\n8. Blank or whitespace-only responses are replaced with `No relevant information found to reflect on.`\n9. Failures are logged with `logger.warn(\"reflect failed\", ...)` and rethrown.\n\n## Modes / Variants\n- Tool path: one reflect request, optionally focused by `context`.\n- Bank scoping is inherited from the active `HindsightSessionState`:\n - `global` — no tag filter.\n - `per-project` — separate bank id per cwd basename.\n - `per-project-tagged` — shared bank id plus `project:<cwd basename>` filter with `tagsMatch = \"any\"`.\n- Session scope: reads cross-session server-side memories, but does not persist local output.\n\n## Side Effects\n- Network\n - Optional `PUT /v1/default/banks/{bank_id}` from `ensureBankMission(...)`.\n - `POST /v1/default/banks/{bank_id}/reflect` via `packages/coding-agent/src/hindsight/client.ts`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Reads session-held bank scope and config only. Does not update `lastRecallSnippet`, the mental-model cache, or the retain queue.\n- Background work / cancellation\n - Aborts through `untilAborted(...)` if the tool call signal is cancelled.\n\n## Limits & Caps\n- Tool-level params: only `query` is required; `context` is optional.\n- Default budget setting comes from `hindsight.recallBudget` in `packages/coding-agent/src/config/settings-schema.ts`; default `\"mid\"`.\n- `reflect` itself has no client-side token cap parameter here; unlike `recall`, the tool does not pass `maxTokens`.\n- Mission initialization tracks up to `MISSION_SET_CAP = 10_000` bank ids in `packages/coding-agent/src/hindsight/bank.ts`, then drops the oldest half of the sorted set.\n\n## Errors\n- Throws `Hindsight backend is not initialised for this session.` when no state exists.\n- HTTP and fetch failures become `HindsightError` from `packages/coding-agent/src/hindsight/client.ts` with `statusCode` and parsed `details` when available.\n- `ensureBankMission(...)` failures are silent to the tool caller; only the later reflect request can fail visibly.\n- Non-`Error` failures are normalized to `new Error(String(err))` before rethrow.\n\n## Notes\n- Shared backend details are in `docs/tools/retain.md`: server-side storage, subagent aliasing, bank scoping, seed mental models from `packages/coding-agent/src/hindsight/seeds.json`, and mental-model prompt injection.\n- `reflect` does not read the cached `<mental_models>` block directly. It queries the Hindsight server over the bank contents. The same session may also have separate mental-model context injected into its developer instructions.\n- Reflect mission and retain mission are bank-level server settings, not per-request payload. The tool just ensures they are present best-effort before reflecting.\n",