agent-sh 0.15.0 → 0.15.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/dist/agent/agent-loop.js +11 -8
  2. package/dist/agent/events.d.ts +4 -0
  3. package/docs/README.md +14 -0
  4. package/docs/agent.md +398 -0
  5. package/docs/architecture.md +196 -0
  6. package/docs/context-management.md +200 -0
  7. package/docs/extensions.md +951 -0
  8. package/docs/library.md +84 -0
  9. package/docs/troubleshooting.md +65 -0
  10. package/docs/tui-composition.md +294 -0
  11. package/docs/usage.md +306 -0
  12. package/examples/extensions/ash-scheme/package.json +1 -1
  13. package/examples/extensions/ashi/EXTENDING.md +2 -2
  14. package/examples/extensions/ashi/README.md +2 -2
  15. package/examples/extensions/ashi/docs/ui-surface-protocol.md +1 -1
  16. package/examples/extensions/ashi/package.json +5 -3
  17. package/examples/extensions/ashi/src/chat/tool-group.ts +3 -2
  18. package/examples/extensions/ashi/src/cli.ts +9 -8
  19. package/examples/extensions/ashi/src/dialogs.ts +16 -1
  20. package/examples/extensions/ashi/src/events.ts +1 -0
  21. package/examples/extensions/ashi/src/frontend.ts +26 -6
  22. package/examples/extensions/ashi/src/renderer.ts +24 -4
  23. package/examples/extensions/ashi/src/renderers/pi-tui/schema-mount.ts +4 -3
  24. package/examples/extensions/ashi/src/renderers/pi-tui/tool-group.ts +5 -8
  25. package/examples/extensions/ashi/src/ui.ts +11 -0
  26. package/examples/extensions/ashi-ink/package.json +2 -2
  27. package/examples/extensions/claude-code-bridge/package.json +1 -1
  28. package/examples/extensions/opencode-bridge/package.json +1 -1
  29. package/package.json +3 -1
  30. package/src/agent/agent-loop.ts +1566 -0
  31. package/src/agent/entry-format.ts +19 -0
  32. package/src/agent/events.ts +153 -0
  33. package/src/agent/extensions/rolling-history/constants.ts +1 -0
  34. package/src/agent/extensions/rolling-history/index.ts +202 -0
  35. package/src/agent/extensions/rolling-history/recall.ts +131 -0
  36. package/src/agent/extensions/rolling-history/strategy.ts +404 -0
  37. package/src/agent/host-types.ts +192 -0
  38. package/src/agent/index.ts +591 -0
  39. package/src/agent/live-view.ts +279 -0
  40. package/src/agent/llm-client.ts +111 -0
  41. package/src/agent/llm-facade.ts +43 -0
  42. package/src/agent/normalize-args.ts +61 -0
  43. package/src/agent/nuclear-form.ts +382 -0
  44. package/src/agent/providers/deepseek.ts +39 -0
  45. package/src/agent/providers/ollama.ts +92 -0
  46. package/src/agent/providers/openai-compatible.ts +36 -0
  47. package/src/agent/providers/openai.ts +52 -0
  48. package/src/agent/providers/opencode.ts +142 -0
  49. package/src/agent/providers/openrouter.ts +105 -0
  50. package/src/agent/providers/zai-coding-plan.ts +33 -0
  51. package/src/agent/session-store.ts +336 -0
  52. package/src/agent/skills.ts +228 -0
  53. package/src/agent/store.ts +310 -0
  54. package/src/agent/subagent.ts +305 -0
  55. package/src/agent/system-prompt.ts +151 -0
  56. package/src/agent/token-budget.ts +12 -0
  57. package/src/agent/tool-protocol.ts +722 -0
  58. package/src/agent/tool-registry.ts +66 -0
  59. package/src/agent/tools/bash.ts +95 -0
  60. package/src/agent/tools/edit-file.ts +154 -0
  61. package/src/agent/tools/expand-home.ts +7 -0
  62. package/src/agent/tools/glob.ts +108 -0
  63. package/src/agent/tools/grep.ts +228 -0
  64. package/src/agent/tools/list-skills.ts +37 -0
  65. package/src/agent/tools/ls.ts +81 -0
  66. package/src/agent/tools/pwsh.ts +140 -0
  67. package/src/agent/tools/read-file.ts +164 -0
  68. package/src/agent/tools/write-file.ts +72 -0
  69. package/src/agent/types.ts +149 -0
  70. package/src/cli/args.ts +91 -0
  71. package/src/cli/auth/cli.ts +244 -0
  72. package/src/cli/auth/discover.ts +52 -0
  73. package/src/cli/auth/keys.ts +143 -0
  74. package/src/cli/index.ts +295 -0
  75. package/src/cli/init.ts +74 -0
  76. package/src/cli/install.ts +439 -0
  77. package/src/cli/shell-env.ts +68 -0
  78. package/src/cli/subcommands.ts +24 -0
  79. package/src/core/event-bus.ts +252 -0
  80. package/src/core/extension-loader.ts +347 -0
  81. package/src/core/index.ts +152 -0
  82. package/src/core/settings.ts +398 -0
  83. package/src/core/types.ts +61 -0
  84. package/src/extensions/file-autocomplete.ts +71 -0
  85. package/src/extensions/index.ts +38 -0
  86. package/src/extensions/slash-commands/events.ts +14 -0
  87. package/src/extensions/slash-commands/index.ts +269 -0
  88. package/src/shell/events.ts +73 -0
  89. package/src/shell/host-types.ts +150 -0
  90. package/src/shell/index.ts +159 -0
  91. package/src/shell/input-handler.ts +505 -0
  92. package/src/shell/output-parser.ts +156 -0
  93. package/src/shell/shell-context.ts +193 -0
  94. package/src/shell/shell.ts +414 -0
  95. package/src/shell/strategies/bash.ts +83 -0
  96. package/src/shell/strategies/fish.ts +77 -0
  97. package/src/shell/strategies/index.ts +24 -0
  98. package/src/shell/strategies/types.ts +64 -0
  99. package/src/shell/strategies/zsh.ts +92 -0
  100. package/src/shell/terminal.ts +124 -0
  101. package/src/shell/tui-input-view.ts +222 -0
  102. package/src/shell/tui-renderer.ts +1126 -0
  103. package/src/utils/ansi.ts +140 -0
  104. package/src/utils/box-frame.ts +138 -0
  105. package/src/utils/compositor.ts +157 -0
  106. package/src/utils/diff-renderer.ts +829 -0
  107. package/src/utils/diff.ts +244 -0
  108. package/src/utils/executor.ts +305 -0
  109. package/src/utils/file-watcher.ts +110 -0
  110. package/src/utils/floating-panel.ts +1160 -0
  111. package/src/utils/handler-registry.ts +110 -0
  112. package/src/utils/line-editor.ts +636 -0
  113. package/src/utils/markdown.ts +437 -0
  114. package/src/utils/message-utils.ts +113 -0
  115. package/src/utils/package-version.ts +12 -0
  116. package/src/utils/palette.ts +64 -0
  117. package/src/utils/ref-counter.ts +9 -0
  118. package/src/utils/ripgrep-path.ts +17 -0
  119. package/src/utils/shell-output-spill.ts +76 -0
  120. package/src/utils/stream-transform.ts +292 -0
  121. package/src/utils/terminal-buffer.ts +213 -0
  122. package/src/utils/tool-display.ts +315 -0
  123. package/src/utils/tool-interactive.ts +71 -0
  124. package/src/utils/tty.ts +14 -0
@@ -528,14 +528,15 @@ export class AgentLoop {
528
528
  // Advisable so extensions can inject fallback parsers without
529
529
  // subclassing the protocol.
530
530
  h.define("tool-protocol:extract-calls", (args) => this.toolProtocol.extractToolCalls(args.text, args.streamedCalls));
531
- // System prompt: static identity + behavioral instructions.
532
- // Extensions can use registerInstruction() for a managed section,
533
- // advise system-prompt:frontend to describe their surface high in the
534
- // prompt, or advise this handler directly for full control.
531
+ // System prompt: static identity + behavioral instructions. Extensions can
532
+ // use registerInstruction() for a managed section, advise system-prompt:identity
533
+ // to replace the kernel identity, advise system-prompt:frontend to describe their
534
+ // surface high in the prompt, or advise system-prompt:build directly for full control.
535
+ h.define("system-prompt:identity", () => STATIC_IDENTITY);
535
536
  h.define("system-prompt:build", () => {
536
537
  // The active frontend's surface goes right after the identity; omitted if none.
537
538
  const frontend = (this.handlers.call("system-prompt:frontend") ?? "").trim();
538
- const parts = [STATIC_IDENTITY];
539
+ const parts = [this.handlers.call("system-prompt:identity")];
539
540
  if (frontend)
540
541
  parts.push(frontend);
541
542
  parts.push(STATIC_GUIDE);
@@ -1229,8 +1230,10 @@ export class AgentLoop {
1229
1230
  let reasoning = "";
1230
1231
  const reasoningDetailsByIndex = new Map();
1231
1232
  const pendingToolCalls = [];
1232
- // Tool protocol controls what goes in the API tools param vs dynamic context
1233
- const toolView = this.getTools();
1233
+ // Tool protocol controls what goes in the API tools param vs dynamic context.
1234
+ // agent:tools:visible is a filter point on the assembled list — distinct from
1235
+ // getTools(), which other code (e.g. tool bridges) needs unfiltered.
1236
+ const toolView = this.bus.emitPipe("agent:tools:visible", { tools: this.getTools() }).tools;
1234
1237
  const apiTools = this.toolProtocol.getApiTools(toolView);
1235
1238
  const toolPrompt = this.toolProtocol.getToolPrompt(toolView);
1236
1239
  // Dynamic context rides on the trailing message — see
@@ -1242,7 +1245,7 @@ export class AgentLoop {
1242
1245
  // Let extensions transform the message array (compact, summarize, filter, etc.)
1243
1246
  const messages = this.handlers.call("conversation:prepare", rawMessages);
1244
1247
  // Stream filter strips tool tags from display (inline mode only)
1245
- const streamFilter = this.toolProtocol.createStreamFilter(this.getTools().map((t) => t.name));
1248
+ const streamFilter = this.toolProtocol.createStreamFilter(toolView.map((t) => t.name));
1246
1249
  const requestParams = {
1247
1250
  messages,
1248
1251
  tools: apiTools,
@@ -30,6 +30,10 @@ declare module "../core/event-bus.js" {
30
30
  "agent:tools": {
31
31
  tools: ToolDefinition[];
32
32
  };
33
+ /** Filter point: the assembled tool list as the model will see it, after getTools(). */
34
+ "agent:tools:visible": {
35
+ tools: ToolDefinition[];
36
+ };
33
37
  "agent:instructions": {
34
38
  instructions: Array<{
35
39
  name: string;
package/docs/README.md ADDED
@@ -0,0 +1,14 @@
1
+ # agent-sh Documentation
2
+
3
+ Start with **Usage** to get running, then **Architecture** for the mental model. Everything else builds on those two.
4
+
5
+ ## Guides
6
+
7
+ 1. [Usage Guide](usage.md) — install, run, configure providers and models
8
+ 2. [Architecture](architecture.md) — pure kernel + extensions, the shell ↔ agent boundary, project structure
9
+ 3. [The Built-in Agent: ash](agent.md) — how the default backend works: query flow, tools, system prompt, model switching
10
+ 4. [Context Management](context-management.md) — shell-output spill, three-tier conversation compaction, recall APIs
11
+ 5. [Extensions](extensions.md) — event bus, content transforms, custom agent backends, theming
12
+ 6. [TUI Composition](tui-composition.md) — compositor, render surfaces, stream routing
13
+ 7. [Library Usage](library.md) — embedding agent-sh in your own apps
14
+ 8. [Troubleshooting](troubleshooting.md) — common errors and debug mode
package/docs/agent.md ADDED
@@ -0,0 +1,398 @@
1
+ # The Built-in Agent: ash
2
+
3
+ agent-sh is designed to be backend-agnostic. The agent that drives a query — assembling context, calling an LLM, executing tools in a loop — is a replaceable component. Any extension can register a backend via `agent:register-backend` and become the default via the `defaultBackend` setting or the `/backend` slash command. Bridge backends like `claude-code` and `pi` plug external CLI agents into the same shell and TUI surface.
4
+
5
+ This document describes **ash**, the built-in backend that ships with agent-sh. The agent host (`src/agent/index.ts`) is activated unconditionally via `activateAgent(ctx)` — it attaches the `ctx.agent` surface, registers core tools, and emits `agent:register-backend` to register `ash` with the core's backend registry. ash only *activates* (constructs its `AgentLoop` and starts handling queries) when an LLM provider has both an apiKey and a model resolved, and `activateBackend("ash")` runs. It resolves providers from registered catalogs + settings overlay, configures an `LlmClient`, and calls any OpenAI-compatible API directly. It manages conversation state and executes tools in a loop until the LLM is done.
6
+
7
+ If you're looking to write your own backend instead of reading how ash works internally, see [Extensions: Custom Agent Backends](extensions.md#custom-agent-backends).
8
+
9
+ ## The Query Flow
10
+
11
+ Here's what happens when you submit a query:
12
+
13
+ ```
14
+ User types "> fix the failing test"
15
+
16
+ ├─ 1. Context assembly — gather recent shell commands, output, cwd
17
+ ├─ 2. System prompt (cached per cwd) + dynamic context (rebuilt every LLM call)
18
+ ├─ 3. LLM call — stream response from the API
19
+ ├─ 4. Tool loop — if LLM requested tool calls:
20
+ │ ├─ Execute each tool (with permission check if needed)
21
+ │ ├─ Add results to conversation
22
+ │ └─ Go back to step 3 (LLM sees tool results, decides next action)
23
+ └─ 5. Done — no more tool calls, emit response
24
+ ```
25
+
26
+ The key insight: **the agent is a loop, not a single call**. The LLM calls tools, sees results, calls more tools, until it has enough information to respond. A single query might trigger dozens of LLM calls and tool executions.
27
+
28
+ ## Context Assembly
29
+
30
+ Every query draws on two distinct streams of context:
31
+
32
+ - **Shell context** — the user's terminal activity (commands + outputs) plus the live cwd. This is what lets ash understand "fix this" after you ran a failing command, and what keeps it anchored in the right working directory across compactions. The current cwd is wrapped as `<cwd>` (always) and new shell activity since the last turn as `<shell_events>` (when there is any), both nested inside the per-query `<query_context>` envelope and prepended to your user message.
33
+ - **Conversation state** — the OpenAI chat messages array (`user`/`assistant`/`tool` messages). This is the LLM's memory of what it already said and did within this session.
34
+
35
+ The two streams don't overlap: agent tool outputs live only in the conversation, and shell context tracks only user-initiated activity. When either stream grows large, ash has escape hatches rather than silent truncation:
36
+
37
+ - **Long shell outputs** are spilled to tempfiles (`<tmpdir>/agent-sh-<pid>/<id>.out`) at capture time. The LLM sees a head+tail stub with the path and recovers the full output via plain `read_file`.
38
+ - **Older conversation turns** are compacted by the built-in `rolling-history` extension: each is nucleated into a one-line summary in a persistent store (`~/.agent-sh/rolling-history/history.jsonl`), with the full message kept in an ephemeral per-session cache. The `conversation_recall` tool browses, searches, and expands those entries.
39
+
40
+ Compaction is pluggable: the `conversation:compact` handler is advisable, so extensions can install richer strategies without changing the recall surface. See [Context Management](context-management.md) for the full design.
41
+
42
+ ## System Prompt
43
+
44
+ The system prompt is assembled once per `cwd` and cached (invalidated when the working directory changes), so the prefix is stable for provider-side prompt caching. It includes:
45
+
46
+ 1. **Identity** — "You are an AI coding assistant running inside agent-sh..."
47
+ 2. **Tool decision guide** — when to use which built-in tool
48
+ 3. **Tool usage guidelines** — read before editing, prefer edit over write, use grep/glob to find files, etc.
49
+ 4. **Project conventions** — `CLAUDE.md`/`AGENT.md` walked from cwd to root (cwd-stable; see next section)
50
+ 5. **Skills** — discovered project/global skills (cwd-stable)
51
+ 6. **Extension instructions** — blocks registered by extensions via `registerInstruction()` (e.g. proactive recall guidance)
52
+ 7. **Available tools** — name + description of every registered tool
53
+ 8. **Extension-appended content** — extensions can advise `system-prompt:build` to append additional context (instance IDs, memory files, etc.)
54
+
55
+ Per-turn signals live in two symmetric handlers, both empty by default:
56
+
57
+ - **`query-context:build`** — fires once at user-query start. Output is wrapped in `<query_context>` and frozen into the user message, so it persists in conversation history. Shell context is the canonical example (`<cwd>` always, `<shell_events>` when there is fresh activity); other "what happened between turns" signals (notifications, calendar/inbox deltas) go here too.
58
+ - **`dynamic-context:build`** — fires on every LLM call (each tool-loop iteration). Output is wrapped in `<dynamic_context>` and ephemerally prepended to the trailing message at request time, so the cacheable prefix stays byte-stable. Use for "current state" signals: in-flight subagents, threshold warnings, active mode markers.
59
+
60
+ Extensions contribute to either handler via `ctx.agent.registerContextProducer(name, fn, { mode: "per-query" | "per-request" })`. When no producer contributes, no envelope tag is emitted at all — vanilla sessions send exactly `[system, ...history]`.
61
+
62
+ ## Project Conventions
63
+
64
+ The agent automatically loads `CLAUDE.md` or `AGENT.md` files from your working directory hierarchy. These are included in the system prompt on every query, so the agent respects project-specific conventions without being told each time.
65
+
66
+ The agent scans from your current directory upward to the filesystem root. In each directory it checks for `CLAUDE.md` first, then `AGENT.md` as a fallback (only one per directory). Files are included root-first, so more specific project conventions appear last and take precedence.
67
+
68
+ ```
69
+ ~/projects/myapp/src/ ← cwd
70
+ ~/projects/myapp/CLAUDE.md ← included (project-level)
71
+ ~/CLAUDE.md ← included first (global conventions)
72
+ ```
73
+
74
+ Since the cached system prompt is invalidated and rebuilt whenever the working directory changes, `cd`-ing to a new project picks up its conventions automatically.
75
+
76
+ This follows the same convention as Claude Code — if you already have `CLAUDE.md` files, they work out of the box.
77
+
78
+ ## Skills
79
+
80
+ Skills are reusable instruction sets that the agent can load on demand. They follow the [Agent Skills standard](https://agentskills.io/specification).
81
+
82
+ ### Skill format
83
+
84
+ A skill is a directory containing a `SKILL.md` file with YAML frontmatter:
85
+
86
+ ```markdown
87
+ ---
88
+ name: docker-deploy
89
+ description: Build and deploy Docker containers to production
90
+ ---
91
+
92
+ # Docker Deploy
93
+
94
+ ## Steps
95
+
96
+ 1. Build the image: `docker build -t app .`
97
+ 2. Tag for registry: `docker tag app registry.example.com/app:latest`
98
+ 3. Push: `docker push registry.example.com/app:latest`
99
+ ...
100
+ ```
101
+
102
+ The `name` and `description` fields are required. An optional `disable-model-invocation: true` hides the skill from the agent's automatic discovery.
103
+
104
+ ### Discovery
105
+
106
+ **Global skills** are discovered from `~/.agent-sh/skills/` by default. Add more locations via `skillPaths` in `~/.agent-sh/settings.json`:
107
+
108
+ ```json
109
+ {
110
+ "skillPaths": ["~/.agents/skills", "~/.claude/skills"]
111
+ }
112
+ ```
113
+
114
+ **Project skills** are discovered from `.agents/skills/` in your working directory hierarchy (up to the git root). When you `cd` into a directory with new project skills, the agent is notified with their names.
115
+
116
+ ### How the agent uses skills
117
+
118
+ Only skill **metadata** (name, description, file path) is included in the system prompt — not the full skill content. This keeps the prompt small regardless of how many skills you have.
119
+
120
+ 1. The system prompt lists available skills with their descriptions and paths
121
+ 2. The agent decides which skill is relevant (no extra round-trip needed)
122
+ 3. The agent calls `read_file` on the skill's `SKILL.md` to load full instructions when ready to use it
123
+
124
+ The `list_skills` tool is also available for broader discovery.
125
+
126
+ ### Slash command
127
+
128
+ Users can force-load a skill directly:
129
+
130
+ ```
131
+ > /skill:docker-deploy
132
+ > /skill:docker-deploy deploy the staging branch
133
+ ```
134
+
135
+ This injects the full skill content into the conversation. Tab completion works for skill names.
136
+
137
+ ## The Tool Loop
138
+
139
+ This is the core of how the agent works. After each LLM call, the agent checks if the response includes tool calls. If yes, it executes them and feeds the results back to the LLM.
140
+
141
+ ```
142
+ LLM response
143
+ ├─ Text only → done, emit response
144
+ └─ Tool calls → for each tool call:
145
+ ├─ Look up tool in registry
146
+ ├─ Execute via the `tool:<name>` handler chain (advisors can wrap)
147
+ ├─ Emit tool events (tool-started, tool-output-chunk, tool-completed)
148
+ ├─ Add tool result to conversation
149
+ └─ After all tools: call LLM again with updated conversation
150
+ ```
151
+
152
+ The loop continues until the LLM returns a response with no tool calls. There's no hard limit on iterations — the LLM decides when it's done.
153
+
154
+ ### Permission gating
155
+
156
+ The kernel has no opinion on permission. By default every tool runs (yolo
157
+ mode). Gating extensions register tool advisors via `ctx.agent.adviseTool(name, ...)`
158
+ to interpose a confirmation prompt, audit log, or policy check before calling
159
+ `next(args, onChunk, ctx)`. See `examples/extensions/interactive-prompts.ts`
160
+ for a reference implementation that gates `bash`, `pwsh`, `write_file`, and
161
+ `edit_file`.
162
+
163
+ ## Built-in Tools
164
+
165
+ Core tools are registered when `activateAgent(ctx)` runs — *before* extensions load. This means extensions can look up or advise tools at their own activate time. Additional tools come from extensions in `~/.agent-sh/extensions/`.
166
+
167
+ ### bash
168
+
169
+ The primary tool for investigation and code execution. **`bash`** runs in an **isolated subprocess** (`/bin/bash -c`). The agent uses this for reading files, running tests, checking state, and executing commands. A `cd` here doesn't affect your shell. Output is captured and returned to the LLM.
170
+
171
+ Extensions can add tools that cross the shell↔agent boundary via `shell:exec-request` — for example, running commands with lasting effects in the live PTY (`cd`, `export`, `source`). We don't include such a tool as built-in because the right behavior depends on user preference. See `examples/extensions/user-shell.ts` for a ready-made implementation to start from.
172
+
173
+ ### All tools
174
+
175
+ | Tool | Purpose | Side effects |
176
+ |---|---|---|
177
+ | `bash` | Run commands in isolated subprocess | Yes |
178
+ | `read_file` | Read file contents (line-numbered, with offset/limit) | No |
179
+ | `write_file` | Create or overwrite a file | Yes |
180
+ | `edit_file` | Find-and-replace in a file (old_text → new_text) | Yes |
181
+ | `grep` | Search file contents with regex (via ripgrep) | No |
182
+ | `glob` | Find files by name pattern | No |
183
+ | `ls` | List directory contents (with timestamps and sizes) | No |
184
+ | `list_skills` | List available skills (name, description, path) | No |
185
+ | `conversation_recall`\* | Browse/search/expand evicted conversation turns from the rolling-history store | No |
186
+
187
+ \* `conversation_recall` is not a core tool — it's registered by the built-in `rolling-history` extension, so it's absent under headless/bridge backends. Every other row is a core tool registered by `activateAgent(ctx)`.
188
+
189
+ **Common pattern**: all file-based tools resolve relative paths from the current working directory, looked up via the `cwd` handler (`ctx.call("cwd")`). The shell-context built-in advises this with the PTY-tracked cwd; without it, tools fall back to `process.cwd()`.
190
+
191
+ ### Tool-specific enhancements
192
+
193
+ **`grep`** supports three output modes and pagination:
194
+
195
+ - `output_mode`: `files_with_matches` (default, file paths only), `content` (matching lines with optional `context_before`/`context_after`), or `count` (match counts per file)
196
+ - `case_insensitive`: case-insensitive search
197
+ - `head_limit` / `offset`: pagination — default limits are 200 entries for `files_with_matches`, 150 for `content`/`count`. Pass `head_limit=0` for unlimited. Long lines in `content` mode are capped at 500 characters.
198
+
199
+ **`read_file`** deduplicates reads:
200
+
201
+ - Tracks file modification time. If a file hasn't changed since the last read (same offset/limit), returns a stub instead of re-reading — saves context tokens.
202
+ - Files over 2MB require `offset` and `limit` to prevent OOM.
203
+ - Cache is automatically invalidated when a file-modifying tool (`write_file`, `edit_file`) succeeds on the same path.
204
+
205
+ **`edit_file`** provides diagnostic hints:
206
+
207
+ - When `old_text` isn't found, the tool searches for the closest match and suggests fixes (e.g. whitespace differences, wrong line location).
208
+
209
+ **`glob`** returns results sorted by modification time (newest first), capped at 200 files.
210
+
211
+ **`ls`** returns formatted output with timestamps (YYYY-MM-DD HH:MM) and human-readable file sizes.
212
+
213
+ ### Tool batching and parallel execution
214
+
215
+ When the LLM requests multiple tool calls in a single response, the agent groups and executes them efficiently:
216
+
217
+ 1. **Batch event** — before execution, the agent emits `agent:tool-batch` with tools grouped by kind (`read`, `search`, `execute`, etc.). The TUI uses this to render group headers with tree-style connectors.
218
+
219
+ 2. **Parallel execution** — side-effect-free tools (`modifiesFiles` unset) run in parallel via `Promise.all`. Side-effecting tools run sequentially.
220
+
221
+ 3. **Output truncation** — tool results over 16KB (~4K tokens) are head+tail truncated before being added to the conversation, preventing a single tool call from blowing through the context window.
222
+
223
+ ### Structured result display
224
+
225
+ Tools can provide structured result information for the TUI via two optional methods on `ToolDefinition`:
226
+
227
+ - **`formatCall(args)`** — returns a short display string when the tool is called (e.g. the file path or search pattern). Shown in the TUI next to the tool icon.
228
+ - **`formatResult(args, result)`** — returns a `ToolResultDisplay` with an optional `summary` string (e.g. "42 files", "cached") and an optional structured `body` for richer rendering (diffs, line lists). The TUI's `render:result-body` handler renders the body — extensions can advise it.
229
+
230
+ ### Retry and error handling
231
+
232
+ The agent classifies API failures and retries the recoverable ones, using exponential backoff for transient errors:
233
+
234
+ - **Context overflow** — compacts the conversation and retries immediately
235
+ - **Rate limits (429)** — respects `Retry-After` header, otherwise backs off exponentially
236
+ - **Transient errors (500/502/503, network)** — exponential backoff (1s, 2s, 4s..., capped at 30s), up to 3 retries
237
+ - **Non-retryable errors** — reported with provider-aware context (model name, endpoint, actionable hints)
238
+
239
+ ### Thinking levels
240
+
241
+ The agent supports configurable thinking/reasoning levels for models that support `reasoning_effort`:
242
+
243
+ - Levels: `off` (default), `low`, `medium`, `high`, `xhigh` (`xhigh` falls back to `high` on providers that don't support it)
244
+ - Set via the `config:set-thinking` event (wired to `/thinking` slash command)
245
+ - Query current state via `config:get-thinking` pipe
246
+ - The agent validates that the current model/provider supports reasoning before enabling
247
+
248
+ ### Echoing reasoning back to the model
249
+
250
+ DeepSeek-family models require their previous-turn `reasoning_content` / `reasoning_details` to be echoed back on the next assistant message. Most other reasoning models do **not** want that — feeding prior chain-of-thought back through lenient OpenAI-compatible shims can register as out-of-distribution input and degrade quality.
251
+
252
+ agent-sh gates this behavior per-model via the `Model.echoReasoning` flag (default `false`). Reasoning extras are only attached to assistant messages when the active model opts in.
253
+
254
+ For OpenRouter, the flag is set automatically: model ids matching the built-in pattern `/deepseek/i` (V3, V3.2, V4, rebadges) get `echoReasoning: true`. You can extend or override this in `settings.json`:
255
+
256
+ ```json
257
+ {
258
+ "providers": {
259
+ "openrouter": {
260
+ "echoReasoningPatterns": ["my-custom-deepseek-fork"],
261
+ "models": [
262
+ { "id": "deepseek/deepseek-v3.2", "echoReasoning": false },
263
+ { "id": "openai/gpt-5.5", "reasoning": true }
264
+ ]
265
+ }
266
+ }
267
+ }
268
+ ```
269
+
270
+ - `echoReasoningPatterns` — regex **source strings** (no `/.../` delimiters), each compiled with the case-insensitive flag and tested against the model id. Examples: `"deepseek"` (substring match), `"^vendor/.*-r1$"` (anchored). Merged with the built-in `deepseek` pattern.
271
+ - Per-model `echoReasoning` — explicit boolean override that always wins over patterns.
272
+ - Invalid regexes are silently skipped, so a typo can't break provider registration.
273
+
274
+ ### Tool interface
275
+
276
+ Every tool implements this interface:
277
+
278
+ ```typescript
279
+ interface ToolDefinition {
280
+ name: string;
281
+ displayName?: string; // short label for TUI (defaults to name)
282
+ description: string;
283
+ input_schema: Record<string, unknown>; // JSON Schema for parameters
284
+
285
+ execute(
286
+ args: Record<string, unknown>,
287
+ onChunk?: (chunk: string) => void, // optional streaming callback
288
+ ): Promise<ToolResult>;
289
+
290
+ modifiesFiles?: boolean; // has side effects (skips caching + parallel execution)
291
+ readOnly?: boolean; // safe to run without nuclear-form gating (registry tags accordingly)
292
+ showOutput?: boolean; // stream output to TUI (default: true)
293
+
294
+ // Display hooks (all optional)
295
+ getDisplayInfo?: (args) => ToolDisplayInfo; // icon, kind, file locations
296
+ formatCall?: (args) => string; // short call summary for TUI
297
+ formatResult?: (args, result) => ToolResultDisplay; // structured result
298
+ }
299
+
300
+ interface ToolResult {
301
+ content: string; // text returned to the LLM
302
+ exitCode: number | null;
303
+ isError: boolean;
304
+ }
305
+
306
+ interface ToolResultDisplay {
307
+ summary?: string; // one-line (e.g. "42 files", "+3/-1")
308
+ body?: ToolResultBody; // structured content for richer rendering
309
+ }
310
+
311
+ type ToolResultBody =
312
+ | { kind: "diff"; diff: unknown; filePath: string }
313
+ | { kind: "lines"; lines: string[]; maxLines?: number }
314
+
315
+ interface ToolDisplayInfo {
316
+ kind: "read" | "write" | "execute" | "search";
317
+ locations?: { path: string; line?: number | null }[];
318
+ icon?: string; // custom icon (e.g. "◆", "⌕")
319
+ }
320
+ ```
321
+
322
+ The `onChunk` callback enables streaming tool output to the TUI in real-time (used by `bash`). Tools that don't stream (like `read_file`) just return the final result. Extensions can wrap `onChunk` via the `tool:execute` handler to intercept or transform streamed output (e.g. secret redaction).
323
+
324
+ ## Streaming
325
+
326
+ Response streaming has two phases:
327
+
328
+ **Phase 1 — LLM stream**: The agent iterates over chunks from the OpenAI-compatible streaming API. Each chunk can contain:
329
+ - `delta.content` — response text
330
+ - `delta.tool_calls` — tool call arguments (streamed incrementally, parsed by index)
331
+ - `delta.reasoning_content` — thinking/reasoning tokens (non-standard, used by models like DeepSeek-R1)
332
+
333
+ **Phase 2 — Content transform pipeline**: Text chunks are emitted via `bus.emitTransform("agent:response-chunk", { blocks })`. This runs the content through the extension transform pipeline (parsers, post-transforms) before the renderer sees it. See [Extensions: Content Transform Pipeline](extensions.md#content-transform-pipeline).
334
+
335
+ The agent accumulates the full response text separately for the final `agent:response-done` event.
336
+
337
+ ## Conversation State
338
+
339
+ The conversation state is an OpenAI-compatible chat messages array. Each query adds messages:
340
+
341
+ ```
342
+ User submits query → { role: "user", content: "fix the test" }
343
+ LLM responds with text → { role: "assistant", content: "I'll look at..." }
344
+ LLM requests tool call → { role: "assistant", tool_calls: [...] }
345
+ Tool returns result → { role: "tool", tool_call_id: "...", content: "..." }
346
+ ```
347
+
348
+ This array grows with every turn. To prevent context overflow, ash auto-compacts when the estimated prompt tokens exceed the `autoCompactThreshold` fraction (default 0.5) of the model's usable context window.
349
+
350
+ ### Auto-compaction
351
+
352
+ Before each LLM call, ash estimates the total prompt tokens. If it's over the threshold, it invokes the `conversation:compact` handler to free space, then proceeds. If the API still returns a context-overflow error, ash compacts more aggressively and retries once; if compaction frees nothing, it aborts rather than looping.
353
+
354
+ The default compaction strategy evicts older turns into the nuclear archive and leaves a bridge note; `conversation_recall` can bring them back on demand. See [Context Management](context-management.md#conversation-compaction) for the three-tier design and how to swap the strategy.
355
+
356
+ The user can also trigger compaction manually with `/compact`.
357
+
358
+ **Note**: reasoning/thinking tokens from the LLM stream are emitted as `agent:thinking-chunk` events for display but are **not stored in conversation state**. They're ephemeral — the LLM doesn't see its own reasoning on the next turn.
359
+
360
+ ## Provider Profiles & Model Switching
361
+
362
+ ash supports multiple models and providers, switchable at runtime.
363
+
364
+ ### Models
365
+
366
+ Each entry is a `(provider, model)` target — a serializable identity plus capabilities:
367
+
368
+ ```typescript
369
+ interface Model {
370
+ id: string; // model id, e.g. "openai/gpt-5"
371
+ provider: string; // identity is the (provider, id) pair
372
+ contextWindow?: number; // per-model override for the auto-compact threshold
373
+ maxTokens?: number;
374
+ reasoning?: boolean;
375
+ supportsReasoningEffort?: boolean;
376
+ echoReasoning?: boolean;
377
+ modalities?: ("text" | "image")[];
378
+ }
379
+ ```
380
+
381
+ The credentials and provider-shape transforms needed to actually call a model — `apiKey`/`baseURL` plus the reasoning/cache encoders — live in a separate `ModelEndpoint`, resolved internally by `(provider, id)`. It never travels on a frontend-facing event, so `Model` stays secret-free and serializable.
382
+
383
+ `agent:get-models` returns the catalog; `agent:models-changed` fires when it changes. When all models share the same provider, switching just changes the model. When they span providers (e.g. OpenAI + Anthropic via OpenRouter), switching also reconfigures the LLM client with the new endpoint's credentials and base URL.
384
+
385
+ ### Switching
386
+
387
+ - **`/model`** — show the current model
388
+ - **`/model <name>`** — switch to a specific model (may cross providers; credentials and base URL are reconfigured automatically)
389
+
390
+ The current model is shown in the TUI prompt. Switching mid-conversation preserves the conversation state — only the LLM endpoint changes.
391
+
392
+ To swap the backend itself (e.g. to `claude-code` or `pi`), use `/backend <name>` or set `defaultBackend` in settings.
393
+
394
+ ## Extension Tools
395
+
396
+ Extensions can register custom tools via `ctx.agent.registerTool()`. These appear alongside built-in tools and follow the same `ToolDefinition` interface. This only works with the built-in `ash` backend — bridge backends manage their own tools.
397
+
398
+ See [Extensions: ExtensionContext API](extensions.md#extensioncontext-api) for the interface and [Extensions: Custom Agent Backends](extensions.md#custom-agent-backends) for writing backend extensions.