agent-sh 0.15.0 → 0.15.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/agent-loop.js +11 -8
- package/dist/agent/events.d.ts +4 -0
- package/docs/README.md +14 -0
- package/docs/agent.md +398 -0
- package/docs/architecture.md +196 -0
- package/docs/context-management.md +200 -0
- package/docs/extensions.md +951 -0
- package/docs/library.md +84 -0
- package/docs/troubleshooting.md +65 -0
- package/docs/tui-composition.md +294 -0
- package/docs/usage.md +306 -0
- package/examples/extensions/ash-scheme/package.json +1 -1
- package/examples/extensions/ashi/EXTENDING.md +2 -2
- package/examples/extensions/ashi/README.md +2 -2
- package/examples/extensions/ashi/docs/ui-surface-protocol.md +1 -1
- package/examples/extensions/ashi/package.json +5 -3
- package/examples/extensions/ashi/src/chat/tool-group.ts +3 -2
- package/examples/extensions/ashi/src/cli.ts +9 -8
- package/examples/extensions/ashi/src/dialogs.ts +16 -1
- package/examples/extensions/ashi/src/events.ts +1 -0
- package/examples/extensions/ashi/src/frontend.ts +26 -6
- package/examples/extensions/ashi/src/renderer.ts +24 -4
- package/examples/extensions/ashi/src/renderers/pi-tui/schema-mount.ts +4 -3
- package/examples/extensions/ashi/src/renderers/pi-tui/tool-group.ts +5 -8
- package/examples/extensions/ashi/src/ui.ts +11 -0
- package/examples/extensions/ashi-ink/package.json +2 -2
- package/examples/extensions/claude-code-bridge/package.json +1 -1
- package/examples/extensions/opencode-bridge/package.json +1 -1
- package/package.json +3 -1
- package/src/agent/agent-loop.ts +1566 -0
- package/src/agent/entry-format.ts +19 -0
- package/src/agent/events.ts +153 -0
- package/src/agent/extensions/rolling-history/constants.ts +1 -0
- package/src/agent/extensions/rolling-history/index.ts +202 -0
- package/src/agent/extensions/rolling-history/recall.ts +131 -0
- package/src/agent/extensions/rolling-history/strategy.ts +404 -0
- package/src/agent/host-types.ts +192 -0
- package/src/agent/index.ts +591 -0
- package/src/agent/live-view.ts +279 -0
- package/src/agent/llm-client.ts +111 -0
- package/src/agent/llm-facade.ts +43 -0
- package/src/agent/normalize-args.ts +61 -0
- package/src/agent/nuclear-form.ts +382 -0
- package/src/agent/providers/deepseek.ts +39 -0
- package/src/agent/providers/ollama.ts +92 -0
- package/src/agent/providers/openai-compatible.ts +36 -0
- package/src/agent/providers/openai.ts +52 -0
- package/src/agent/providers/opencode.ts +142 -0
- package/src/agent/providers/openrouter.ts +105 -0
- package/src/agent/providers/zai-coding-plan.ts +33 -0
- package/src/agent/session-store.ts +336 -0
- package/src/agent/skills.ts +228 -0
- package/src/agent/store.ts +310 -0
- package/src/agent/subagent.ts +305 -0
- package/src/agent/system-prompt.ts +151 -0
- package/src/agent/token-budget.ts +12 -0
- package/src/agent/tool-protocol.ts +722 -0
- package/src/agent/tool-registry.ts +66 -0
- package/src/agent/tools/bash.ts +95 -0
- package/src/agent/tools/edit-file.ts +154 -0
- package/src/agent/tools/expand-home.ts +7 -0
- package/src/agent/tools/glob.ts +108 -0
- package/src/agent/tools/grep.ts +228 -0
- package/src/agent/tools/list-skills.ts +37 -0
- package/src/agent/tools/ls.ts +81 -0
- package/src/agent/tools/pwsh.ts +140 -0
- package/src/agent/tools/read-file.ts +164 -0
- package/src/agent/tools/write-file.ts +72 -0
- package/src/agent/types.ts +149 -0
- package/src/cli/args.ts +91 -0
- package/src/cli/auth/cli.ts +244 -0
- package/src/cli/auth/discover.ts +52 -0
- package/src/cli/auth/keys.ts +143 -0
- package/src/cli/index.ts +295 -0
- package/src/cli/init.ts +74 -0
- package/src/cli/install.ts +439 -0
- package/src/cli/shell-env.ts +68 -0
- package/src/cli/subcommands.ts +24 -0
- package/src/core/event-bus.ts +252 -0
- package/src/core/extension-loader.ts +347 -0
- package/src/core/index.ts +152 -0
- package/src/core/settings.ts +398 -0
- package/src/core/types.ts +61 -0
- package/src/extensions/file-autocomplete.ts +71 -0
- package/src/extensions/index.ts +38 -0
- package/src/extensions/slash-commands/events.ts +14 -0
- package/src/extensions/slash-commands/index.ts +269 -0
- package/src/shell/events.ts +73 -0
- package/src/shell/host-types.ts +150 -0
- package/src/shell/index.ts +159 -0
- package/src/shell/input-handler.ts +505 -0
- package/src/shell/output-parser.ts +156 -0
- package/src/shell/shell-context.ts +193 -0
- package/src/shell/shell.ts +414 -0
- package/src/shell/strategies/bash.ts +83 -0
- package/src/shell/strategies/fish.ts +77 -0
- package/src/shell/strategies/index.ts +24 -0
- package/src/shell/strategies/types.ts +64 -0
- package/src/shell/strategies/zsh.ts +92 -0
- package/src/shell/terminal.ts +124 -0
- package/src/shell/tui-input-view.ts +222 -0
- package/src/shell/tui-renderer.ts +1126 -0
- package/src/utils/ansi.ts +140 -0
- package/src/utils/box-frame.ts +138 -0
- package/src/utils/compositor.ts +157 -0
- package/src/utils/diff-renderer.ts +829 -0
- package/src/utils/diff.ts +244 -0
- package/src/utils/executor.ts +305 -0
- package/src/utils/file-watcher.ts +110 -0
- package/src/utils/floating-panel.ts +1160 -0
- package/src/utils/handler-registry.ts +110 -0
- package/src/utils/line-editor.ts +636 -0
- package/src/utils/markdown.ts +437 -0
- package/src/utils/message-utils.ts +113 -0
- package/src/utils/package-version.ts +12 -0
- package/src/utils/palette.ts +64 -0
- package/src/utils/ref-counter.ts +9 -0
- package/src/utils/ripgrep-path.ts +17 -0
- package/src/utils/shell-output-spill.ts +76 -0
- package/src/utils/stream-transform.ts +292 -0
- package/src/utils/terminal-buffer.ts +213 -0
- package/src/utils/tool-display.ts +315 -0
- package/src/utils/tool-interactive.ts +71 -0
- package/src/utils/tty.ts +14 -0
package/dist/agent/agent-loop.js
CHANGED
|
@@ -528,14 +528,15 @@ export class AgentLoop {
|
|
|
528
528
|
// Advisable so extensions can inject fallback parsers without
|
|
529
529
|
// subclassing the protocol.
|
|
530
530
|
h.define("tool-protocol:extract-calls", (args) => this.toolProtocol.extractToolCalls(args.text, args.streamedCalls));
|
|
531
|
-
// System prompt: static identity + behavioral instructions.
|
|
532
|
-
//
|
|
533
|
-
// advise system-prompt:frontend to describe their
|
|
534
|
-
// prompt, or advise
|
|
531
|
+
// System prompt: static identity + behavioral instructions. Extensions can
|
|
532
|
+
// use registerInstruction() for a managed section, advise system-prompt:identity
|
|
533
|
+
// to replace the kernel identity, advise system-prompt:frontend to describe their
|
|
534
|
+
// surface high in the prompt, or advise system-prompt:build directly for full control.
|
|
535
|
+
h.define("system-prompt:identity", () => STATIC_IDENTITY);
|
|
535
536
|
h.define("system-prompt:build", () => {
|
|
536
537
|
// The active frontend's surface goes right after the identity; omitted if none.
|
|
537
538
|
const frontend = (this.handlers.call("system-prompt:frontend") ?? "").trim();
|
|
538
|
-
const parts = [
|
|
539
|
+
const parts = [this.handlers.call("system-prompt:identity")];
|
|
539
540
|
if (frontend)
|
|
540
541
|
parts.push(frontend);
|
|
541
542
|
parts.push(STATIC_GUIDE);
|
|
@@ -1229,8 +1230,10 @@ export class AgentLoop {
|
|
|
1229
1230
|
let reasoning = "";
|
|
1230
1231
|
const reasoningDetailsByIndex = new Map();
|
|
1231
1232
|
const pendingToolCalls = [];
|
|
1232
|
-
// Tool protocol controls what goes in the API tools param vs dynamic context
|
|
1233
|
-
|
|
1233
|
+
// Tool protocol controls what goes in the API tools param vs dynamic context.
|
|
1234
|
+
// agent:tools:visible is a filter point on the assembled list — distinct from
|
|
1235
|
+
// getTools(), which other code (e.g. tool bridges) needs unfiltered.
|
|
1236
|
+
const toolView = this.bus.emitPipe("agent:tools:visible", { tools: this.getTools() }).tools;
|
|
1234
1237
|
const apiTools = this.toolProtocol.getApiTools(toolView);
|
|
1235
1238
|
const toolPrompt = this.toolProtocol.getToolPrompt(toolView);
|
|
1236
1239
|
// Dynamic context rides on the trailing message — see
|
|
@@ -1242,7 +1245,7 @@ export class AgentLoop {
|
|
|
1242
1245
|
// Let extensions transform the message array (compact, summarize, filter, etc.)
|
|
1243
1246
|
const messages = this.handlers.call("conversation:prepare", rawMessages);
|
|
1244
1247
|
// Stream filter strips tool tags from display (inline mode only)
|
|
1245
|
-
const streamFilter = this.toolProtocol.createStreamFilter(
|
|
1248
|
+
const streamFilter = this.toolProtocol.createStreamFilter(toolView.map((t) => t.name));
|
|
1246
1249
|
const requestParams = {
|
|
1247
1250
|
messages,
|
|
1248
1251
|
tools: apiTools,
|
package/dist/agent/events.d.ts
CHANGED
|
@@ -30,6 +30,10 @@ declare module "../core/event-bus.js" {
|
|
|
30
30
|
"agent:tools": {
|
|
31
31
|
tools: ToolDefinition[];
|
|
32
32
|
};
|
|
33
|
+
/** Filter point: the assembled tool list as the model will see it, after getTools(). */
|
|
34
|
+
"agent:tools:visible": {
|
|
35
|
+
tools: ToolDefinition[];
|
|
36
|
+
};
|
|
33
37
|
"agent:instructions": {
|
|
34
38
|
instructions: Array<{
|
|
35
39
|
name: string;
|
package/docs/README.md
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# agent-sh Documentation
|
|
2
|
+
|
|
3
|
+
Start with **Usage** to get running, then **Architecture** for the mental model. Everything else builds on those two.
|
|
4
|
+
|
|
5
|
+
## Guides
|
|
6
|
+
|
|
7
|
+
1. [Usage Guide](usage.md) — install, run, configure providers and models
|
|
8
|
+
2. [Architecture](architecture.md) — pure kernel + extensions, the shell ↔ agent boundary, project structure
|
|
9
|
+
3. [The Built-in Agent: ash](agent.md) — how the default backend works: query flow, tools, system prompt, model switching
|
|
10
|
+
4. [Context Management](context-management.md) — shell-output spill, three-tier conversation compaction, recall APIs
|
|
11
|
+
5. [Extensions](extensions.md) — event bus, content transforms, custom agent backends, theming
|
|
12
|
+
6. [TUI Composition](tui-composition.md) — compositor, render surfaces, stream routing
|
|
13
|
+
7. [Library Usage](library.md) — embedding agent-sh in your own apps
|
|
14
|
+
8. [Troubleshooting](troubleshooting.md) — common errors and debug mode
|
package/docs/agent.md
ADDED
|
@@ -0,0 +1,398 @@
|
|
|
1
|
+
# The Built-in Agent: ash
|
|
2
|
+
|
|
3
|
+
agent-sh is designed to be backend-agnostic. The agent that drives a query — assembling context, calling an LLM, executing tools in a loop — is a replaceable component. Any extension can register a backend via `agent:register-backend` and become the default via the `defaultBackend` setting or the `/backend` slash command. Bridge backends like `claude-code` and `pi` plug external CLI agents into the same shell and TUI surface.
|
|
4
|
+
|
|
5
|
+
This document describes **ash**, the built-in backend that ships with agent-sh. The agent host (`src/agent/index.ts`) is activated unconditionally via `activateAgent(ctx)` — it attaches the `ctx.agent` surface, registers core tools, and emits `agent:register-backend` to register `ash` with the core's backend registry. ash only *activates* (constructs its `AgentLoop` and starts handling queries) when an LLM provider has both an apiKey and a model resolved, and `activateBackend("ash")` runs. It resolves providers from registered catalogs + settings overlay, configures an `LlmClient`, and calls any OpenAI-compatible API directly. It manages conversation state and executes tools in a loop until the LLM is done.
|
|
6
|
+
|
|
7
|
+
If you're looking to write your own backend instead of reading how ash works internally, see [Extensions: Custom Agent Backends](extensions.md#custom-agent-backends).
|
|
8
|
+
|
|
9
|
+
## The Query Flow
|
|
10
|
+
|
|
11
|
+
Here's what happens when you submit a query:
|
|
12
|
+
|
|
13
|
+
```
|
|
14
|
+
User types "> fix the failing test"
|
|
15
|
+
│
|
|
16
|
+
├─ 1. Context assembly — gather recent shell commands, output, cwd
|
|
17
|
+
├─ 2. System prompt (cached per cwd) + dynamic context (rebuilt every LLM call)
|
|
18
|
+
├─ 3. LLM call — stream response from the API
|
|
19
|
+
├─ 4. Tool loop — if LLM requested tool calls:
|
|
20
|
+
│ ├─ Execute each tool (with permission check if needed)
|
|
21
|
+
│ ├─ Add results to conversation
|
|
22
|
+
│ └─ Go back to step 3 (LLM sees tool results, decides next action)
|
|
23
|
+
└─ 5. Done — no more tool calls, emit response
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
The key insight: **the agent is a loop, not a single call**. The LLM calls tools, sees results, calls more tools, until it has enough information to respond. A single query might trigger dozens of LLM calls and tool executions.
|
|
27
|
+
|
|
28
|
+
## Context Assembly
|
|
29
|
+
|
|
30
|
+
Every query draws on two distinct streams of context:
|
|
31
|
+
|
|
32
|
+
- **Shell context** — the user's terminal activity (commands + outputs) plus the live cwd. This is what lets ash understand "fix this" after you ran a failing command, and what keeps it anchored in the right working directory across compactions. The current cwd is wrapped as `<cwd>` (always) and new shell activity since the last turn as `<shell_events>` (when there is any), both nested inside the per-query `<query_context>` envelope and prepended to your user message.
|
|
33
|
+
- **Conversation state** — the OpenAI chat messages array (`user`/`assistant`/`tool` messages). This is the LLM's memory of what it already said and did within this session.
|
|
34
|
+
|
|
35
|
+
The two streams don't overlap: agent tool outputs live only in the conversation, and shell context tracks only user-initiated activity. When either stream grows large, ash has escape hatches rather than silent truncation:
|
|
36
|
+
|
|
37
|
+
- **Long shell outputs** are spilled to tempfiles (`<tmpdir>/agent-sh-<pid>/<id>.out`) at capture time. The LLM sees a head+tail stub with the path and recovers the full output via plain `read_file`.
|
|
38
|
+
- **Older conversation turns** are compacted by the built-in `rolling-history` extension: each is nucleated into a one-line summary in a persistent store (`~/.agent-sh/rolling-history/history.jsonl`), with the full message kept in an ephemeral per-session cache. The `conversation_recall` tool browses, searches, and expands those entries.
|
|
39
|
+
|
|
40
|
+
Compaction is pluggable: the `conversation:compact` handler is advisable, so extensions can install richer strategies without changing the recall surface. See [Context Management](context-management.md) for the full design.
|
|
41
|
+
|
|
42
|
+
## System Prompt
|
|
43
|
+
|
|
44
|
+
The system prompt is assembled once per `cwd` and cached (invalidated when the working directory changes), so the prefix is stable for provider-side prompt caching. It includes:
|
|
45
|
+
|
|
46
|
+
1. **Identity** — "You are an AI coding assistant running inside agent-sh..."
|
|
47
|
+
2. **Tool decision guide** — when to use which built-in tool
|
|
48
|
+
3. **Tool usage guidelines** — read before editing, prefer edit over write, use grep/glob to find files, etc.
|
|
49
|
+
4. **Project conventions** — `CLAUDE.md`/`AGENT.md` walked from cwd to root (cwd-stable; see next section)
|
|
50
|
+
5. **Skills** — discovered project/global skills (cwd-stable)
|
|
51
|
+
6. **Extension instructions** — blocks registered by extensions via `registerInstruction()` (e.g. proactive recall guidance)
|
|
52
|
+
7. **Available tools** — name + description of every registered tool
|
|
53
|
+
8. **Extension-appended content** — extensions can advise `system-prompt:build` to append additional context (instance IDs, memory files, etc.)
|
|
54
|
+
|
|
55
|
+
Per-turn signals live in two symmetric handlers, both empty by default:
|
|
56
|
+
|
|
57
|
+
- **`query-context:build`** — fires once at user-query start. Output is wrapped in `<query_context>` and frozen into the user message, so it persists in conversation history. Shell context is the canonical example (`<cwd>` always, `<shell_events>` when there is fresh activity); other "what happened between turns" signals (notifications, calendar/inbox deltas) go here too.
|
|
58
|
+
- **`dynamic-context:build`** — fires on every LLM call (each tool-loop iteration). Output is wrapped in `<dynamic_context>` and ephemerally prepended to the trailing message at request time, so the cacheable prefix stays byte-stable. Use for "current state" signals: in-flight subagents, threshold warnings, active mode markers.
|
|
59
|
+
|
|
60
|
+
Extensions populate either handler via `ctx.agent.registerContextProducer(name, fn, { mode: "per-query" | "per-request" })`. When no producer contributes, no envelope tag is emitted at all — vanilla sessions send exactly `[system, ...history]`.
|
|
61
|
+
|
|
62
|
+
## Project Conventions
|
|
63
|
+
|
|
64
|
+
The agent automatically loads `CLAUDE.md` or `AGENT.md` files from your working directory hierarchy. These are included in the system prompt on every query, so the agent respects project-specific conventions without being told each time.
|
|
65
|
+
|
|
66
|
+
The agent scans from your current directory upward to the filesystem root. In each directory it checks for `CLAUDE.md` first, then `AGENT.md` as a fallback (only one per directory). Files are included root-first, so more specific project conventions appear last and take precedence.
|
|
67
|
+
|
|
68
|
+
```
|
|
69
|
+
~/projects/myapp/src/ ← cwd
|
|
70
|
+
~/projects/myapp/CLAUDE.md ← included (project-level)
|
|
71
|
+
~/CLAUDE.md ← included first (global conventions)
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Because the cached system prompt is invalidated whenever the working directory changes, `cd`-ing to a new project picks up its conventions automatically.
|
|
75
|
+
|
|
76
|
+
This follows the same convention as Claude Code — if you already have `CLAUDE.md` files, they work out of the box.
|
|
77
|
+
|
|
78
|
+
## Skills
|
|
79
|
+
|
|
80
|
+
Skills are reusable instruction sets that the agent can load on demand. They follow the [Agent Skills standard](https://agentskills.io/specification).
|
|
81
|
+
|
|
82
|
+
### Skill format
|
|
83
|
+
|
|
84
|
+
A skill is a directory containing a `SKILL.md` file with YAML frontmatter:
|
|
85
|
+
|
|
86
|
+
```markdown
|
|
87
|
+
---
|
|
88
|
+
name: docker-deploy
|
|
89
|
+
description: Build and deploy Docker containers to production
|
|
90
|
+
---
|
|
91
|
+
|
|
92
|
+
# Docker Deploy
|
|
93
|
+
|
|
94
|
+
## Steps
|
|
95
|
+
|
|
96
|
+
1. Build the image: `docker build -t app .`
|
|
97
|
+
2. Tag for registry: `docker tag app registry.example.com/app:latest`
|
|
98
|
+
3. Push: `docker push registry.example.com/app:latest`
|
|
99
|
+
...
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
The `name` and `description` fields are required. An optional `disable-model-invocation: true` hides the skill from the agent's automatic discovery.
|
|
103
|
+
|
|
104
|
+
### Discovery
|
|
105
|
+
|
|
106
|
+
**Global skills** are discovered from `~/.agent-sh/skills/` by default. Add more locations via `skillPaths` in `~/.agent-sh/settings.json`:
|
|
107
|
+
|
|
108
|
+
```json
|
|
109
|
+
{
|
|
110
|
+
"skillPaths": ["~/.agents/skills", "~/.claude/skills"]
|
|
111
|
+
}
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
**Project skills** are discovered from `.agents/skills/` in your working directory hierarchy (up to the git root). When you `cd` into a directory with new project skills, the agent is notified with their names.
|
|
115
|
+
|
|
116
|
+
### How the agent uses skills
|
|
117
|
+
|
|
118
|
+
Only skill **metadata** (name, description, file path) is included in the system prompt — not the full skill content. This keeps the prompt small regardless of how many skills you have.
|
|
119
|
+
|
|
120
|
+
1. The system prompt lists available skills with their descriptions and paths
|
|
121
|
+
2. The agent decides which skill is relevant (no extra round-trip needed)
|
|
122
|
+
3. The agent calls `read_file` on the skill's `SKILL.md` to load full instructions when ready to use it
|
|
123
|
+
|
|
124
|
+
The `list_skills` tool is also available for broader discovery.
|
|
125
|
+
|
|
126
|
+
### Slash command
|
|
127
|
+
|
|
128
|
+
Users can force-load a skill directly:
|
|
129
|
+
|
|
130
|
+
```
|
|
131
|
+
> /skill:docker-deploy
|
|
132
|
+
> /skill:docker-deploy deploy the staging branch
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
This injects the full skill content into the conversation. Tab completion works for skill names.
|
|
136
|
+
|
|
137
|
+
## The Tool Loop
|
|
138
|
+
|
|
139
|
+
This is the core of how the agent works. After each LLM call, the agent checks if the response includes tool calls. If yes, it executes them and feeds the results back to the LLM.
|
|
140
|
+
|
|
141
|
+
```
|
|
142
|
+
LLM response
|
|
143
|
+
├─ Text only → done, emit response
|
|
144
|
+
└─ Tool calls → for each tool call:
|
|
145
|
+
├─ Look up tool in registry
|
|
146
|
+
├─ Execute via the `tool:<name>` handler chain (advisors can wrap)
|
|
147
|
+
├─ Emit tool events (tool-started, tool-output-chunk, tool-completed)
|
|
148
|
+
├─ Add tool result to conversation
|
|
149
|
+
└─ After all tools: call LLM again with updated conversation
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
The loop continues until the LLM returns a response with no tool calls. There's no hard limit on iterations — the LLM decides when it's done.
|
|
153
|
+
|
|
154
|
+
### Permission gating
|
|
155
|
+
|
|
156
|
+
The kernel has no opinion on permission. By default every tool runs (yolo
|
|
157
|
+
mode). Gating extensions register tool advisors via `ctx.agent.adviseTool(name, ...)`
|
|
158
|
+
to interpose a confirmation prompt, audit log, or policy check before calling
|
|
159
|
+
`next(args, onChunk, ctx)`. See `examples/extensions/interactive-prompts.ts`
|
|
160
|
+
for a reference implementation that gates `bash`, `pwsh`, `write_file`, and
|
|
161
|
+
`edit_file`.
|
|
162
|
+
|
|
163
|
+
## Built-in Tools
|
|
164
|
+
|
|
165
|
+
Core tools are registered when `activateAgent(ctx)` runs — *before* extensions load. This means extensions can look up or advise tools at their own activate time. Additional tools come from extensions in `~/.agent-sh/extensions/`.
|
|
166
|
+
|
|
167
|
+
### bash
|
|
168
|
+
|
|
169
|
+
The primary tool for investigation and code execution. **`bash`** runs in an **isolated subprocess** (`/bin/bash -c`). The agent uses this for reading files, running tests, checking state, and executing commands. A `cd` here doesn't affect your shell. Output is captured and returned to the LLM.
|
|
170
|
+
|
|
171
|
+
Extensions can add tools that cross the shell↔agent boundary via `shell:exec-request` — for example, running commands with lasting effects in the live PTY (`cd`, `export`, `source`). We don't include such a tool as built-in because the right behavior depends on user preference. See `examples/extensions/user-shell.ts` for a ready-made implementation to start from.
|
|
172
|
+
|
|
173
|
+
### All tools
|
|
174
|
+
|
|
175
|
+
| Tool | Purpose | Side effects |
|
|
176
|
+
|---|---|---|
|
|
177
|
+
| `bash` | Run commands in isolated subprocess | Yes |
|
|
178
|
+
| `read_file` | Read file contents (line-numbered, with offset/limit) | No |
|
|
179
|
+
| `write_file` | Create or overwrite a file | Yes |
|
|
180
|
+
| `edit_file` | Find-and-replace in a file (old_text → new_text) | Yes |
|
|
181
|
+
| `grep` | Search file contents with regex (via ripgrep) | No |
|
|
182
|
+
| `glob` | Find files by name pattern | No |
|
|
183
|
+
| `ls` | List directory contents (with timestamps and sizes) | No |
|
|
184
|
+
| `list_skills` | List available skills (name, description, path) | No |
|
|
185
|
+
| `conversation_recall`\* | Browse/search/expand evicted conversation turns from the rolling-history store | No |
|
|
186
|
+
|
|
187
|
+
\* `conversation_recall` is not a core tool — it's registered by the built-in `rolling-history` extension, so it's absent under headless/bridge backends. Every other row is a core tool registered by `activateAgent(ctx)`.
|
|
188
|
+
|
|
189
|
+
**Common pattern**: all file-based tools resolve relative paths from the current working directory, looked up via the `cwd` handler (`ctx.call("cwd")`). The shell-context built-in advises this with the PTY-tracked cwd; without it, tools fall back to `process.cwd()`.
|
|
190
|
+
|
|
191
|
+
### Tool-specific enhancements
|
|
192
|
+
|
|
193
|
+
**`grep`** supports three output modes and pagination:
|
|
194
|
+
|
|
195
|
+
- `output_mode`: `files_with_matches` (default, file paths only), `content` (matching lines with optional `context_before`/`context_after`), or `count` (match counts per file)
|
|
196
|
+
- `case_insensitive`: case-insensitive search
|
|
197
|
+
- `head_limit` / `offset`: pagination — default limits are 200 entries for `files_with_matches`, 150 for `content`/`count`. Pass `head_limit=0` for unlimited. Long lines in `content` mode are capped at 500 characters.
|
|
198
|
+
|
|
199
|
+
**`read_file`** deduplicates reads:
|
|
200
|
+
|
|
201
|
+
- Tracks file modification time. If a file hasn't changed since the last read (same offset/limit), returns a stub instead of re-reading — saves context tokens.
|
|
202
|
+
- Files over 2MB require `offset` and `limit` to prevent OOM.
|
|
203
|
+
- Cache is automatically invalidated when a file-modifying tool (`write_file`, `edit_file`) succeeds on the same path.
|
|
204
|
+
|
|
205
|
+
**`edit_file`** provides diagnostic hints:
|
|
206
|
+
|
|
207
|
+
- When `old_text` isn't found, the tool searches for the closest match and suggests fixes (e.g. whitespace differences, wrong line location).
|
|
208
|
+
|
|
209
|
+
**`glob`** returns results sorted by modification time (newest first), capped at 200 files.
|
|
210
|
+
|
|
211
|
+
**`ls`** returns formatted output with timestamps (YYYY-MM-DD HH:MM) and human-readable file sizes.
|
|
212
|
+
|
|
213
|
+
### Tool batching and parallel execution
|
|
214
|
+
|
|
215
|
+
When the LLM requests multiple tool calls in a single response, the agent groups and executes them efficiently:
|
|
216
|
+
|
|
217
|
+
1. **Batch event** — before execution, the agent emits `agent:tool-batch` with tools grouped by kind (`read`, `search`, `execute`, etc.). The TUI uses this to render group headers with tree-style connectors.
|
|
218
|
+
|
|
219
|
+
2. **Parallel execution** — side-effect-free tools (`modifiesFiles` unset) run in parallel via `Promise.all`. Side-effecting tools run sequentially.
|
|
220
|
+
|
|
221
|
+
3. **Output truncation** — tool results over 16KB (~4K tokens) are head+tail truncated before being added to the conversation, preventing a single tool call from blowing through the context window.
|
|
222
|
+
|
|
223
|
+
### Structured result display
|
|
224
|
+
|
|
225
|
+
Tools can provide structured result information for the TUI via two optional methods on `ToolDefinition`:
|
|
226
|
+
|
|
227
|
+
- **`formatCall(args)`** — returns a short display string when the tool is called (e.g. the file path or search pattern). Shown in the TUI next to the tool icon.
|
|
228
|
+
- **`formatResult(args, result)`** — returns a `ToolResultDisplay` with an optional `summary` string (e.g. "42 files", "cached") and an optional structured `body` for richer rendering (diffs, line lists). The TUI's `render:result-body` handler renders the body — extensions can advise it.
|
|
229
|
+
|
|
230
|
+
### Retry and error handling
|
|
231
|
+
|
|
232
|
+
The agent retries transient failures with exponential backoff:
|
|
233
|
+
|
|
234
|
+
- **Context overflow** — compacts the conversation and retries immediately
|
|
235
|
+
- **Rate limits (429)** — respects `Retry-After` header, otherwise backs off exponentially
|
|
236
|
+
- **Transient errors (500/502/503, network)** — exponential backoff (1s, 2s, 4s..., capped at 30s), up to 3 retries
|
|
237
|
+
- **Non-retryable errors** — reported with provider-aware context (model name, endpoint, actionable hints)
|
|
238
|
+
|
|
239
|
+
### Thinking levels
|
|
240
|
+
|
|
241
|
+
The agent supports configurable thinking/reasoning levels for models that support `reasoning_effort`:
|
|
242
|
+
|
|
243
|
+
- Levels: `off` (default), `low`, `medium`, `high`, `xhigh` (`xhigh` falls back to `high` on providers that don't support it)
|
|
244
|
+
- Set via the `config:set-thinking` event (wired to `/thinking` slash command)
|
|
245
|
+
- Query current state via `config:get-thinking` pipe
|
|
246
|
+
- The agent validates that the current model/provider supports reasoning before enabling
|
|
247
|
+
|
|
248
|
+
### Echoing reasoning back to the model
|
|
249
|
+
|
|
250
|
+
DeepSeek-family models require their previous-turn `reasoning_content` / `reasoning_details` to be echoed back on the next assistant message. Most other reasoning models do **not** want that — feeding prior chain-of-thought back through lenient OpenAI-compatible shims can register as out-of-distribution input and degrade quality.
|
|
251
|
+
|
|
252
|
+
agent-sh gates this behavior per-model via the `Model.echoReasoning` flag (default `false`). Reasoning extras are only attached to assistant messages when the active model opts in.
|
|
253
|
+
|
|
254
|
+
For OpenRouter, the flag is set automatically: model ids matching the built-in pattern `/deepseek/i` (V3, V3.2, V4, rebadges) get `echoReasoning: true`. You can extend or override this in `settings.json`:
|
|
255
|
+
|
|
256
|
+
```json
|
|
257
|
+
{
|
|
258
|
+
"providers": {
|
|
259
|
+
"openrouter": {
|
|
260
|
+
"echoReasoningPatterns": ["my-custom-deepseek-fork"],
|
|
261
|
+
"models": [
|
|
262
|
+
{ "id": "deepseek/deepseek-v3.2", "echoReasoning": false },
|
|
263
|
+
{ "id": "openai/gpt-5.5", "reasoning": true }
|
|
264
|
+
]
|
|
265
|
+
}
|
|
266
|
+
}
|
|
267
|
+
}
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
- `echoReasoningPatterns` — regex **source strings** (no `/.../` delimiters), each compiled with the case-insensitive flag and tested against the model id. Examples: `"deepseek"` (substring match), `"^vendor/.*-r1$"` (anchored). Merged with the built-in `deepseek` pattern.
|
|
271
|
+
- Per-model `echoReasoning` — explicit boolean override that always wins over patterns.
|
|
272
|
+
- Invalid regexes are silently skipped, so a typo can't break provider registration.
|
|
273
|
+
|
|
274
|
+
### Tool interface
|
|
275
|
+
|
|
276
|
+
Every tool implements this interface:
|
|
277
|
+
|
|
278
|
+
```typescript
|
|
279
|
+
interface ToolDefinition {
|
|
280
|
+
name: string;
|
|
281
|
+
displayName?: string; // short label for TUI (defaults to name)
|
|
282
|
+
description: string;
|
|
283
|
+
input_schema: Record<string, unknown>; // JSON Schema for parameters
|
|
284
|
+
|
|
285
|
+
execute(
|
|
286
|
+
args: Record<string, unknown>,
|
|
287
|
+
onChunk?: (chunk: string) => void, // optional streaming callback
|
|
288
|
+
): Promise<ToolResult>;
|
|
289
|
+
|
|
290
|
+
modifiesFiles?: boolean; // has side effects (skips caching + parallel execution)
|
|
291
|
+
readOnly?: boolean; // safe to run without nuclear-form gating (registry tags accordingly)
|
|
292
|
+
showOutput?: boolean; // stream output to TUI (default: true)
|
|
293
|
+
|
|
294
|
+
// Display hooks (all optional)
|
|
295
|
+
getDisplayInfo?: (args) => ToolDisplayInfo; // icon, kind, file locations
|
|
296
|
+
formatCall?: (args) => string; // short call summary for TUI
|
|
297
|
+
formatResult?: (args, result) => ToolResultDisplay; // structured result
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
interface ToolResult {
|
|
301
|
+
content: string; // text returned to the LLM
|
|
302
|
+
exitCode: number | null;
|
|
303
|
+
isError: boolean;
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
interface ToolResultDisplay {
|
|
307
|
+
summary?: string; // one-line (e.g. "42 files", "+3/-1")
|
|
308
|
+
body?: ToolResultBody; // structured content for richer rendering
|
|
309
|
+
}
|
|
310
|
+
|
|
311
|
+
type ToolResultBody =
|
|
312
|
+
| { kind: "diff"; diff: unknown; filePath: string }
|
|
313
|
+
| { kind: "lines"; lines: string[]; maxLines?: number }
|
|
314
|
+
|
|
315
|
+
interface ToolDisplayInfo {
|
|
316
|
+
kind: "read" | "write" | "execute" | "search";
|
|
317
|
+
locations?: { path: string; line?: number | null }[];
|
|
318
|
+
icon?: string; // custom icon (e.g. "◆", "⌕")
|
|
319
|
+
}
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
The `onChunk` callback enables streaming tool output to the TUI in real time (used by `bash`). Tools that don't stream (like `read_file`) just return the final result. Extensions can wrap `onChunk` via the `tool:execute` handler to intercept or transform streamed output (e.g. secret redaction).
|
|
323
|
+
|
|
324
|
+
## Streaming
|
|
325
|
+
|
|
326
|
+
Response streaming has two phases:
|
|
327
|
+
|
|
328
|
+
**Phase 1 — LLM stream**: The agent iterates over chunks from the OpenAI streaming API. Each chunk can contain:
|
|
329
|
+
- `delta.content` — response text
|
|
330
|
+
- `delta.tool_calls` — tool call arguments (streamed incrementally, parsed by index)
|
|
331
|
+
- `delta.reasoning_content` — thinking/reasoning tokens (non-standard, used by models like DeepSeek-R1)
|
|
332
|
+
|
|
333
|
+
**Phase 2 — Content transform pipeline**: Text chunks are emitted via `bus.emitTransform("agent:response-chunk", { blocks })`. This runs the content through the extension transform pipeline (parsers, post-transforms) before the renderer sees it. See [Extensions: Content Transform Pipeline](extensions.md#content-transform-pipeline).
|
|
334
|
+
|
|
335
|
+
The agent accumulates the full response text separately for the final `agent:response-done` event.
|
|
336
|
+
|
|
337
|
+
## Conversation State
|
|
338
|
+
|
|
339
|
+
The conversation state is an OpenAI-compatible chat messages array. Each query adds messages:
|
|
340
|
+
|
|
341
|
+
```
|
|
342
|
+
User submits query → { role: "user", content: "fix the test" }
|
|
343
|
+
LLM responds with text → { role: "assistant", content: "I'll look at..." }
|
|
344
|
+
LLM requests tool call → { role: "assistant", tool_calls: [...] }
|
|
345
|
+
Tool returns result → { role: "tool", tool_call_id: "...", content: "..." }
|
|
346
|
+
```
|
|
347
|
+
|
|
348
|
+
This array grows with every turn. To prevent context overflow, ash auto-compacts when the estimated prompt tokens exceed the `autoCompactThreshold` fraction (default 0.5) of the model's usable context window.
|
|
349
|
+
|
|
350
|
+
### Auto-compaction
|
|
351
|
+
|
|
352
|
+
Before each LLM call, ash estimates the total prompt tokens. If it's over the threshold, it invokes the `conversation:compact` handler to free space, then proceeds. If the API still returns a context-overflow error, ash compacts more aggressively and retries once; if compaction frees nothing, it aborts rather than looping.
|
|
353
|
+
|
|
354
|
+
The default compaction strategy evicts older turns into the nuclear archive and leaves a bridge note; `conversation_recall` can bring them back on demand. See [Context Management](context-management.md#conversation-compaction) for the three-tier design and how to swap the strategy.
|
|
355
|
+
|
|
356
|
+
The user can also trigger compaction manually with `/compact`.
|
|
357
|
+
|
|
358
|
+
**Note**: reasoning/thinking tokens from the LLM stream are emitted as `agent:thinking-chunk` events for display but are **not stored in conversation state**. They're ephemeral — the LLM doesn't see its own reasoning on the next turn, unless reasoning echo is enabled for the model (see `echoReasoning` above).
|
|
359
|
+
|
|
360
|
+
## Provider Profiles & Model Switching
|
|
361
|
+
|
|
362
|
+
ash supports multiple models and providers, switchable at runtime.
|
|
363
|
+
|
|
364
|
+
### Models
|
|
365
|
+
|
|
366
|
+
Each entry is a `(provider, model)` target — a serializable identity plus capabilities:
|
|
367
|
+
|
|
368
|
+
```typescript
|
|
369
|
+
interface Model {
|
|
370
|
+
id: string; // model id, e.g. "openai/gpt-5"
|
|
371
|
+
provider: string; // identity is the (provider, id) pair
|
|
372
|
+
contextWindow?: number; // per-model context window (tokens); overrides the window the auto-compact threshold is measured against
|
|
373
|
+
maxTokens?: number;
|
|
374
|
+
reasoning?: boolean;
|
|
375
|
+
supportsReasoningEffort?: boolean;
|
|
376
|
+
echoReasoning?: boolean;
|
|
377
|
+
modalities?: ("text" | "image")[];
|
|
378
|
+
}
|
|
379
|
+
```
|
|
380
|
+
|
|
381
|
+
The credentials and provider-shape transforms needed to actually call a model — `apiKey`/`baseURL` plus the reasoning/cache encoders — live in a separate `ModelEndpoint`, resolved internally by `(provider, id)`. It never travels on a frontend-facing event, so `Model` stays secret-free and serializable.
|
|
382
|
+
|
|
383
|
+
`agent:get-models` returns the catalog; `agent:models-changed` fires when it changes. When all models share the same provider, switching just changes the model. When they span providers (e.g. OpenAI + Anthropic via OpenRouter), switching also reconfigures the LLM client with the new endpoint's credentials and base URL.
|
|
384
|
+
|
|
385
|
+
### Switching
|
|
386
|
+
|
|
387
|
+
- **`/model`** — show the current model
|
|
388
|
+
- **`/model <name>`** — switch to a specific model (may cross providers; credentials and base URL are reconfigured automatically)
|
|
389
|
+
|
|
390
|
+
The current model is shown in the TUI prompt. Switching mid-conversation preserves the conversation state — only the LLM endpoint changes.
|
|
391
|
+
|
|
392
|
+
To swap the backend itself (e.g. to `claude-code` or `pi`), use `/backend <name>` or set `defaultBackend` in settings.
|
|
393
|
+
|
|
394
|
+
## Extension Tools
|
|
395
|
+
|
|
396
|
+
Extensions can register custom tools via `ctx.agent.registerTool()`. These appear alongside built-in tools and follow the same `ToolDefinition` interface. This only works with the built-in `ash` backend — bridge backends manage their own tools.
|
|
397
|
+
|
|
398
|
+
See [Extensions: ExtensionContext API](extensions.md#extensioncontext-api) for the interface and [Extensions: Custom Agent Backends](extensions.md#custom-agent-backends) for writing backend extensions.
|