agent-sh 0.15.0 → 0.15.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. package/docs/README.md +14 -0
  2. package/docs/agent.md +398 -0
  3. package/docs/architecture.md +196 -0
  4. package/docs/context-management.md +200 -0
  5. package/docs/extensions.md +951 -0
  6. package/docs/library.md +84 -0
  7. package/docs/troubleshooting.md +65 -0
  8. package/docs/tui-composition.md +294 -0
  9. package/docs/usage.md +306 -0
  10. package/examples/extensions/ash-scheme/package.json +1 -1
  11. package/examples/extensions/ashi/EXTENDING.md +2 -2
  12. package/examples/extensions/ashi/README.md +2 -2
  13. package/examples/extensions/ashi/docs/ui-surface-protocol.md +1 -1
  14. package/examples/extensions/ashi/package.json +5 -3
  15. package/examples/extensions/ashi/src/cli.ts +6 -5
  16. package/examples/extensions/ashi/src/renderer.ts +22 -2
  17. package/examples/extensions/ashi/src/renderers/pi-tui/tool-group.ts +5 -8
  18. package/examples/extensions/ashi-ink/package.json +2 -2
  19. package/examples/extensions/claude-code-bridge/package.json +1 -1
  20. package/examples/extensions/opencode-bridge/package.json +1 -1
  21. package/package.json +3 -1
  22. package/src/agent/agent-loop.ts +1563 -0
  23. package/src/agent/entry-format.ts +19 -0
  24. package/src/agent/events.ts +151 -0
  25. package/src/agent/extensions/rolling-history/constants.ts +1 -0
  26. package/src/agent/extensions/rolling-history/index.ts +202 -0
  27. package/src/agent/extensions/rolling-history/recall.ts +131 -0
  28. package/src/agent/extensions/rolling-history/strategy.ts +404 -0
  29. package/src/agent/host-types.ts +192 -0
  30. package/src/agent/index.ts +591 -0
  31. package/src/agent/live-view.ts +279 -0
  32. package/src/agent/llm-client.ts +111 -0
  33. package/src/agent/llm-facade.ts +43 -0
  34. package/src/agent/normalize-args.ts +61 -0
  35. package/src/agent/nuclear-form.ts +382 -0
  36. package/src/agent/providers/deepseek.ts +39 -0
  37. package/src/agent/providers/ollama.ts +92 -0
  38. package/src/agent/providers/openai-compatible.ts +36 -0
  39. package/src/agent/providers/openai.ts +52 -0
  40. package/src/agent/providers/opencode.ts +142 -0
  41. package/src/agent/providers/openrouter.ts +105 -0
  42. package/src/agent/providers/zai-coding-plan.ts +33 -0
  43. package/src/agent/session-store.ts +336 -0
  44. package/src/agent/skills.ts +228 -0
  45. package/src/agent/store.ts +310 -0
  46. package/src/agent/subagent.ts +305 -0
  47. package/src/agent/system-prompt.ts +151 -0
  48. package/src/agent/token-budget.ts +12 -0
  49. package/src/agent/tool-protocol.ts +722 -0
  50. package/src/agent/tool-registry.ts +66 -0
  51. package/src/agent/tools/bash.ts +95 -0
  52. package/src/agent/tools/edit-file.ts +154 -0
  53. package/src/agent/tools/expand-home.ts +7 -0
  54. package/src/agent/tools/glob.ts +108 -0
  55. package/src/agent/tools/grep.ts +228 -0
  56. package/src/agent/tools/list-skills.ts +37 -0
  57. package/src/agent/tools/ls.ts +81 -0
  58. package/src/agent/tools/pwsh.ts +140 -0
  59. package/src/agent/tools/read-file.ts +164 -0
  60. package/src/agent/tools/write-file.ts +72 -0
  61. package/src/agent/types.ts +149 -0
  62. package/src/cli/args.ts +91 -0
  63. package/src/cli/auth/cli.ts +244 -0
  64. package/src/cli/auth/discover.ts +52 -0
  65. package/src/cli/auth/keys.ts +143 -0
  66. package/src/cli/index.ts +295 -0
  67. package/src/cli/init.ts +74 -0
  68. package/src/cli/install.ts +439 -0
  69. package/src/cli/shell-env.ts +68 -0
  70. package/src/cli/subcommands.ts +24 -0
  71. package/src/core/event-bus.ts +252 -0
  72. package/src/core/extension-loader.ts +347 -0
  73. package/src/core/index.ts +152 -0
  74. package/src/core/settings.ts +398 -0
  75. package/src/core/types.ts +61 -0
  76. package/src/extensions/file-autocomplete.ts +71 -0
  77. package/src/extensions/index.ts +38 -0
  78. package/src/extensions/slash-commands/events.ts +14 -0
  79. package/src/extensions/slash-commands/index.ts +269 -0
  80. package/src/shell/events.ts +73 -0
  81. package/src/shell/host-types.ts +150 -0
  82. package/src/shell/index.ts +159 -0
  83. package/src/shell/input-handler.ts +505 -0
  84. package/src/shell/output-parser.ts +156 -0
  85. package/src/shell/shell-context.ts +193 -0
  86. package/src/shell/shell.ts +414 -0
  87. package/src/shell/strategies/bash.ts +83 -0
  88. package/src/shell/strategies/fish.ts +77 -0
  89. package/src/shell/strategies/index.ts +24 -0
  90. package/src/shell/strategies/types.ts +64 -0
  91. package/src/shell/strategies/zsh.ts +92 -0
  92. package/src/shell/terminal.ts +124 -0
  93. package/src/shell/tui-input-view.ts +222 -0
  94. package/src/shell/tui-renderer.ts +1126 -0
  95. package/src/utils/ansi.ts +140 -0
  96. package/src/utils/box-frame.ts +138 -0
  97. package/src/utils/compositor.ts +157 -0
  98. package/src/utils/diff-renderer.ts +829 -0
  99. package/src/utils/diff.ts +244 -0
  100. package/src/utils/executor.ts +305 -0
  101. package/src/utils/file-watcher.ts +110 -0
  102. package/src/utils/floating-panel.ts +1160 -0
  103. package/src/utils/handler-registry.ts +110 -0
  104. package/src/utils/line-editor.ts +636 -0
  105. package/src/utils/markdown.ts +437 -0
  106. package/src/utils/message-utils.ts +113 -0
  107. package/src/utils/package-version.ts +12 -0
  108. package/src/utils/palette.ts +64 -0
  109. package/src/utils/ref-counter.ts +9 -0
  110. package/src/utils/ripgrep-path.ts +17 -0
  111. package/src/utils/shell-output-spill.ts +76 -0
  112. package/src/utils/stream-transform.ts +292 -0
  113. package/src/utils/terminal-buffer.ts +213 -0
  114. package/src/utils/tool-display.ts +315 -0
  115. package/src/utils/tool-interactive.ts +71 -0
  116. package/src/utils/tty.ts +14 -0
package/docs/README.md ADDED
@@ -0,0 +1,14 @@
1
+ # agent-sh Documentation
2
+
3
+ Start with **Usage** to get running, then **Architecture** for the mental model. Everything else builds on those two.
4
+
5
+ ## Guides
6
+
7
+ 1. [Usage Guide](usage.md) — install, run, configure providers and models
8
+ 2. [Architecture](architecture.md) — pure kernel + extensions, the shell ↔ agent boundary, project structure
9
+ 3. [The Built-in Agent: ash](agent.md) — how the default backend works: query flow, tools, system prompt, model switching
10
+ 4. [Context Management](context-management.md) — shell-output spill, three-tier conversation compaction, recall APIs
11
+ 5. [Extensions](extensions.md) — event bus, content transforms, custom agent backends, theming
12
+ 6. [TUI Composition](tui-composition.md) — compositor, render surfaces, stream routing
13
+ 7. [Library Usage](library.md) — embedding agent-sh in your own apps
14
+ 8. [Troubleshooting](troubleshooting.md) — common errors and debug mode
package/docs/agent.md ADDED
@@ -0,0 +1,398 @@
1
+ # The Built-in Agent: ash
2
+
3
+ agent-sh is designed to be backend-agnostic. The agent that drives a query — assembling context, calling an LLM, executing tools in a loop — is a replaceable component. Any extension can register a backend via `agent:register-backend` and become the default via the `defaultBackend` setting or the `/backend` slash command. Bridge backends like `claude-code` and `pi` plug external CLI agents into the same shell and TUI surface.
4
+
5
+ This document describes **ash**, the built-in backend that ships with agent-sh. The agent host (`src/agent/index.ts`) is activated unconditionally via `activateAgent(ctx)` — it attaches the `ctx.agent` surface, registers core tools, and emits `agent:register-backend` to register `ash` with the core's backend registry. ash only *activates* (constructs its `AgentLoop` and starts handling queries) when an LLM provider has both an apiKey and a model resolved, and `activateBackend("ash")` runs. It resolves providers from registered catalogs + settings overlay, configures an `LlmClient`, and calls any OpenAI-compatible API directly. It manages conversation state and executes tools in a loop until the LLM is done.
6
+
7
+ If you're looking to write your own backend instead of reading how ash works internally, see [Extensions: Custom Agent Backends](extensions.md#custom-agent-backends).
8
+
9
+ ## The Query Flow
10
+
11
+ Here's what happens when you submit a query:
12
+
13
+ ```
14
+ User types "> fix the failing test"
15
+
16
+ ├─ 1. Context assembly — gather recent shell commands, output, cwd
17
+ ├─ 2. System prompt (cached per cwd) + dynamic context (rebuilt every LLM call)
18
+ ├─ 3. LLM call — stream response from the API
19
+ ├─ 4. Tool loop — if LLM requested tool calls:
20
+ │ ├─ Execute each tool (with permission check if needed)
21
+ │ ├─ Add results to conversation
22
+ │ └─ Go back to step 3 (LLM sees tool results, decides next action)
23
+ └─ 5. Done — no more tool calls, emit response
24
+ ```
25
+
26
+ The key insight: **the agent is a loop, not a single call**. The LLM calls tools, sees results, calls more tools, until it has enough information to respond. A single query might trigger dozens of LLM calls and tool executions.
27
+
28
+ ## Context Assembly
29
+
30
+ Every query draws on two distinct streams of context:
31
+
32
+ - **Shell context** — the user's terminal activity (commands + outputs) plus the live cwd. This is what lets ash understand "fix this" after you ran a failing command, and what keeps it anchored in the right working directory across compactions. The current cwd is wrapped as `<cwd>` (always) and new shell activity since the last turn as `<shell_events>` (when there is any), both nested inside the per-query `<query_context>` envelope and prepended to your user message.
33
+ - **Conversation state** — the OpenAI chat messages array (`user`/`assistant`/`tool` messages). This is the LLM's memory of what it already said and did within this session.
34
+
35
+ The two streams don't overlap: agent tool outputs live only in the conversation, and shell context tracks only user-initiated activity. When either stream grows large, ash has escape hatches rather than silent truncation:
36
+
37
+ - **Long shell outputs** are spilled to tempfiles (`<tmpdir>/agent-sh-<pid>/<id>.out`) at capture time. The LLM sees a head+tail stub with the path and recovers the full output via plain `read_file`.
38
+ - **Older conversation turns** are compacted by the built-in `rolling-history` extension: each is nucleated into a one-line summary in a persistent store (`~/.agent-sh/rolling-history/history.jsonl`), with the full message kept in an ephemeral per-session cache. The `conversation_recall` tool browses, searches, and expands those entries.
39
+
40
+ Compaction is pluggable: the `conversation:compact` handler is advisable, so extensions can install richer strategies without changing the recall surface. See [Context Management](context-management.md) for the full design.
41
+
42
+ ## System Prompt
43
+
44
+ The system prompt is assembled once per `cwd` and cached (invalidated when the working directory changes), so the prefix is stable for provider-side prompt caching. It includes:
45
+
46
+ 1. **Identity** — "You are an AI coding assistant running inside agent-sh..."
47
+ 2. **Tool decision guide** — when to use which built-in tool
48
+ 3. **Tool usage guidelines** — read before editing, prefer edit over write, use grep/glob to find files, etc.
49
+ 4. **Project conventions** — `CLAUDE.md`/`AGENT.md` walked from cwd to root (cwd-stable; see next section)
50
+ 5. **Skills** — discovered project/global skills (cwd-stable)
51
+ 6. **Extension instructions** — blocks registered by extensions via `registerInstruction()` (e.g. proactive recall guidance)
52
+ 7. **Available tools** — name + description of every registered tool
53
+ 8. **Extension-appended content** — extensions can advise `system-prompt:build` to append additional context (instance IDs, memory files, etc.)
54
+
55
+ Per-turn signals live in two symmetric handlers, both empty by default:
56
+
57
+ - **`query-context:build`** — fires once at user-query start. Output is wrapped in `<query_context>` and frozen into the user message, so it persists in conversation history. Shell context is the canonical example (`<cwd>` always, `<shell_events>` when there is fresh activity); other "what happened between turns" signals (notifications, calendar/inbox deltas) go here too.
58
+ - **`dynamic-context:build`** — fires on every LLM call (each tool-loop iteration). Output is wrapped in `<dynamic_context>` and ephemerally prepended to the trailing message at request time, so the cacheable prefix stays byte-stable. Use for "current state" signals: in-flight subagents, threshold warnings, active mode markers.
59
+
60
+ Extensions populate either via `ctx.agent.registerContextProducer(name, fn, { mode: "per-query" | "per-request" })`. When no producer contributes, no envelope tag is emitted at all — vanilla sessions send exactly `[system, ...history]`.
61
+
62
+ ## Project Conventions
63
+
64
+ The agent automatically loads `CLAUDE.md` or `AGENT.md` files from your working directory hierarchy. These are included in the system prompt on every query, so the agent respects project-specific conventions without being told each time.
65
+
66
+ The agent scans from your current directory upward to the filesystem root. In each directory it checks for `CLAUDE.md` first, then `AGENT.md` as a fallback (only one per directory). Files are included root-first, so more specific project conventions appear last and take precedence.
67
+
68
+ ```
69
+ ~/projects/myapp/src/ ← cwd
70
+ ~/projects/myapp/CLAUDE.md ← included (project-level)
71
+ ~/CLAUDE.md ← included first (global conventions)
72
+ ```
73
+
74
+ Since the cached system prompt is invalidated and rebuilt whenever the working directory changes, `cd`-ing to a new project picks up its conventions automatically.
75
+
76
+ This follows the same convention as Claude Code — if you already have `CLAUDE.md` files, they work out of the box.
77
+
78
+ ## Skills
79
+
80
+ Skills are reusable instruction sets that the agent can load on demand. They follow the [Agent Skills standard](https://agentskills.io/specification).
81
+
82
+ ### Skill format
83
+
84
+ A skill is a directory containing a `SKILL.md` file with YAML frontmatter:
85
+
86
+ ```markdown
87
+ ---
88
+ name: docker-deploy
89
+ description: Build and deploy Docker containers to production
90
+ ---
91
+
92
+ # Docker Deploy
93
+
94
+ ## Steps
95
+
96
+ 1. Build the image: `docker build -t app .`
97
+ 2. Tag for registry: `docker tag app registry.example.com/app:latest`
98
+ 3. Push: `docker push registry.example.com/app:latest`
99
+ ...
100
+ ```
101
+
102
+ The `name` and `description` fields are required. An optional `disable-model-invocation: true` hides the skill from the agent's automatic discovery.
103
+
104
+ ### Discovery
105
+
106
+ **Global skills** are discovered from `~/.agent-sh/skills/` by default. Add more locations via `skillPaths` in `~/.agent-sh/settings.json`:
107
+
108
+ ```json
109
+ {
110
+ "skillPaths": ["~/.agents/skills", "~/.claude/skills"]
111
+ }
112
+ ```
113
+
114
+ **Project skills** are discovered from `.agents/skills/` in your working directory hierarchy (up to the git root). When you `cd` into a directory with new project skills, the agent is notified with their names.
115
+
116
+ ### How the agent uses skills
117
+
118
+ Only skill **metadata** (name, description, file path) is included in the system prompt — not the full skill content. This keeps the prompt small regardless of how many skills you have.
119
+
120
+ 1. The system prompt lists available skills with their descriptions and paths
121
+ 2. The agent decides which skill is relevant (no extra round-trip needed)
122
+ 3. The agent calls `read_file` on the skill's `SKILL.md` to load full instructions when ready to use it
123
+
124
+ The `list_skills` tool is also available for broader discovery.
125
+
126
+ ### Slash command
127
+
128
+ Users can force-load a skill directly:
129
+
130
+ ```
131
+ > /skill:docker-deploy
132
+ > /skill:docker-deploy deploy the staging branch
133
+ ```
134
+
135
+ This injects the full skill content into the conversation. Tab completion works for skill names.
136
+
137
+ ## The Tool Loop
138
+
139
+ This is the core of how the agent works. After each LLM call, the agent checks if the response includes tool calls. If yes, it executes them and feeds the results back to the LLM.
140
+
141
+ ```
142
+ LLM response
143
+ ├─ Text only → done, emit response
144
+ └─ Tool calls → for each tool call:
145
+ ├─ Look up tool in registry
146
+ ├─ Execute via the `tool:<name>` handler chain (advisors can wrap)
147
+ ├─ Emit tool events (tool-started, tool-output-chunk, tool-completed)
148
+ ├─ Add tool result to conversation
149
+ └─ After all tools: call LLM again with updated conversation
150
+ ```
151
+
152
+ The loop continues until the LLM returns a response with no tool calls. There's no hard limit on iterations — the LLM decides when it's done.
153
+
154
+ ### Permission gating
155
+
156
+ The kernel has no opinion on permission. By default every tool runs (yolo
157
+ mode). Gating extensions register tool advisors via `ctx.agent.adviseTool(name, ...)`
158
+ to interpose a confirmation prompt, audit log, or policy check before calling
159
+ `next(args, onChunk, ctx)`. See `examples/extensions/interactive-prompts.ts`
160
+ for a reference implementation that gates `bash`, `pwsh`, `write_file`, and
161
+ `edit_file`.
162
+
163
+ ## Built-in Tools
164
+
165
+ Core tools are registered when `activateAgent(ctx)` runs — *before* extensions load. This means extensions can look up or advise tools at their own activate time. Additional tools come from extensions in `~/.agent-sh/extensions/`.
166
+
167
+ ### bash
168
+
169
+ The primary tool for investigation and code execution. **`bash`** runs in an **isolated subprocess** (`/bin/bash -c`). The agent uses this for reading files, running tests, checking state, and executing commands. A `cd` here doesn't affect your shell. Output is captured and returned to the LLM.
170
+
171
+ Extensions can add tools that cross the shell↔agent boundary via `shell:exec-request` — for example, running commands with lasting effects in the live PTY (`cd`, `export`, `source`). We don't include such a tool as built-in because the right behavior depends on user preference. See `examples/extensions/user-shell.ts` for a ready-made implementation to start from.
172
+
173
+ ### All tools
174
+
175
+ | Tool | Purpose | Side effects |
176
+ |---|---|---|
177
+ | `bash` | Run commands in isolated subprocess | Yes |
178
+ | `read_file` | Read file contents (line-numbered, with offset/limit) | No |
179
+ | `write_file` | Create or overwrite a file | Yes |
180
+ | `edit_file` | Find-and-replace in a file (old_text → new_text) | Yes |
181
+ | `grep` | Search file contents with regex (via ripgrep) | No |
182
+ | `glob` | Find files by name pattern | No |
183
+ | `ls` | List directory contents (with timestamps and sizes) | No |
184
+ | `list_skills` | List available skills (name, description, path) | No |
185
+ | `conversation_recall`\* | Browse/search/expand evicted conversation turns from the rolling-history store | No |
186
+
187
+ \* `conversation_recall` is not a core tool — it's registered by the built-in `rolling-history` extension, so it's absent under headless/bridge backends. Every other row is a core tool registered by `activateAgent(ctx)`.
188
+
189
+ **Common pattern**: all file-based tools resolve relative paths from the current working directory, looked up via the `cwd` handler (`ctx.call("cwd")`). The shell-context built-in advises this with the PTY-tracked cwd; without it, tools fall back to `process.cwd()`.
190
+
191
+ ### Tool-specific enhancements
192
+
193
+ **`grep`** supports three output modes and pagination:
194
+
195
+ - `output_mode`: `files_with_matches` (default, file paths only), `content` (matching lines with optional `context_before`/`context_after`), or `count` (match counts per file)
196
+ - `case_insensitive`: case-insensitive search
197
+ - `head_limit` / `offset`: pagination — default limits are 200 entries for `files_with_matches`, 150 for `content`/`count`. Pass `head_limit=0` for unlimited. Long lines in `content` mode are capped at 500 characters.
198
+
199
+ **`read_file`** deduplicates reads:
200
+
201
+ - Tracks file modification time. If a file hasn't changed since the last read (same offset/limit), returns a stub instead of re-reading — saves context tokens.
202
+ - Files over 2MB require `offset` and `limit` to prevent OOM.
203
+ - Cache is automatically invalidated when a file-modifying tool (`write_file`, `edit_file`) succeeds on the same path.
204
+
205
+ **`edit_file`** provides diagnostic hints:
206
+
207
+ - When `old_text` isn't found, the tool searches for the closest match and suggests fixes (e.g. whitespace differences, wrong line location).
208
+
209
+ **`glob`** returns results sorted by modification time (newest first), capped at 200 files.
210
+
211
+ **`ls`** returns formatted output with timestamps (YYYY-MM-DD HH:MM) and human-readable file sizes.
212
+
213
+ ### Tool batching and parallel execution
214
+
215
+ When the LLM requests multiple tool calls in a single response, the agent groups and executes them efficiently:
216
+
217
+ 1. **Batch event** — before execution, the agent emits `agent:tool-batch` with tools grouped by kind (`read`, `search`, `execute`, etc.). The TUI uses this to render group headers with tree-style connectors.
218
+
219
+ 2. **Parallel execution** — side-effect-free tools (`modifiesFiles` unset) run in parallel via `Promise.all`. Side-effecting tools run sequentially.
220
+
221
+ 3. **Output truncation** — tool results over 16KB (~4K tokens) are head+tail truncated before being added to the conversation, preventing a single tool call from blowing through the context window.
222
+
223
+ ### Structured result display
224
+
225
+ Tools can provide structured result information for the TUI via two optional methods on `ToolDefinition`:
226
+
227
+ - **`formatCall(args)`** — returns a short display string when the tool is called (e.g. the file path or search pattern). Shown in the TUI next to the tool icon.
228
+ - **`formatResult(args, result)`** — returns a `ToolResultDisplay` with an optional `summary` string (e.g. "42 files", "cached") and an optional structured `body` for richer rendering (diffs, line lists). The TUI's `render:result-body` handler renders the body — extensions can advise it.
229
+
230
+ ### Retry and error handling
231
+
232
+ The agent classifies LLM call failures and handles each class differently; transient failures are retried with exponential backoff:
233
+
234
+ - **Context overflow** — compacts the conversation and retries immediately
235
+ - **Rate limits (429)** — respects `Retry-After` header, otherwise backs off exponentially
236
+ - **Transient errors (500/502/503, network)** — exponential backoff (1s, 2s, 4s..., capped at 30s), up to 3 retries
237
+ - **Non-retryable errors** — reported with provider-aware context (model name, endpoint, actionable hints)
238
+
239
+ ### Thinking levels
240
+
241
+ The agent supports configurable thinking/reasoning levels for models that support `reasoning_effort`:
242
+
243
+ - Levels: `off` (default), `low`, `medium`, `high`, `xhigh` (`xhigh` falls back to `high` on providers that don't support it)
244
+ - Set via the `config:set-thinking` event (wired to `/thinking` slash command)
245
+ - Query current state via `config:get-thinking` pipe
246
+ - The agent validates that the current model/provider supports reasoning before enabling
247
+
248
+ ### Echoing reasoning back to the model
249
+
250
+ DeepSeek-family models require their previous-turn `reasoning_content` / `reasoning_details` to be echoed back on the next assistant message. Most other reasoning models do **not** want that — feeding prior chain-of-thought back through lenient OpenAI-compatible shims can register as out-of-distribution input and degrade quality.
251
+
252
+ agent-sh gates this behavior per-model via the `Model.echoReasoning` flag (default `false`). Reasoning extras are only attached to assistant messages when the active model opts in.
253
+
254
+ For OpenRouter, the flag is set automatically: model ids matching the built-in pattern `/deepseek/i` (V3, V3.2, V4, rebadges) get `echoReasoning: true`. You can extend or override this in `settings.json`:
255
+
256
+ ```json
257
+ {
258
+ "providers": {
259
+ "openrouter": {
260
+ "echoReasoningPatterns": ["my-custom-deepseek-fork"],
261
+ "models": [
262
+ { "id": "deepseek/deepseek-v3.2", "echoReasoning": false },
263
+ { "id": "openai/gpt-5.5", "reasoning": true }
264
+ ]
265
+ }
266
+ }
267
+ }
268
+ ```
269
+
270
+ - `echoReasoningPatterns` — regex **source strings** (no `/.../` delimiters), each compiled with the case-insensitive flag and tested against the model id. Examples: `"deepseek"` (substring match), `"^vendor/.*-r1$"` (anchored). Merged with the built-in `deepseek` pattern.
271
+ - Per-model `echoReasoning` — explicit boolean override that always wins over patterns.
272
+ - Invalid regexes are silently skipped, so a typo can't break provider registration.
273
+
274
+ ### Tool interface
275
+
276
+ Every tool implements this interface:
277
+
278
+ ```typescript
279
+ interface ToolDefinition {
280
+ name: string;
281
+ displayName?: string; // short label for TUI (defaults to name)
282
+ description: string;
283
+ input_schema: Record<string, unknown>; // JSON Schema for parameters
284
+
285
+ execute(
286
+ args: Record<string, unknown>,
287
+ onChunk?: (chunk: string) => void, // optional streaming callback
288
+ ): Promise<ToolResult>;
289
+
290
+ modifiesFiles?: boolean; // has side effects (skips caching + parallel execution)
291
+ readOnly?: boolean; // safe to run without nuclear-form gating (registry tags accordingly)
292
+ showOutput?: boolean; // stream output to TUI (default: true)
293
+
294
+ // Display hooks (all optional)
295
+ getDisplayInfo?: (args) => ToolDisplayInfo; // icon, kind, file locations
296
+ formatCall?: (args) => string; // short call summary for TUI
297
+ formatResult?: (args, result) => ToolResultDisplay; // structured result
298
+ }
299
+
300
+ interface ToolResult {
301
+ content: string; // text returned to the LLM
302
+ exitCode: number | null;
303
+ isError: boolean;
304
+ }
305
+
306
+ interface ToolResultDisplay {
307
+ summary?: string; // one-line (e.g. "42 files", "+3/-1")
308
+ body?: ToolResultBody; // structured content for richer rendering
309
+ }
310
+
311
+ type ToolResultBody =
312
+ | { kind: "diff"; diff: unknown; filePath: string }
313
+ | { kind: "lines"; lines: string[]; maxLines?: number }
314
+
315
+ interface ToolDisplayInfo {
316
+ kind: "read" | "write" | "execute" | "search";
317
+ locations?: { path: string; line?: number | null }[];
318
+ icon?: string; // custom icon (e.g. "◆", "⌕")
319
+ }
320
+ ```
321
+
322
+ The `onChunk` callback enables streaming tool output to the TUI in real-time (used by `bash`). Tools that don't stream (like `read_file`) just return the final result. Extensions can wrap `onChunk` via the `tool:execute` handler to intercept or transform streamed output (e.g. secret redaction).
323
+
324
+ ## Streaming
325
+
326
+ Response streaming has two phases:
327
+
328
+ **Phase 1 — LLM stream**: The agent iterates chunks from the OpenAI streaming API. Each chunk can contain:
329
+ - `delta.content` — response text
330
+ - `delta.tool_calls` — tool call arguments (streamed incrementally, parsed by index)
331
+ - `delta.reasoning_content` — thinking/reasoning tokens (non-standard, used by models like DeepSeek-R1)
332
+
333
+ **Phase 2 — Content transform pipeline**: Text chunks are emitted via `bus.emitTransform("agent:response-chunk", { blocks })`. This runs the content through the extension transform pipeline (parsers, post-transforms) before the renderer sees it. See [Extensions: Content Transform Pipeline](extensions.md#content-transform-pipeline).
334
+
335
+ The agent accumulates the full response text separately for the final `agent:response-done` event.
336
+
337
+ ## Conversation State
338
+
339
+ The conversation state is an OpenAI-compatible chat messages array. Each query adds messages:
340
+
341
+ ```
342
+ User submits query → { role: "user", content: "fix the test" }
343
+ LLM responds with text → { role: "assistant", content: "I'll look at..." }
344
+ LLM requests tool call → { role: "assistant", tool_calls: [...] }
345
+ Tool returns result → { role: "tool", tool_call_id: "...", content: "..." }
346
+ ```
347
+
348
+ This array grows with every turn. To prevent context overflow, ash auto-compacts when estimated prompt tokens cross `autoCompactThreshold` (default 0.5) of the model's usable context window.
349
+
350
+ ### Auto-compaction
351
+
352
+ Before each LLM call, ash estimates the total prompt tokens. If it's over the threshold, it invokes the `conversation:compact` handler to free space, then proceeds. If the API still returns a context-overflow error, ash compacts more aggressively and retries once; if compaction frees nothing, it aborts rather than looping.
353
+
354
+ The default compaction strategy evicts older turns into the nuclear archive and leaves a bridge note; `conversation_recall` can bring them back on demand. See [Context Management](context-management.md#conversation-compaction) for the three-tier design and how to swap the strategy.
355
+
356
+ The user can also trigger compaction manually with `/compact`.
357
+
358
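+ **Note**: reasoning/thinking tokens from the LLM stream are emitted as `agent:thinking-chunk` events for display but, by default, are **not stored in conversation state**. They're ephemeral — the LLM doesn't see its own reasoning on the next turn. The exception is a model that opts in via `echoReasoning` (see [Echoing reasoning back to the model](#echoing-reasoning-back-to-the-model)), whose reasoning extras are attached to its assistant messages.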
359
+
360
+ ## Provider Profiles & Model Switching
361
+
362
+ ash supports multiple models and providers, switchable at runtime.
363
+
364
+ ### Models
365
+
366
+ Each entry is a `(provider, model)` target — a serializable identity plus capabilities:
367
+
368
+ ```typescript
369
+ interface Model {
370
+ id: string; // model id, e.g. "openai/gpt-5"
371
+ provider: string; // identity is the (provider, id) pair
372
+ contextWindow?: number; // per-model override for the auto-compact threshold
373
+ maxTokens?: number;
374
+ reasoning?: boolean;
375
+ supportsReasoningEffort?: boolean;
376
+ echoReasoning?: boolean;
377
+ modalities?: ("text" | "image")[];
378
+ }
379
+ ```
380
+
381
+ The credentials and provider-shape transforms needed to actually call a model — `apiKey`/`baseURL` plus the reasoning/cache encoders — live in a separate `ModelEndpoint`, resolved internally by `(provider, id)`. It never travels on a frontend-facing event, so `Model` stays secret-free and serializable.
382
+
383
+ `agent:get-models` returns the catalog; `agent:models-changed` fires when it changes. When all models share the same provider, switching just changes the model. When they span providers (e.g. OpenAI models called directly plus Anthropic models via OpenRouter), switching also reconfigures the LLM client with the new endpoint's credentials and base URL.
384
+
385
+ ### Switching
386
+
387
+ - **`/model`** — show the current model
388
+ - **`/model <name>`** — switch to a specific model (may cross providers; credentials and base URL are reconfigured automatically)
389
+
390
+ The current model is shown in the TUI prompt. Switching mid-conversation preserves the conversation state — only the LLM endpoint changes.
391
+
392
+ To swap the backend itself (e.g. to `claude-code` or `pi`), use `/backend <name>` or set `defaultBackend` in settings.
393
+
394
+ ## Extension Tools
395
+
396
+ Extensions can register custom tools via `ctx.agent.registerTool()`. These appear alongside built-in tools and follow the same `ToolDefinition` interface. This only works with the built-in `ash` backend — bridge backends manage their own tools.
397
+
398
+ See [Extensions: ExtensionContext API](extensions.md#extensioncontext-api) for the interface and [Extensions: Custom Agent Backends](extensions.md#custom-agent-backends) for writing backend extensions.
@@ -0,0 +1,196 @@
1
+ # Architecture
2
+
3
+ agent-sh is a composable agent runtime: a pure kernel that any frontend can drive and any agent backend can plug into, over one shared extension layer. Frontends and backends are both bus-driven components that self-wire to events — the bundled shell is just one frontend among several.
4
+
5
+ ## Design Philosophy: Pure Kernel + Everything Is an Extension
6
+
7
+ The core (`createCore()`) is a frontend-agnostic kernel — it wires up the EventBus, HandlerRegistry, and Compositor with zero knowledge of terminals, PTYs, LLMs, shells, or rendering. **The core has no agent, no LLM client, and no shell coupling.** The built-in agent backend, shell tracking, provider management, TUI rendering, and all other features are loaded as extensions.
8
+
9
+ ```
10
+ createCore() — pure kernel:
11
+ │ EventBus — typed pub/sub + transform pipelines
12
+ │ HandlerRegistry — named function registry (define/advise/call)
13
+ │ Compositor — routes named render streams to surfaces
14
+ │ Multi-backend — coordinates which agent backend is active
15
+ │ Default `cwd` handler returning `process.cwd()`
16
+
17
+ src/cli/index.ts — interactive terminal frontend:
18
+ │ Shell — PTY lifecycle (delegates to InputHandler + OutputParser)
19
+
20
+ ├── Agent host (always activated via activateAgent(ctx) before built-ins load):
21
+ │ ash backend — provider resolution, LlmClient, lazy AgentLoop
22
+ │ core tools — bash/read/write/edit/grep/glob/ls/list_skills registered at activate time
23
+ │ built-in providers — openrouter, openai, openai-compatible, deepseek (unconditional)
24
+
25
+ ├── Backend registry (owned by core; backends register via `agent:register-backend`):
26
+ │ core.activateBackend() — picks the named/persisted/first backend and calls its start()
27
+
28
+ ├── Built-in extensions (loaded via declarative manifest, individually disableable):
29
+ │ shell-context — PTY exchange tracking, cwd advisor, <cwd>/<shell_events> producer
30
+ │ tui-renderer — markdown rendering, inline diffs, thinking display, spinner
31
+ │ slash-commands — /help, /model, /backend, /thinking, /compact, /context, /reload
32
+ │ file-autocomplete — @ file path completion
33
+
34
+ ├── Shared utilities:
35
+ │ palette — semantic color system (accent, success, warning, error, muted)
36
+ │ diff-renderer — syntax-highlighted diffs (split/unified/summary)
37
+ │ box-frame — bordered TUI panels
38
+ │ tool-display — width-adaptive tool call rendering + pure spinner
39
+ │ output-writer — OutputWriter interface (StdoutWriter, BufferWriter for tests)
40
+ │ stream-transform — content block transforms for response pipeline
41
+
42
+ └── User extensions (opt-in, loaded from -e flag / settings.json / extensions dir):
43
+ e.g. overlay-agent, interactive-prompts, solarized-theme, latex-images, peer-mesh
44
+ ```
45
+
46
+ All components communicate exclusively through typed bus events. The backend has no reference to Shell — it emits lifecycle events and the TUI subscribes. Input flows the same way: any frontend emits `agent:submit` and the backend handles it.
47
+
48
+ Built-in extensions are loaded from a declarative manifest and can be individually disabled via the `disabledBuiltins` setting in `~/.agent-sh/settings.json`. This means even the built-in agent can be disabled (e.g., for users who only use extension backends like Claude Code).
49
+
50
+ **The core works without any frontend.** See [Library](library.md) for embedding agent-sh in your own apps.
51
+
52
+ ## How It Works
53
+
54
+ 1. agent-sh spawns a real PTY running your shell (zsh or bash, with your full rc config) and sets up raw stdin passthrough
55
+ 2. Built-in extensions load (including the agent backend, which registers via `agent:register-backend`), then user extensions
56
+ 3. `activateBackend()` wires the chosen backend to bus events
57
+ 4. All keyboard input goes directly to the PTY — zero latency, full terminal compatibility
58
+ 5. When you type `>` at the start of a line, agent-sh intercepts and enters agent input mode
59
+ 6. On Enter, the query is emitted as `agent:submit` and the active backend decides which tools to use
60
+ 7. The backend handles the query — streaming LLM responses, executing tools, emitting events. Read-only tools run in parallel; permission-requiring tools run sequentially.
61
+ 8. The TUI renderer extension renders streamed content inline (markdown, diffs, tool calls with tree-style grouping)
62
+ 9. When the backend finishes (`agent:processing-done`), normal shell operation resumes
63
+
64
+ ## Shell ↔ Agent Boundary
65
+
66
+ The shell and the agent are **separate worlds** by default. The PTY runs your real shell; the agent runs its tools in isolated child processes. A `cd` by the agent's `bash` tool doesn't change your shell's cwd.
67
+
68
+ ### Command-boundary detection
69
+
70
+ agent-sh injects three invisible OSC sequences into its inner shell — `\e]9999;id=<tag>;PROMPT\a` (precmd), `\e]9997;id=<tag>;<cmd>\a` (preexec), `\e]9998;id=<tag>;READY\a` (prompt rendered). `<tag>` is the process's `instanceId`. The OutputParser reacts only to its own tag; markers with a different tag (or none) are treated as opaque foreground output. That's what keeps a nested agent-sh — for example, an `ash` launched inside an SSH session — from cross-triggering the outer instance's command lifecycle.
71
+
72
+ The connection between the shell and the agent is **context**: each query includes shell context (recent commands, output, cwd). The agent sees what you've been doing but can't touch your shell state.
73
+
74
+ Extensions can cross this boundary using `shell:exec-request`. The core's event bus makes this easy to wire up — an extension just registers a tool that emits the event and returns the result. We don't include a PTY tool as a built-in because the right behavior depends on user preference (confirmation prompts? output capture? restricted commands?). See `examples/extensions/user-shell.ts` for a ready-made implementation.
75
+
76
+ The pattern works like this:
77
+
78
+ ```
79
+ agent calls user_shell({ command: "cd src" })
80
+ → bus.emitPipeAsync("shell:exec-request", { command })
81
+ → Shell writes command to PTY
82
+ → PTY executes in user's real shell
83
+ → shell:command-done fires with output
84
+ → result returned to agent
85
+ ```
86
+
87
+ ## Agent Backend
88
+
89
+ The agent backend is a bus-driven component that registers via `agent:register-backend`. The core's multi-backend coordinator manages which backend is active — it has no knowledge of any specific backend's internals.
90
+
91
+ ### Built-in backend: ash
92
+
93
+ The default backend is **ash**, registered from the agent host (`src/agent/index.ts`) when `activateAgent(ctx)` runs. It resolves LLM providers from registered catalogs + settings overlay, configures an `LlmClient`, and registers itself with the core's backend registry by emitting `agent:register-backend`. The `AgentLoop` that drives tool calls is constructed lazily — only when ash's `start()` runs (on `activateBackend("ash")`). See [The Built-in Agent: ash](agent.md) for the full guide.
94
+
95
+ The agent host also defines an `llm:invoke` handler that backs the `ctx.agent.llm` facade, so any extension can call `ctx.agent.llm.ask(...)` or `ctx.agent.llm.session(...)` without knowing which backend is active. Backends with no LLM leave `ctx.agent.llm.available` false.
96
+
97
+ ### Extension Backends
98
+
99
+ Extensions can register alternative backends by emitting `agent:register-backend` during activation — this is the same mechanism the built-in agent uses. See [Extensions: Custom Agent Backends](extensions.md#custom-agent-backends) for the full protocol and a working example.
100
+
101
+ All backends emit the same bus events. The TUI, extensions, and library consumers don't know which backend is active.
102
+
103
+ ## Key Extension Points
104
+
105
+ The extension system provides several composable primitives for customizing agent-sh. Each is documented in detail in the [Extensions](extensions.md) guide:
106
+
107
+ - **[Event Bus](extensions.md#event-bus)** — typed pub/sub (`on`/`emit`), synchronous transform chains (`onPipe`/`emitPipe`), async transform chains (`onPipeAsync`/`emitPipeAsync`), and transform-then-notify (`emitTransform`)
108
+ - **[Custom Agent Backends](extensions.md#custom-agent-backends)** — replace the entire agent backend via `agent:register-backend`
109
+ - **[Named Handlers](extensions.md#named-handlers-advice-system)** — `define`/`advise`/`call` registry for wrapping processing steps (e.g. code block rendering)
110
+ - **[Content Transform Pipeline](extensions.md#content-transform-pipeline)** — typed content blocks (`text`, `code-block`, `image`, `raw`) flow through parsers and post-transforms before rendering
111
+ - **[Custom Input Modes](extensions.md#custom-input-modes)** — register trigger characters (`?`, `>`, etc.) with custom `onSubmit` handlers
112
+ - **[Terminal Buffer & Floating Panel](extensions.md#terminal-buffer--floating-panel)** — headless xterm.js terminal mirror + composited overlay with handler-based rendering customization
113
+ - **[Theming](extensions.md#theming)** — semantic color palette overrides via `setPalette()`
114
+
115
+ ## Project Structure
116
+
117
+ ```
118
+ agent-sh/
119
+ ├── src/
120
+ │ ├── core/ # Substrate kernel — no LLM, no agent, no shell
121
+ │ │ ├── index.ts # createCore(), backend registry, extensionContext()
122
+ │ │ ├── types.ts # CoreContext, CoreConfig
123
+ │ │ ├── event-bus.ts # Typed EventBus: emit/on, emitPipe, emitPipeAsync, emitTransform
124
+ │ │ ├── settings.ts # User settings (~/.agent-sh/settings.json)
125
+ │ │ └── extension-loader.ts # Extension loading (-e, settings.json, extensions dir)
126
+ │ │
127
+ │ ├── cli/ # CLI entry + subcommands (install, init, auth)
128
+ │ │ ├── index.ts # Interactive terminal entry point
129
+ │ │ ├── subcommands.ts, install.ts, init.ts
130
+ │ │ └── auth/ # Provider API key management
131
+ │ │
132
+ │ ├── shell/ # Shell host — TUI frontend, PTY, compositor, theming
133
+ │ │ ├── index.ts # registerShellHandlers/activateShell — attaches ctx.shell
134
+ │ │ ├── events.ts # BusEvents augmentation (shell:*, input:*, compositor:*, autocomplete:request)
135
+ │ │ ├── host-types.ts # ShellSurface, ShellContext, ExtensionContext, AppConfig
136
+ │ │ ├── shell.ts # PTY lifecycle + wiring (InputHandler + OutputParser)
137
+ │ │ ├── shell-context.ts # Shell exchange tracking, cwd advisor, <shell_events>
138
+ │ │ ├── tui-renderer.ts # Main renderer — writes to compositor streams
139
+ │ │ ├── input-handler.ts # Keyboard input, agent mode, bus-driven autocomplete
140
+ │ │ ├── output-parser.ts # OSC parsing, command boundary detection
141
+ │ │ └── tui-input-view.ts # Input rendering + line editor integration
142
+ │ │
143
+ │ ├── agent/ # Agent host — ash backend, providers, tools, skills
144
+ │ │ ├── index.ts # activateAgent — attaches ctx.agent, registers core tools + ash backend
145
+ │ │ ├── events.ts # BusEvents augmentation (agent:providers, agent:models-changed, ...)
146
+ │ │ ├── host-types.ts # AgentSurface, AgentContext, ProviderRegistration, Model, ModelEndpoint
147
+ │ │ ├── types.ts # AgentBackend, ToolDefinition, ToolResult
148
+ │ │ ├── agent-loop.ts # ash AgentLoop (constructed lazily in start())
149
+ │ │ ├── llm-client.ts, llm-facade.ts # ash LLM transport + ctx.agent.llm facade
150
+ │ │ ├── providers/ # openai, openrouter, deepseek, openai-compatible
151
+ │ │ ├── token-budget.ts # Shared constants (RESPONSE_RESERVE, DEFAULT_CONTEXT_WINDOW)
152
+ │ │ ├── tool-registry.ts, tool-protocol.ts
153
+ │ │ ├── live-view.ts # In-memory messages array + compaction + recall archive
154
+ │ │ ├── store.ts, session-store.ts # Append-only entry store; session/message persistence
155
+ │ │ ├── nuclear-form.ts, system-prompt.ts
156
+ │ │ ├── skills.ts, subagent.ts
157
+ │ │ └── tools/ # Built-in tool implementations (bash, read/write/edit, grep, glob, ls, ...)
158
+ │ │
159
+ │ ├── extensions/ # Cross-cutting built-ins (loaded via manifest)
160
+ │ │ ├── index.ts # Declarative manifest + loader
161
+ │ │ ├── slash-commands/ # /reload, /quit, command dispatch; events.ts ships command:* events
162
+ │ │ └── file-autocomplete.ts
163
+ │ │
164
+ │ └── utils/ # Shared primitives
165
+ │ ├── handler-registry.ts # Named function registry (define/advise/call)
166
+ │ ├── compositor.ts # Routes named render streams to surfaces
167
+ │ ├── terminal-buffer.ts # Headless xterm.js mirror of the terminal
168
+ │ ├── floating-panel.ts # Composited floating overlay
169
+ │ ├── executor.ts # Isolated child process execution
170
+ │ ├── shell-output-spill.ts # Session-tempfile spill for long shell outputs
171
+ │ ├── palette.ts, ansi.ts, diff.ts, diff-renderer.ts
172
+ │ └── (markdown, line-editor, stream-transform, ...)
173
+
174
+ ├── examples/ # Example extensions and agent integrations
175
+ │ └── extensions/
176
+ │ ├── overlay-agent.ts # Ctrl+\ floating overlay agent
177
+ │ ├── interactive-prompts.ts # Permission prompts (opt-in safety)
178
+ │ ├── peer-mesh.ts # Cross-instance communication
179
+ │ ├── terminal-buffer.ts # Headless xterm.js terminal mirror extension
180
+ │ ├── tmux-pane.ts # Tmux side pane output/interactive modes
181
+ │ ├── web-access.ts # Web search and content extraction
182
+ │ ├── user-shell.ts # Run commands in the live PTY
183
+ │ ├── questionnaire.ts # Interactive question prompts
184
+ │ ├── subagents.ts # Subagent orchestration
185
+ │ ├── solarized-theme.ts # Theme example
186
+ │ ├── secret-guard.ts # Secret redaction
187
+ │ ├── latex-images.ts # LaTeX equation rendering
188
+ │ ├── ollama.ts # Ollama provider (local + cloud)
189
+ │ ├── claude-code-bridge/ # Claude Code SDK backend
190
+ │ ├── pi-bridge/ # Pi agent backend
191
+ │ ├── ash-mcp-bridge/ # MCP server bridge
192
+ │ └── ash-acp-bridge/ # ACP server (headless core)
193
+ ├── docs/ # Documentation
194
+ ├── package.json
195
+ └── tsconfig.json
196
+ ```