agent-sh 0.15.0 → 0.15.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. package/docs/README.md +14 -0
  2. package/docs/agent.md +398 -0
  3. package/docs/architecture.md +196 -0
  4. package/docs/context-management.md +200 -0
  5. package/docs/extensions.md +951 -0
  6. package/docs/library.md +84 -0
  7. package/docs/troubleshooting.md +65 -0
  8. package/docs/tui-composition.md +294 -0
  9. package/docs/usage.md +306 -0
  10. package/examples/extensions/ash-scheme/package.json +1 -1
  11. package/examples/extensions/ashi/EXTENDING.md +2 -2
  12. package/examples/extensions/ashi/README.md +2 -2
  13. package/examples/extensions/ashi/docs/ui-surface-protocol.md +1 -1
  14. package/examples/extensions/ashi/package.json +5 -3
  15. package/examples/extensions/ashi/src/cli.ts +6 -5
  16. package/examples/extensions/ashi/src/renderer.ts +22 -2
  17. package/examples/extensions/ashi/src/renderers/pi-tui/tool-group.ts +5 -8
  18. package/examples/extensions/ashi-ink/package.json +2 -2
  19. package/examples/extensions/claude-code-bridge/package.json +1 -1
  20. package/examples/extensions/opencode-bridge/package.json +1 -1
  21. package/package.json +3 -1
  22. package/src/agent/agent-loop.ts +1563 -0
  23. package/src/agent/entry-format.ts +19 -0
  24. package/src/agent/events.ts +151 -0
  25. package/src/agent/extensions/rolling-history/constants.ts +1 -0
  26. package/src/agent/extensions/rolling-history/index.ts +202 -0
  27. package/src/agent/extensions/rolling-history/recall.ts +131 -0
  28. package/src/agent/extensions/rolling-history/strategy.ts +404 -0
  29. package/src/agent/host-types.ts +192 -0
  30. package/src/agent/index.ts +591 -0
  31. package/src/agent/live-view.ts +279 -0
  32. package/src/agent/llm-client.ts +111 -0
  33. package/src/agent/llm-facade.ts +43 -0
  34. package/src/agent/normalize-args.ts +61 -0
  35. package/src/agent/nuclear-form.ts +382 -0
  36. package/src/agent/providers/deepseek.ts +39 -0
  37. package/src/agent/providers/ollama.ts +92 -0
  38. package/src/agent/providers/openai-compatible.ts +36 -0
  39. package/src/agent/providers/openai.ts +52 -0
  40. package/src/agent/providers/opencode.ts +142 -0
  41. package/src/agent/providers/openrouter.ts +105 -0
  42. package/src/agent/providers/zai-coding-plan.ts +33 -0
  43. package/src/agent/session-store.ts +336 -0
  44. package/src/agent/skills.ts +228 -0
  45. package/src/agent/store.ts +310 -0
  46. package/src/agent/subagent.ts +305 -0
  47. package/src/agent/system-prompt.ts +151 -0
  48. package/src/agent/token-budget.ts +12 -0
  49. package/src/agent/tool-protocol.ts +722 -0
  50. package/src/agent/tool-registry.ts +66 -0
  51. package/src/agent/tools/bash.ts +95 -0
  52. package/src/agent/tools/edit-file.ts +154 -0
  53. package/src/agent/tools/expand-home.ts +7 -0
  54. package/src/agent/tools/glob.ts +108 -0
  55. package/src/agent/tools/grep.ts +228 -0
  56. package/src/agent/tools/list-skills.ts +37 -0
  57. package/src/agent/tools/ls.ts +81 -0
  58. package/src/agent/tools/pwsh.ts +140 -0
  59. package/src/agent/tools/read-file.ts +164 -0
  60. package/src/agent/tools/write-file.ts +72 -0
  61. package/src/agent/types.ts +149 -0
  62. package/src/cli/args.ts +91 -0
  63. package/src/cli/auth/cli.ts +244 -0
  64. package/src/cli/auth/discover.ts +52 -0
  65. package/src/cli/auth/keys.ts +143 -0
  66. package/src/cli/index.ts +295 -0
  67. package/src/cli/init.ts +74 -0
  68. package/src/cli/install.ts +439 -0
  69. package/src/cli/shell-env.ts +68 -0
  70. package/src/cli/subcommands.ts +24 -0
  71. package/src/core/event-bus.ts +252 -0
  72. package/src/core/extension-loader.ts +347 -0
  73. package/src/core/index.ts +152 -0
  74. package/src/core/settings.ts +398 -0
  75. package/src/core/types.ts +61 -0
  76. package/src/extensions/file-autocomplete.ts +71 -0
  77. package/src/extensions/index.ts +38 -0
  78. package/src/extensions/slash-commands/events.ts +14 -0
  79. package/src/extensions/slash-commands/index.ts +269 -0
  80. package/src/shell/events.ts +73 -0
  81. package/src/shell/host-types.ts +150 -0
  82. package/src/shell/index.ts +159 -0
  83. package/src/shell/input-handler.ts +505 -0
  84. package/src/shell/output-parser.ts +156 -0
  85. package/src/shell/shell-context.ts +193 -0
  86. package/src/shell/shell.ts +414 -0
  87. package/src/shell/strategies/bash.ts +83 -0
  88. package/src/shell/strategies/fish.ts +77 -0
  89. package/src/shell/strategies/index.ts +24 -0
  90. package/src/shell/strategies/types.ts +64 -0
  91. package/src/shell/strategies/zsh.ts +92 -0
  92. package/src/shell/terminal.ts +124 -0
  93. package/src/shell/tui-input-view.ts +222 -0
  94. package/src/shell/tui-renderer.ts +1126 -0
  95. package/src/utils/ansi.ts +140 -0
  96. package/src/utils/box-frame.ts +138 -0
  97. package/src/utils/compositor.ts +157 -0
  98. package/src/utils/diff-renderer.ts +829 -0
  99. package/src/utils/diff.ts +244 -0
  100. package/src/utils/executor.ts +305 -0
  101. package/src/utils/file-watcher.ts +110 -0
  102. package/src/utils/floating-panel.ts +1160 -0
  103. package/src/utils/handler-registry.ts +110 -0
  104. package/src/utils/line-editor.ts +636 -0
  105. package/src/utils/markdown.ts +437 -0
  106. package/src/utils/message-utils.ts +113 -0
  107. package/src/utils/package-version.ts +12 -0
  108. package/src/utils/palette.ts +64 -0
  109. package/src/utils/ref-counter.ts +9 -0
  110. package/src/utils/ripgrep-path.ts +17 -0
  111. package/src/utils/shell-output-spill.ts +76 -0
  112. package/src/utils/stream-transform.ts +292 -0
  113. package/src/utils/terminal-buffer.ts +213 -0
  114. package/src/utils/tool-display.ts +315 -0
  115. package/src/utils/tool-interactive.ts +71 -0
  116. package/src/utils/tty.ts +14 -0
@@ -0,0 +1,951 @@
1
+ # Extensions
2
+
3
+ An extension is a module that exports a default (or named `activate`) function. It receives a context object with access to all core services — no runtime imports needed; you only need the type for TS authoring.
4
+
5
+ ```typescript
6
+ import type { ExtensionContext } from "agent-sh/types";
7
+
8
+ export default function activate(ctx: ExtensionContext) {
9
+ const { bus } = ctx;
10
+
11
+ bus.on("agent:response-done", (e) => {
12
+ console.log(`Agent responded with ${e.response.length} chars`);
13
+ });
14
+ }
15
+ ```
16
+
17
+ `ExtensionContext` is the friendly default — host surfaces (`ctx.agent`, `ctx.shell`) are optional and you guard with `?.` when you use them. For more explicit host requirements, type as `AgentContext`, `ShellContext`, or `AgentContext & ShellContext` (see [ExtensionContext API](#extensioncontext-api) below).
18
+
19
+ ## Loading Extensions
20
+
21
+ Extensions are loaded from three sources (in order, deduplicated):
22
+
23
+ **CLI flag** (`-e` / `--extensions`):
24
+ ```bash
25
+ agent-sh -e my-ext-package -e ./local-ext.ts
26
+ agent-sh -e my-ext-package,another-package # comma-separated also works
27
+
28
+ # from a repo checkout in dev:
29
+ npm start -- -e my-ext-package -e ./local-ext.ts
30
+ ```
31
+
32
+ **Settings file** (`~/.agent-sh/settings.json`):
33
+ ```json
34
+ {
35
+ "extensions": [
36
+ "my-published-extension",
37
+ "./relative/path/to/ext.ts"
38
+ ]
39
+ }
40
+ ```
41
+
42
+ **Extensions directory** (`~/.agent-sh/extensions/`):
43
+ ```
44
+ ~/.agent-sh/extensions/
45
+ ├── my-extension.ts # loaded directly
46
+ ├── another.js # JS works too
47
+ └── complex-extension/ # directory with index.{js,mjs,ts,tsx,mts}
48
+ └── index.ts
49
+ ```
50
+
51
+ TypeScript and JavaScript are both supported (`.ts`, `.tsx`, `.mts`, `.js`, `.mjs`). TS is transpiled at runtime via tsx. Bare names resolve as npm packages via Node's standard module resolution. Errors in extension loading are non-fatal.
52
+
53
+ The `agent-sh install` subcommand populates the extensions directory (`~/.agent-sh/extensions/`) from bundled examples or local paths:
54
+
55
+ ```bash
56
+ agent-sh install pi-bridge # copy bundled examples/extensions/pi-bridge/
57
+ agent-sh install ./my-local-ext # copy from a local path
58
+ agent-sh install --force <name> # overwrite an existing target
59
+ agent-sh uninstall <name> # remove
60
+ agent-sh list # show all loadable extensions (extensions dir + settings.json)
61
+ ```
62
+
63
+ For directory-style extensions with declared dependencies, `install` runs `npm install` in the target automatically.
64
+
65
+ ### Single-file vs directory
66
+
67
+ The two layouts have different contracts, not just different ergonomics:
68
+
69
+ - **Single-file `.ts`** lives bare in `~/.agent-sh/extensions/`. No `package.json`, no `npm install`. Runtime resolution walks up from that path looking for `node_modules/`, and on a fresh install there's nothing to find. So a single-file extension can only use `ctx` and **type-only** imports (`import type { AgentContext } from "agent-sh/types"`) — these are erased by tsx before execution. A *runtime* `import { foo } from "agent-sh/bar"` throws `Cannot find module` at load.
70
+ - **Directory extension** ships with its own `package.json` declaring `agent-sh` (and any other runtime deps). `install` runs `npm install`, so bare imports resolve normally. Use this layout whenever you need runtime utilities the ctx surface doesn't provide.
71
+
72
+ Decision rule: if everything you need is on `ctx` (and the surfaces that hang off it — `ctx.agent.*`, `ctx.shell.*`, `ctx.call(...)`), stay single-file. If you find yourself reaching for `agent-sh/utils/*` or `agent-sh/agent/*` at runtime, either there's a missing ctx primitive worth adding, or you should promote the extension to a directory layout.
73
+
74
+ ## ExtensionContext API
75
+
76
+ The context is layered: substrate primitives + slash-command registration at the top, host-specific surfaces nested under `ctx.agent` and `ctx.shell`. Both nested surfaces are **optional** — they're attached by their hosts during activation, and headless backends (ACP server, bridges) may skip a host's activation entirely:
77
+
78
+ ```ts
79
+ type ExtensionContext = CoreContext & {
80
+ registerCommand: ...;
81
+ adviseCommand: ...;
82
+ agent?: AgentSurface; // attached by agent host
83
+ shell?: ShellSurface; // attached by shell host
84
+ };
85
+ ```
86
+
87
+ That gives extension authors a choice depending on how defensive they want to be:
88
+
89
+ | Context type | What it guarantees | Use when |
90
+ |---|---|---|
91
+ | `CoreContext` | substrate only | Pure bus / handler extensions; works under any backend |
92
+ | `AgentContext` | substrate + `agent` (required) | Need tools / LLM / instructions; won't load under no-agent bridges |
93
+ | `ShellContext` | substrate + `shell` (required) | Need compositor / palette / transforms; won't load under headless |
94
+ | `AgentContext & ShellContext` | substrate + both (required) | Need both surfaces; lets you skip `?.` guards |
95
+ | `ExtensionContext` | substrate + both (optional) | Defensive code that adapts to whatever's available; requires `?.` guards on `ctx.agent` / `ctx.shell` |
96
+
97
+ Picking a narrower type makes host requirements explicit. Under a mismatch (extension typed `AgentContext` loaded under a no-agent bridge), accessing `ctx.agent.foo` produces a real `TypeError` instead of a silent no-op — loud, debuggable failure.
98
+
99
+ ### Substrate (top-level — always present)
100
+
101
+ | Property | Type | Description |
102
+ |---|---|---|
103
+ | `bus` | `EventBus` | Subscribe to events, emit events, register pipe handlers |
104
+ | `instanceId` | `string` | Stable per-instance identifier (4-char hex) |
105
+ | `quit` | `() => void` | Exit agent-sh |
106
+ | `getExtensionSettings` | `(namespace, defaults) => T` | Read extension settings from `~/.agent-sh/settings.json` |
107
+ | `getStoragePath` | `(namespace) => string` | Get (and lazily create) a per-extension storage directory under `~/.agent-sh/<namespace>/` |
108
+ | `define` | `(name, fn) => void` | Register a named handler |
109
+ | `advise` | `(name, wrapper) => () => void` | Wrap a named handler (receives `next` + args). Returns an `unadvise()` function. |
110
+ | `call` | `(name, ...args) => any` | Call a named handler |
111
+ | `list` | `() => string[]` | Names of all registered handlers (for diagnostic/introspection use) |
112
+ | `onDispose` | `(fn: () => void) => void` | Register a teardown callback fired on `/reload` |
113
+ | `registerCommand` | `(name, description, handler) => void` | Register a slash command (e.g. `/mycommand`). Frontend-agnostic — any consumer (TUI input parser, ACP message handler) can dispatch. |
114
+ | `adviseCommand` | `(name, advisor) => () => void` | Wrap an already-registered command's handler |
115
+
116
+ ### `ctx.agent` — agent host surface
117
+
118
+ Attached by `activateAgent(ctx)` (called from `src/cli/index.ts` and from library hosts). `ctx.agent` is present whenever the ash agent host has been activated, regardless of which backend is currently active. Bridges (claude-code, opencode, pi) coexist as alternate backends and still see `ctx.agent`; they simply don't drive AgentLoop.
119
+
120
+ | Property | Type | Description |
121
+ |---|---|---|
122
+ | `llm` | `LlmInterface` | Backend-agnostic LLM facade — `llm.ask({query, system?, maxTokens?})` for one-shot, `llm.session({system?}).send(msg)` for multi-turn, `llm.available` to check |
123
+ | `providers` | `{ register, unregister, configure }` | Register/unregister provider catalogs (id, apiKey, baseURL, defaultModel, models, supportsReasoningEffort, noAuth); `configure` sets provider-specific hooks (reasoning params). See [Providers](#providers) below |
124
+ | `registerTool` | `(tool: ToolDefinition) => void` | Register a tool with the active agent backend. See [Internal Agent: Tool interface](agent.md#tool-interface) |
125
+ | `unregisterTool` | `(name: string) => void` | Remove a previously registered tool |
126
+ | `adviseTool` | `(name, advisor) => () => void` | Wrap a tool's execute function (gating, logging, transforms) |
127
+ | `adviseToolSchema` | `(name, advisor) => () => void` | Wrap a tool's LLM-facing description/parameters |
128
+ | `getTools` | `() => ToolDefinition[]` | Get all registered tools |
129
+ | `registerInstruction` | `(name, text) => void` | Inject a named instruction block into the system prompt |
130
+ | `removeInstruction` | `(name) => void` | Remove a named instruction block |
131
+ | `adviseInstruction` | `(name, advisor) => () => void` | Wrap an instruction's text |
132
+ | `registerSkill` | `(name, description, filePath) => void` | Register a skill — on-demand reference material the agent can invoke |
133
+ | `removeSkill` | `(name) => void` | Remove a registered skill |
134
+ | `adviseSkill` | `(name, advisor) => () => void` | Wrap a skill's LLM-facing view |
135
+ | `registerContextProducer` | `(name, () => string \| null, opts?: { mode? }) => () => void` | Contribute a per-turn signal. `mode: "per-request"` (default) — fires on every LLM call; the output is wrapped in `<dynamic_context>` and attached ephemerally to the trailing message. `mode: "per-query"` — fires once per user query; the output is wrapped in `<query_context>` and frozen into the user message. Return `null` to skip. Returns a dispose function. |
136
+
137
+ ### `ctx.shell` — shell host surface
138
+
139
+ Attached by the TUI shell frontend (`registerShellHandlers`). Headless backends leave `ctx.shell` undefined.
140
+
141
+ | Property | Type | Description |
142
+ |---|---|---|
143
+ | `compositor` | `Compositor` | Routes named render streams to terminal surfaces. Frontends set defaults during activation; extensions can `redirect()` to capture output. See [TUI Composition](tui-composition.md) |
144
+ | `setPalette` | `(overrides) => void` | Override color palette slots for theming |
145
+ | `createBlockTransform` | `(opts) => void` | Register an inline delimiter transform (e.g. `$$...$$`) |
146
+ | `createFencedBlockTransform` | `(opts) => void` | Register a fenced block transform (e.g. ` ```lang...``` `) |
147
+ | `adviseInputMode` | `(id, advisor) => () => void` | Wrap an input mode's `onSubmit` |
148
+ | `createRemoteSession` | `(opts: RemoteSessionOptions) => RemoteSession` | Create a remote session that routes agent output to a surface. See [Remote Sessions](#remote-sessions) |
149
+
150
+ ## Extension Settings
151
+
152
+ Extensions read user-configurable settings from `~/.agent-sh/settings.json`, namespaced under the extension name:
153
+
154
+ ```typescript
155
+ export default function activate(ctx) {
156
+ const config = ctx.getExtensionSettings("my-extension", {
157
+ maxItems: 10,
158
+ color: "blue",
159
+ });
160
+ // config.maxItems, config.color — typed, merged with user overrides
161
+ }
162
+ ```
163
+
164
+ Users configure in `~/.agent-sh/settings.json`:
165
+ ```json
166
+ {
167
+ "my-extension": { "maxItems": 50, "color": "red" }
168
+ }
169
+ ```
170
+
171
+ ## Event Bus
172
+
173
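+ The bus has three patterns — notifications, a synchronous transform chain, and an async transform chain — plus `emitTransform`, a convenience that combines the first two. The key difference: **`on`/`emit` is fire-and-forget** (listeners can't change anything), while **`onPipe`/`emitPipe` is a transform chain** (each listener modifies the payload for the next).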
174
+
175
+ ### `on` / `emit` — Notifications
176
+
177
+ Broadcast an event. Listeners react but can't affect the payload or the emitter. Use this for logging, UI updates, and side effects.
178
+
179
+ ```typescript
180
+ // Emitter doesn't care what listeners do
181
+ bus.emit("ui:info", { message: "Operation completed" });
182
+
183
+ // Listener reacts but can't change the event
184
+ bus.on("shell:command-done", ({ command, output, exitCode }) => {
185
+ if (exitCode !== 0) bus.emit("ui:suggestion", { text: "Command failed" });
186
+ });
187
+ ```
188
+
189
+ ### `onPipe` / `emitPipe` — Synchronous Transform Chain
190
+
191
+ Each listener receives the payload, **returns a modified version**, and that becomes the input for the next listener. The emitter gets back the final result. Use this when you need extensions to intercept or transform data.
192
+
193
+ ```typescript
194
+ // Emitter sends a payload through the chain and reads the result.
195
+ // `agent:terminal-intercept` is emitted by the `bash` tool before
196
+ // execution — extensions can short-circuit specific commands with
197
+ // virtual output and skip the subprocess. No built-in extension uses
198
+ // this today; it's a general hook for custom virtual commands.
199
+ const result = bus.emitPipe("agent:terminal-intercept", {
200
+ command, cwd, intercepted: false, output: "",
201
+ });
202
+ if (result.intercepted) return result.output; // an extension handled it
203
+
204
+ // Listener transforms the payload (or returns it unchanged to pass through)
205
+ bus.onPipe("agent:terminal-intercept", (payload) => {
206
+ if (payload.command !== "my-tool") return payload; // not mine, pass through
207
+ return { ...payload, intercepted: true, output: "custom output" };
208
+ });
209
+ ```
210
+
211
+ Another common use — multiple extensions enriching a payload:
212
+ ```typescript
213
+ // Each extension appends its own completions
214
+ bus.onPipe("autocomplete:request", (payload) => {
215
+ return { ...payload, items: [...payload.items, { name: "/greet", description: "Say hello" }] };
216
+ });
217
+ ```
218
+
219
+ ### `onPipeAsync` / `emitPipeAsync` — Async Transform Chain
220
+
221
+ Same as `onPipe`, but listeners can be async. It also notifies regular `on` listeners first (so the UI can prepare before async work starts). Use this for transforms that need I/O — shell execution, network calls, context-compaction handlers.
222
+
223
+ ```typescript
224
+ // Example: the shell-exec request pipe lets extensions intercept commands
225
+ // the agent wants to run in the user's PTY.
226
+ bus.onPipeAsync("shell:exec-request", async (payload) => {
227
+ const result = await runInPty(payload.command);
228
+ return { ...payload, output: result.output, exitCode: result.exitCode };
229
+ });
230
+ ```
231
+
232
+ ### `emitTransform` — Pipe Then Notify
233
+
234
+ A convenience combo: runs the payload through the `onPipe` transform chain, then emits the result to regular `on` listeners. This is the standard way to emit content that should be both transformable and renderable.
235
+
236
+ ```typescript
237
+ // Without emitTransform (two steps):
238
+ const transformed = bus.emitPipe("agent:response-chunk", { blocks });
239
+ bus.emit("agent:response-chunk", transformed);
240
+
241
+ // With emitTransform (same thing, one call):
242
+ bus.emitTransform("agent:response-chunk", { blocks });
243
+ ```
244
+
245
+ This is how agent backends emit response chunks — extensions get a chance to transform the content (e.g. LaTeX → image) before the renderer sees it.
246
+
247
+ ## Custom Agent Backends
248
+
249
+ An extension can provide an agent backend — the component that receives queries and produces responses. The built-in `ash` backend (registered from `src/agent/index.ts` via `activateAgent`) uses an OpenAI-compatible API with tool calling and constructs its AgentLoop lazily when `start()` runs. The core owns the backend registry — every backend, including ash, registers by emitting `agent:register-backend`, and `core.activateBackend(name?)` picks one to start. You can add alternatives: a local model, a proprietary agent service, a deterministic script, or a test stub.
250
+
251
+ ### How it works
252
+
253
+ During `activate()`, emit `agent:register-backend` to register your backend. Multiple backends can coexist; the user switches between them with `/backend`. Set `defaultBackend` in settings to control which activates on startup.
254
+
255
+ Here's a minimal working backend:
256
+
257
+ ```typescript
258
+ import type { ExtensionContext } from "agent-sh/types";
259
+
260
+ export default function activate({ bus }: ExtensionContext): void {
261
+ // 1. Register — claims the backend role before activateBackend() runs
262
+ bus.emit("agent:register-backend", {
263
+ name: "echo",
264
+ kill: () => {},
265
+ });
266
+
267
+ // 2. Handle queries — listen for submits, emit the response protocol
268
+ bus.on("agent:submit", ({ query }) => {
269
+ bus.emit("agent:processing-start", {});
270
+ bus.emit("agent:query", { query });
271
+
272
+ // Use emitTransform so the content pipeline processes response chunks
273
+ bus.emitTransform("agent:response-chunk", {
274
+ blocks: [{ type: "text", text: `Echo: ${query}\n` }],
275
+ });
276
+
277
+ bus.emitTransform("agent:response-done", {
278
+ response: `Echo: ${query}`,
279
+ });
280
+
281
+ bus.emit("agent:processing-done", {});
282
+ });
283
+
284
+ // 3. Identify yourself (shown in the TUI prompt)
285
+ bus.emit("agent:info", { name: "echo-backend", version: "1.0.0" });
286
+ }
287
+ ```
288
+
289
+ ### Event protocol
290
+
291
+ A backend listens for input events and emits output events. The TUI and all extensions only see bus events — they don't know or care which backend is active.
292
+
293
+ **Input events** (listen with `bus.on`):
294
+
295
+ | Event | Payload | Description |
296
+ |---|---|---|
297
+ | `agent:submit` | `{ query }` | User submitted a query |
298
+ | `agent:cancel-request` | `{ silent? }` | User requested cancellation |
299
+ | `agent:reset-session` | `{}` | User issued reset — clear conversation state |
300
+
301
+ **Output events** (emit in this order for each query):
302
+
303
+ | Step | Event | Payload | Notes |
304
+ |---|---|---|---|
305
+ | 1 | `agent:processing-start` | `{}` | Starts spinner in TUI |
306
+ | 2 | `agent:query` | `{ query }` | Echoes the query for display |
307
+ | 3 | `agent:response-chunk` | `{ blocks: ContentBlock[] }` | Use `emitTransform` so content pipeline runs. Emit 0+ times |
308
+ | 4 | `agent:response-done` | `{ response }` | Full response text |
309
+ | 5 | `agent:processing-done` | `{}` | Stops spinner, returns control to prompt |
310
+
311
+ **Optional events** for richer backends:
312
+
313
+ | Event | Payload | When |
314
+ |---|---|---|
315
+ | `agent:thinking-chunk` | `{ text }` | Reasoning tokens (e.g. DeepSeek-R1) |
316
+ | `agent:tool-batch` | `{ groups: [{ kind, tools: [{ name, displayDetail? }] }] }` | Before tool execution — all tools grouped by kind |
317
+ | `agent:tool-started` | `{ title, toolCallId?, kind?, icon?, displayDetail?, locations?, batchIndex?, batchTotal? }` | Tool execution beginning |
318
+ | `agent:tool-output-chunk` | `{ chunk }` | Streamed tool output |
319
+ | `agent:tool-completed` | `{ toolCallId?, exitCode, kind?, resultDisplay? }` | Tool execution finished |
320
+ | `agent:error` | `{ message }` | Error during processing |
321
+ | `agent:usage` | `{ prompt_tokens, completion_tokens, total_tokens }` | Token usage stats |
322
+
323
+ The `agent:tool-batch` event lets the TUI prepare group headers before tools execute. `agent:tool-started` now carries display metadata (`icon`, `displayDetail` from `formatCall()`, batch position). `agent:tool-completed` includes a `resultDisplay` (from `formatResult()`) with an optional `summary` string and structured `body`.
324
+
325
+ ### Switching backends at runtime
326
+
327
+ Multiple backends can be registered at the same time. Use the `/backend` command to list and switch between them:
328
+
329
+ ```
330
+ /backend # list all registered backends (active one marked)
331
+ /backend claude-code # switch to the claude-code backend
332
+ /backend ash # switch back to the built-in backend
333
+ ```
334
+
335
+ Switching deactivates the current backend (`kill()`) and activates the new one (`start()`).
336
+
337
+ ### Default backend
338
+
339
+ By default, the built-in `"ash"` backend activates (registered from `src/agent/index.ts` via `activateAgent`, which the CLI calls before loading built-ins). To make an extension backend the default, set `defaultBackend` in `~/.agent-sh/settings.json`:
340
+
341
+ ```json
342
+ {
343
+ "extensions": ["./my-bridge.ts"],
344
+ "defaultBackend": "claude-code"
345
+ }
346
+ ```
347
+
348
+ On startup, `activateBackend()` checks this setting and activates the named backend if it was registered. If the named backend isn't found, it falls back to the first registered backend.
349
+
350
+ For a one-off override that doesn't persist, pass `--backend <name>` on the command line:
351
+
352
+ ```bash
353
+ agent-sh --backend pi # launches pi this session, settings unchanged
354
+ ```
355
+
356
+ Unlike `defaultBackend`, the CLI flag is strict: if the named backend isn't registered, agent-sh errors out with a hint pointing at `agent-sh install <bridge>` rather than silently falling back.
357
+
358
+ To run a bridge-only setup, set `defaultBackend` to the bridge — ash registers itself but won't activate unless it's chosen:
359
+ ```json
360
+ {
361
+ "extensions": ["./my-bridge.ts"],
362
+ "defaultBackend": "claude-code"
363
+ }
364
+ ```
365
+
366
+ ### Registration timing
367
+
368
+ Built-in extensions load first (via a declarative manifest), then user extensions, then `activateBackend()` runs. This is what makes `defaultBackend` work — by the time the core decides which backend to activate, all extensions have registered theirs.
369
+
370
+ ### Real-world bridges
371
+
372
+ The echo-backend shows the protocol. The `examples/extensions/` directory has three production bridges that wire real agent SDKs into agent-sh. All are **pure protocol translators** — they map the external SDK's event stream to agent-sh's bus events, and that's it. None bundle any tools of their own; each external agent uses its own built-in tools as-is.
373
+
374
+ PTY-access tools (`terminal_read`, `terminal_keys`, `user_shell`) are deliberately *not* part of a bridge's job. They're opt-in capabilities that live in their own extensions, registered per backend in that backend's tool format. Keeping this separation means bridges stay narrow and composable.
375
+
376
+ #### Claude Code Bridge (`claude-code-bridge/`)
377
+
378
+ Runs the [Claude Code Agent SDK](https://docs.anthropic.com/en/docs/claude-code/sdk) in-process. Claude Code handles model selection, tool execution, and permissions — agent-sh provides the shell and TUI.
379
+
380
+ ```bash
381
+ cp -r examples/extensions/claude-code-bridge ~/.agent-sh/extensions/
382
+ cd ~/.agent-sh/extensions/claude-code-bridge && npm install
383
+ # Requires: ANTHROPIC_API_KEY in environment
384
+ ```
385
+
386
+ **How it works:**
387
+
388
+ 1. **Registers as backend** via `agent:register-backend`
389
+ 2. **On each `agent:submit`**, calls the SDK's `query()` with the user's prompt and a system-prompt preset. Claude Code's own tools (`Read`, `Edit`, `Write`, `Bash`, `Glob`, `Grep`) handle everything.
390
+ 3. **Iterates the SDK's async iterator** — maps `stream_event` (text/thinking deltas) and `assistant` messages (tool use blocks) to agent-sh events (`agent:response-chunk`, `agent:thinking-chunk`, `agent:tool-started`)
391
+ 4. **Snapshots files before Edit/Write** so it can compute a diff when the tool result comes back, for the TUI's inline diff rendering
392
+
393
+ #### Pi Bridge (`pi-bridge/`)
394
+
395
+ Runs [pi's coding agent](https://github.com/nickarrow/pi) in-process. Pi brings its own model registry, provider settings, session management, and tools.
396
+
397
+ ```bash
398
+ cp -r examples/extensions/pi-bridge ~/.agent-sh/extensions/
399
+ cd ~/.agent-sh/extensions/pi-bridge && npm install
400
+ # Requires: pi configured separately (~/.pi/settings.json)
401
+ ```
402
+
403
+ **How it works:**
404
+
405
+ 1. **Registers as backend** with an async `start()` — pi needs to boot (load config from `~/.pi/`, create services, initialize tools)
406
+ 2. **Subscribes to pi's event stream** (`session.subscribe`) — maps pi events to agent-sh events:
407
+ - `message_update` → `agent:response-chunk` or `agent:thinking-chunk`
408
+ - `tool_execution_start/update/end` → `agent:tool-started`, `agent:tool-output-chunk`, `agent:tool-completed`
409
+ - `agent_end` → `agent:response-done` + `agent:processing-done`
410
+ 3. **Session management** — `agent:reset-session` creates a new pi session via `runtime.newSession()`
411
+
412
+ #### OpenCode Bridge (`opencode-bridge/`)
413
+
414
+ Runs [opencode](https://opencode.ai/) in-process via `@opencode-ai/sdk`. The SDK boots an embedded HTTP server the bridge talks to; opencode brings its own models, tools, and `opencode auth login` credentials.
415
+
416
+ ```bash
417
+ cp -r examples/extensions/opencode-bridge ~/.agent-sh/extensions/
418
+ cd ~/.agent-sh/extensions/opencode-bridge && npm install
419
+ # Requires: opencode configured separately (opencode auth login)
420
+ ```
421
+
422
+ **How it works:**
423
+
424
+ 1. **Registers as backend** with an async `start()` that calls `createOpencode()` to boot the embedded server and open a session
425
+ 2. **Subscribes to the SDK's event stream** (`client.event.subscribe()`, iterated with `for await`) — maps opencode events to agent-sh events (`agent:response-chunk`, `agent:tool-started`, …)
426
+ 3. **Cancellation and reset** — `agent:cancel-request` and `agent:reset-session` are wired to the SDK's abort and new-session calls
427
+
428
+ #### Adding PTY access to a bridge
429
+
430
+ None of the bridges bundle PTY tools. If you want the external agent to observe or mutate the user's live terminal, write a companion extension that registers tools in the target SDK's tool format:
431
+
432
+ - **`terminal_read`** — calls `ctx.call("terminal-buffer").readScreen()` and returns the text + cursor + alt-screen state
433
+ - **`terminal_keys`** — emits `shell:pty-write` to send keystrokes to the PTY
434
+ - **`user_shell`** — emits `shell:exec-request` and awaits the result, for `cd`/`export`/`source`-level state mutation
435
+
436
+ For pi, this is a standalone extension that registers with pi's `customTools` (via `ctx.call` or a pi-specific hook). For Claude Code, the SDK accepts MCP servers in `query()` options — so the companion extension builds the MCP server and either forks the bridge or coordinates with it via a handler to attach the server.
437
+
438
+ #### Writing your own bridge
439
+
440
+ All three bridges follow the same 4-step structure:
441
+
442
+ 1. **Register as backend** — emit `agent:register-backend` with `name`, `start()`, `kill()`
443
+ 2. **Listen for `agent:submit`** — forward the query to the external agent
444
+ 3. **Map the agent's events** to agent-sh bus events (response chunks, tool starts/completions, thinking, errors)
445
+ 4. **Handle cancellation and reset** — wire `agent:cancel-request` and `agent:reset-session`
446
+
447
+ The difference between the bridges is just SDK shape: Claude Code and opencode expose async iterators you `for await` over; pi uses a subscription callback. The translation layer is the same. Keep PTY tools out — they belong in companion extensions.
448
+
449
+ ##### Forwarding query context
450
+
451
+ Extensions register per-query producers via `ctx.agent.registerContextProducer(name, fn, { mode: "per-query" })` — for example, the `shell-context` built-in contributes a `<shell_events>` block of recent shell activity. The kernel exposes the joined producer output through the `query-context:build` handler. Each backend chooses how to surface that data when forwarding a query — there's no kernel-imposed transport.
452
+
453
+ For a bridge, the recommended pattern is:
454
+
455
+ ```typescript
456
+ bus.on("agent:submit", async ({ query }) => {
457
+ const queryCtx = (ctx.call("query-context:build") as string).trim();
458
+ const enriched = queryCtx
459
+ ? `<query_context>\n${queryCtx}\n</query_context>\n\n${query}`
460
+ : query;
461
+ await externalSdk.send(enriched);
462
+ });
463
+ ```
464
+
465
+ The wrapping tag is the bridge's call — drop the XML envelope and inline the text if that reads cleaner in the target SDK, or splice into the system prompt instead of the user message. Document what your bridge does so extension authors know what reaches the external agent.
466
+
467
+ Per-request producers (`mode: "per-request"`) only fire under backends that expose the LLM loop. Bridges that hand off to an external SDK can't fire them, since they don't see iterations — extensions wanting cross-backend reach should prefer `mode: "per-query"`.
468
+
469
+ ## Custom Providers
470
+
471
+ Providers describe the OpenAI-compatible endpoints the `ash` backend can talk to. The built-ins (openrouter, openai, openai-compatible, deepseek) register from `src/agent/providers/`; extensions can register their own — local daemons, hosted gateways, fine-tuned model catalogs — and they show up under `agent-sh auth list` and `/model`.
472
+
473
+ ```typescript
474
+ import type { AgentContext } from "agent-sh/types";
475
+
476
+ export default function activate(ctx: AgentContext): void {
477
+ ctx.agent.providers.register({
478
+ id: "ollama",
479
+ baseURL: "http://localhost:11434/v1",
480
+ defaultModel: "llama3.2",
481
+ models: ["llama3.2", "qwen2.5-coder"],
482
+ noAuth: true,
483
+ });
484
+ }
485
+ ```
486
+
487
+ `ProviderRegistration` fields:
488
+
489
+ | Field | Description |
490
+ |---|---|
491
+ | `id` | Provider name (used as the key in settings, `auth`, `/model`) |
492
+ | `apiKey` | API key; omit for `noAuth` providers, otherwise resolve via `ctx.call("provider:resolve-api-key", id)` which returns `{ key, source }` (settings → keys.json → env) |
493
+ | `baseURL` | OpenAI-compatible base URL |
494
+ | `defaultModel` | Selected by default when this provider is active |
495
+ | `models` | Catalog. Either `string[]` or `Array<{ id, contextWindow?, reasoning?, echoReasoning? }>` for per-model capabilities |
496
+ | `supportsReasoningEffort` | Whether `/thinking` levels apply to this provider |
497
+ | `noAuth` | `true` for local daemons that don't require an API key — appears as "(no auth required)" in `auth list` |
498
+
499
+ Settings overlay registered providers: any of `providers.<id>.apiKey/baseURL/defaultModel/models/modelCapabilities` set in `~/.agent-sh/settings.json` wins over the extension's payload, so users can pin keys, endpoints, and per-model overrides without touching extension code.
500
+
501
+ Re-register to refresh (e.g. after fetching a catalog asynchronously); listeners are notified via `agent:providers:changed` and `agent:models-changed`. To configure provider hooks like custom reasoning-effort encoding, use `ctx.agent.providers.configure(id, { reasoningParams })`.
502
+
503
+ ## Named Handlers (Advice System)
504
+
505
+ The event bus transforms *data flowing through events*. Named handlers are different — they let you wrap *function calls*. Think of `define`/`advise`/`call` as a named function registry where any extension can intercept any function.
506
+
507
+ **`define`** registers a named function. **`call`** invokes it. **`advise`** wraps it — your wrapper receives `next` (the previous implementation) and decides whether to call it, like middleware.
508
+
509
+ ```typescript
510
+ // Built-in: tui-renderer defines the default code block handler
511
+ ctx.define("render:code-block", (language, code, width) => {
512
+ syntaxHighlight(language, code, width);
513
+ });
514
+
515
+ // Your extension wraps it
516
+ ctx.advise("render:code-block", (next, language, code, width) => {
517
+ if (language === "mermaid") return renderMermaid(code); // handle it yourself
518
+ return next(language, code, width); // otherwise pass through
519
+ });
520
+
521
+ // Somewhere in the system, the handler is invoked
522
+ ctx.call("render:code-block", "python", codeString, 80);
523
+ ```
524
+
525
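+ Multiple advisors chain — each wraps the last. The first advisor in the chain that returns without calling `next` wins: everything it wraps, including the default implementation, is skipped.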
526
+
527
+ ### When to use `advise` vs `onPipe`
528
+
529
+ - **`onPipe`**: you want to transform *data* as it flows through the system (autocomplete items, response chunks, intercepted commands). You get a payload, return a modified payload.
530
+ - **`advise`**: you want to replace *behavior* — how a code block renders, how an image displays. You get `next` and decide whether to call the original implementation or substitute your own.
531
+
532
+ Handlers are reserved for **high-power use cases** where multiple independent extensions need to compose behavior on the same operation. Simple read/write access to internals is exposed as direct methods on `ExtensionContext` instead.
533
+
534
+ ### Built-in handlers
535
+
536
+ #### Agent loop handlers
537
+
538
+ These are registered by AgentLoop (constructed when the ash backend's `start()` runs) and let other extensions shape what the LLM sees and how tools execute. They are only available when the ash backend is active.
539
+
540
+ | Handler | Signature | Description |
541
+ |---|---|---|
542
+ | `system-prompt:build` | `() → string` | Assemble the cached system prompt. Advise to append identity blocks, memory files, learned lessons, etc. Rebuilt on cwd change, not every query. |
543
+ | `dynamic-context:build` | `() → string` | Per-request signal block. Fires on every LLM call (including each tool-loop iteration). Output is wrapped in `<dynamic_context>` and ephemerally prepended to the trailing message at request time. Default: `""`. Advisors append; extensions usually go through `ctx.agent.registerContextProducer(name, fn)` instead of advising directly. |
544
+ | `query-context:build` | `() → string` | Per-query signal block. Fires once at user-query start in `handleQuery`. Output is wrapped in `<query_context>` and frozen into the user message. Built-in: an advisor that emits `<shell_events>` from the shell-event cursor. Default: `""`. Reach via `ctx.agent.registerContextProducer(name, fn, { mode: "per-query" })`. |
545
+ | `conversation:prepare` | `(messages[]) → messages[]` | Transform the full message array before it's sent to the LLM. Default: pass through. |
546
+ | `conversation:compact` | `({target, keepRecent, force}) → { before, after, evictedCount } \| null` | Compaction strategy (returns null when nothing is compacted). Default: pins the first turn + the last `keepRecent` turns and evicts the middle by priority × recency. Advise for richer strategies (topic pinning, LLM summarization). |
547
+ | `conversation:get-messages` | `() → messages[]` | Read the current in-memory messages array. Used by compaction advisors to compute a replacement. |
548
+ | `conversation:replace-messages` | `(messages[]) → void` | Install a replacement messages array. The write-side counterpart to `conversation:get-messages` in the compaction pattern. |
549
+ | `conversation:estimate-tokens` | `() → number` | Local chars/4 estimate of the conversation size. |
550
+ | `conversation:estimate-prompt-tokens` | `() → number` | API-grounded estimate (last `prompt_tokens` + local delta since). Used by the auto-compact trigger. |
551
+ | `conversation:inject-note` | `(text) → void` | Inject a `role:"user"` note mid-loop — how extensions deliver async results (subagent output, peer messages) into the next iteration. |
552
+ | `conversation:nucleate-user` / `-agent` / `-tool` | `(msg) → NuclearEntry` | Turn a message into its one-line summary. Advise to extract extra metadata (e.g. `[why: ...]` annotations). |
553
+ | `conversation:format-prior-history` | `(entries) → string` | Render prior-session history into a preamble. Advise for session-grouped output. |
554
+ | `history:append` / `:search` / `:find-by-seq` / `:read-recent` | — | Shell-history-style persistent log at `~/.agent-sh/history`. Advise to add indexing, filtering, or external stores. |
555
+ | `tool:execute` | `(ctx) → ToolResult` | Wrap the full tool lifecycle: permission → execute → emit events. |
556
+
557
+ **`dynamic-context:build`** — Each advisor appends its own context. Multiple extensions compose independently:
558
+
559
+ ```typescript
560
+ // Add git context to every LLM request
561
+ ctx.advise("dynamic-context:build", (next) => {
562
+ const base = next();
563
+ const branch = execSync("git branch --show-current").toString().trim();
564
+ return base + `\nGit branch: ${branch}`;
565
+ });
566
+ ```
567
+
568
+ **`conversation:prepare`** — Full control over the message array the LLM receives. The default passes messages through unchanged. Extensions can implement compaction, summarization, filtering, sliding window, or any other strategy:
569
+
570
+ ```typescript
571
+ // Keep only the last 20 messages to save tokens
572
+ ctx.advise("conversation:prepare", (next, messages) => {
573
+ const prepared = next(messages);
574
+ if (prepared.length > 23) { // 3 prefix messages + 20 conversation
575
+ return [...prepared.slice(0, 3), ...prepared.slice(-20)];
576
+ }
577
+ return prepared;
578
+ });
579
+ ```
580
+
581
+ **`tool:execute`** — Wraps every tool call. The `ctx` argument contains `{ name, id, args, tool, onChunk }`. Use cases: blocking tools, logging, custom permission policies, output redaction.
582
+
583
+ ```typescript
584
+ // Safe mode — block all file-modifying tools
585
+ ctx.advise("tool:execute", async (next, ctx) => {
586
+ if (ctx.tool.modifiesFiles) {
587
+ return { content: "Blocked: read-only mode", exitCode: 1, isError: true };
588
+ }
589
+ return next(ctx);
590
+ });
591
+
592
+ // Secret redaction — wrap onChunk to scrub streaming output + final result
593
+ ctx.advise("tool:execute", async (next, ctx) => {
594
+ const origOnChunk = ctx.onChunk;
595
+ if (origOnChunk) {
596
+ ctx.onChunk = (chunk) => origOnChunk(redact(chunk));
597
+ }
598
+ const result = await next(ctx);
599
+ return { ...result, content: redact(result.content) };
600
+ });
601
+ ```
602
+
603
+ The `onChunk` callback controls what the user sees during tool execution (streamed to terminal). See `examples/extensions/secret-guard.ts` for a complete implementation.
604
+
605
+ #### Rendering handlers
606
+
607
+ These are registered by the tui-renderer and let extensions customize how content is displayed.
608
+
609
+ | Handler | Signature | Description |
610
+ |---|---|---|
611
+ | `render:code-block` | `(language: string, code: string, width: number) → void` | Render a fenced code block (default: syntax highlighting) |
612
+ | `render:image` | `(data: Buffer) → void` | Display an image in the terminal (default: iTerm2/Kitty protocol) |
613
+ | `render:result-body` | `(body: ToolResultBody, width: number) → string[]` | Render structured tool result body (default: diffs or line lists) |
614
+
615
+ The `render:result-body` handler is called when a tool's `formatResult()` returns a structured `body`. Extensions can advise it to customize how specific result types are displayed:
616
+
617
+ ```typescript
618
+ ctx.advise("render:result-body", (next, body, width) => {
619
+ if (body.kind === "diff") return myCustomDiffRenderer(body.diff, body.filePath, width);
620
+ return next(body, width);
621
+ });
622
+ ```
623
+
624
+ #### Custom handlers
625
+
626
+ Extensions can define their own handlers for other extensions to advise:
627
+
628
+ ```typescript
629
+ ctx.define("my-ext:process-data", (data) => defaultProcessing(data));
630
+ // Other extensions can then advise("my-ext:process-data", ...)
631
+ ```
632
+
633
+ ## Content Transform Pipeline
634
+
635
+ The agent streams raw text. Before the renderer sees it, the text flows through a transform pipeline that breaks it into typed **content blocks**:
636
+
637
+ ```typescript
638
+ type ContentBlock =
639
+ | { type: "text"; text: string } // markdown text
640
+ | { type: "code-block"; language: string; code: string } // fenced code block
641
+ | { type: "image"; data: Buffer } // PNG → terminal image protocol
642
+ | { type: "raw"; escape: string } // raw terminal escape
643
+ ```
644
+
645
+ The pipeline has two layers. **Parsers** turn raw text into blocks (e.g. detecting ` ``` ` fences and emitting `code-block`). **Post-transforms** operate on those blocks (e.g. taking a `code-block` with language "mermaid" and converting it to an `image`). `createBlockTransform` and `createFencedBlockTransform` are parsers; `bus.onPipe("agent:response-chunk")` is the post-transform layer.
646
+
647
+ | I want to... | Use | Layer |
648
+ |---|---|---|
649
+ | Match inline delimiters (`$$`, `<<`, etc.) | `ctx.shell.createBlockTransform` | Parser — text in, blocks out |
650
+ | Match fenced blocks (` ``` `, `:::`, `~~~`) | `ctx.shell.createFencedBlockTransform` | Parser — text in, blocks out |
651
+ | Transform blocks others produced | `bus.onPipe("agent:response-chunk", ...)` | Post-transform — blocks in, blocks out |
652
+
653
+ Parsers only read `text` blocks and pass other block types through. Post-transforms see all block types. This means they compose regardless of registration order — each operates on a disjoint domain.
654
+
655
+ ### Inline delimiter transforms
656
+
657
+ Parsers that detect patterns like `$$...$$` within text. They handle streaming buffering and flush-on-done automatically — you just provide the delimiters and a transform function:
658
+
659
+ ```typescript
660
+ ctx.shell.createBlockTransform({
661
+ open: "$$",
662
+ close: "$$",
663
+ transform(content) {
664
+ // content = text between delimiters (e.g. "E = mc^2")
665
+ // Return ContentBlock(s) or null to keep original
666
+ const png = renderToPng(content);
667
+ return png ? { type: "image", data: png } : null;
668
+ },
669
+ });
670
+ ```
671
+
672
+ ### Fenced block transforms
673
+
674
+ Parsers that detect line-delimited fenced blocks. Open/close patterns are regexes:
675
+
676
+ ```typescript
677
+ // :::warning ... ::: admonition blocks
678
+ ctx.shell.createFencedBlockTransform({
679
+ open: /^:::(\w+)\s*$/,
680
+ close: /^:::\s*$/,
681
+ transform(match, content) {
682
+ const kind = match[1]; // "warning", "note", etc.
683
+ return { type: "text", text: `⚠️ ${kind.toUpperCase()}: ${content}` };
684
+ },
685
+ });
686
+ ```
687
+
688
+ The tui-renderer uses this same primitive for standard ` ``` ` code fences — it's not special.
689
+
690
+ ### Post-transforms: claiming blocks from other transforms
691
+
692
+ Use `bus.onPipe` to transform blocks that a parser already produced. This is how you claim specific code block languages, convert block types, or filter output:
693
+
694
+ ```typescript
695
+ // A parser already turned ```mermaid ... ``` into a code-block.
696
+ // This post-transform claims it and converts to an image.
697
+ bus.onPipe("agent:response-chunk", (e) => ({
698
+ blocks: e.blocks.map(block => {
699
+ if (block.type !== "code-block" || block.language !== "mermaid") return block;
700
+ const png = renderMermaid(block.code);
701
+ return png ? { type: "image", data: png } : block;
702
+ }),
703
+ }));
704
+ ```
705
+
706
+ ### Example: LaTeX image rendering
707
+
708
+ `examples/extensions/latex-images.ts` renders both `$$...$$` and ` ```latex ` blocks as terminal images — using a parser for the inline math and a post-transform for the code fences:
709
+
710
+ ```bash
711
+ # Requires: latex + dvipng (brew install --cask mactex)
712
+ # Requires: iTerm2, WezTerm, Kitty, or Ghostty
713
+ agent-sh -e ./examples/extensions/latex-images.ts
714
+ ```
715
+
716
+ ## Custom Input Modes
717
+
718
+ Input modes change what happens when the user types and presses Enter. Each mode binds a trigger character (typed at the start of an empty line) to a custom `onSubmit` handler. The built-in mode (`>` for agent) is registered this way — it's not special.
719
+
720
+ The flow: user types trigger → prompt changes to show the mode → user types their input → presses Enter → `onSubmit` fires → your handler emits `agent:submit`. You can optionally include a `modeInstruction` that gets prepended to the user message.
721
+
722
+ ```typescript
723
+ bus.emit("input-mode:register", {
724
+ id: "translate", // unique identifier
725
+ trigger: "!", // single char — typed at empty line start
726
+ label: "translate", // shown in prompt
727
+ promptIcon: "⟩", // chevron/icon character
728
+ indicator: "🌐", // status indicator before the icon
729
+ onSubmit(query, bus) {
730
+ bus.emit("agent:submit", {
731
+ query, // what the user typed
732
+ modeInstruction: "[mode: translate] Translate the following to Spanish.",
733
+ });
734
+ },
735
+ returnToSelf: true, // re-enter this mode after agent finishes
736
+ });
737
+ ```
738
+
739
+ | Field | Type | Description |
740
+ |---|---|---|
741
+ | `id` | `string` | Unique identifier |
742
+ | `trigger` | `string` | Single character that activates the mode at empty line start |
743
+ | `label` | `string` | Shown in the prompt area |
744
+ | `promptIcon` | `string` | Chevron/icon character in the prompt |
745
+ | `indicator` | `string` | Status indicator before the icon |
746
+ | `onSubmit` | `(query, bus) => void` | Called on Enter. Your handler emits `agent:submit` with `query` + optional `modeInstruction` |
747
+ | `returnToSelf` | `boolean` | Re-enter this mode after the agent finishes |
748
+
749
+ Each trigger character can only be claimed by one mode. Slash commands and readline keybindings work in every mode.
750
+
751
+ ## Terminal Buffer
752
+
753
+ A headless xterm.js terminal that mirrors the real terminal's output. Registered as a handler by the shell frontend (`src/shell/`); access via `ctx.call("terminal-buffer")`. Returns `null` if `@xterm/headless` is not installed, or if the shell frontend isn't loaded (e.g. in library/headless consumers).
754
+
755
+ ```typescript
756
+ const tb = ctx.call("terminal-buffer");
757
+ if (tb) {
758
+ const { text, altScreen, cursorX, cursorY } = tb.readScreen();
759
+ console.log(altScreen ? "vim/htop is running" : "normal shell");
760
+ }
761
+ ```
762
+
763
+ Key methods and properties:
764
+ - `readScreen()` — clean text snapshot with cursor position and alt screen detection
765
+ - `getScreenLines(rows?)` — array of viewport lines (for compositing)
766
+ - `serialize()` — raw serialized output including ANSI sequences
767
+ - `altScreen` — whether the alternate screen buffer is active
768
+ - `write(data)` / `resize(cols, rows)` — manual control
769
+
770
+ Install the optional xterm dependencies:
771
+ ```bash
772
+ npm install @xterm/headless@5.5.0 @xterm/addon-serialize@0.13.0
773
+ ```
774
+
775
+ ## FloatingPanel
776
+
777
+ > **Note**: FloatingPanel is an internal utility in `src/utils/floating-panel.ts`, not part of the public ExtensionContext API. It's used by the [overlay-agent](../examples/extensions/overlay-agent.ts) example extension. Import it directly if you need it.
778
+
779
+ A composited overlay rendered over the terminal. Handles alt screen management, input routing, dimmed background compositing, scroll, and screen restore.
780
+
781
+ ```typescript
782
+ import { FloatingPanel } from "agent-sh/utils/floating-panel";
783
+
784
+ const panel = new FloatingPanel(bus, {
785
+ trigger: "\x1c", // Ctrl+\ to toggle
786
+ dimBackground: true,
787
+ terminalBuffer: ctx.call("terminal-buffer") ?? undefined,
788
+ });
789
+ ```
790
+
791
+ **Config options**: `trigger`, `width`, `height`, `maxWidth`, `minHeight`, `borderStyle` (`rounded`/`square`/`double`/`heavy`), `dimBackground`, `autoDismissMs`, `promptIcon`, `handlerPrefix`.
792
+
793
+ **Content API**: `appendText(text)`, `appendLine(line)`, `updateLastLine(fn)`, `clearContent()`, `setTitle(title)`, `setFooter(footer)`.
794
+
795
+ **Lifecycle**: `open()` → input → `submit` → `setActive()` → agent processes → `setDone()` → input (follow-up) or `dismiss()`. When hidden during active processing, the panel enters passthrough mode (renders TerminalBuffer content directly) until the agent finishes.
796
+
797
+ See `src/utils/floating-panel.ts` for the full API, handler hooks, and rendering customization.
798
+
799
+ ## Remote Sessions
800
+
801
+ A remote session bundles all the wiring needed to route agent output away from stdout — compositor redirects, shell lifecycle advisors, and chrome suppression — into a single call. Use it when building side panes, web UIs, remote displays, or any extension where agent output should appear somewhere other than the main terminal.
802
+
803
+ ```typescript
804
+ const session = ctx.shell.createRemoteSession({
805
+ surface: mySurface, // where output goes (RenderSurface)
806
+ suppressQueryBox: true, // hide query box (session has own input)
807
+ });
808
+
809
+ session.submit("what's on screen?"); // submit a query
810
+ session.close(); // restore everything
811
+ ```
812
+
813
+ ### RemoteSessionOptions
814
+
815
+ | Option | Type | Default | Description |
816
+ |---|---|---|---|
817
+ | `surface` | `RenderSurface` | (required) | The surface to render agent output to |
818
+ | `suppressBorders` | `boolean` | `true` | Suppress response top/bottom borders |
819
+ | `suppressQueryBox` | `boolean` | `false` | Suppress the user query box (use when the session has its own input) |
820
+ | `suppressUsage` | `boolean` | `true` | Suppress token usage stats line |
821
+
822
+ If your extension wants to signal "this session is interactive — read the screen, prefer terminal_keys" to the LLM, register a context producer while the session is open. See `examples/extensions/overlay-agent.ts` for the pattern.
823
+
824
+ ### What createRemoteSession handles
825
+
826
+ Internally, a remote session:
827
+
828
+ 1. **Redirects render streams** — `"agent"`, `"query"`, `"status"` all route to the provided surface
829
+ 2. **Keeps the shell interactive** — advises `shell:on-processing-start` and `shell:on-processing-done` to skip pause/unpause
830
+ 3. **Suppresses chrome** — advises `tui:response-border`, `tui:render-user-query`, `tui:render-usage` based on options
831
+
832
+ Calling `session.close()` removes all advisors and restores all compositor routing in one call.
833
+
834
+ ### Example: tmux side pane
835
+
836
+ ```typescript
837
+ // Output-only: queries from main shell, output in side pane
838
+ const session = ctx.shell.createRemoteSession({ surface });
839
+ // session.close() when done
840
+
841
+ // Interactive: side pane has own input prompt
842
+ const session = ctx.shell.createRemoteSession({
843
+ surface,
844
+ suppressQueryBox: true,
845
+ });
846
+ conn.on("data", (d) => session.submit(d.toString().trim()));
847
+ ```
848
+
849
+ ### Example: overlay agent
850
+
851
+ ```typescript
852
+ const session = ctx.shell.createRemoteSession({
853
+ surface: panelSurface,
854
+ suppressQueryBox: true,
855
+ });
856
+ session.submit(query);
857
+ // ... later, on dismiss ...
858
+ session.close();
859
+ ```
860
+
861
+ ## Shell Lifecycle Handlers
862
+
863
+ The shell's behavior during agent processing is controlled by two advisable handlers. Extensions advise these to change how the shell responds when the agent starts and stops working.
864
+
865
+ ### `shell:on-processing-start`
866
+
867
+ Default: pauses the shell (blocks PTY output and input) while the agent works. This is correct when agent output shares stdout with the terminal.
868
+
869
+ ```typescript
870
+ // Skip pause — agent output goes to a separate surface
871
+ ctx.advise("shell:on-processing-start", (next) => {
872
+ if (mySessionActive) return; // don't pause
873
+ return next(); // default: pause
874
+ });
875
+ ```
876
+
877
+ ### `shell:on-processing-done`
878
+
879
+ Default: unpauses the shell, then either re-enters agent input mode or redraws the shell prompt.
880
+
881
+ ```typescript
882
+ // Skip prompt redraw — already handled by the extension
883
+ ctx.advise("shell:on-processing-done", (next) => {
884
+ if (mySessionActive) return; // skip
885
+ return next(); // default: unpause + redraw
886
+ });
887
+ ```
888
+
889
+ > **Note:** `createRemoteSession()` advises both of these automatically. You only need to advise them directly if you're building custom lifecycle behavior without using remote sessions.
890
+
891
+ ## Rendering Architecture
892
+
893
+ The tui-renderer turns content blocks into terminal output. All output flows through the **compositor**, which routes named streams (`"agent"`, `"query"`, `"status"`) to **render surfaces**. Extensions should never call `process.stdout.write` directly.
894
+
895
+ ```
896
+ ContentBlock (from transform pipeline)
897
+ ├── text → MarkdownRenderer.push(chunk) → drainLines() → compositor
898
+ ├── code-block → ctx.call("render:code-block") → drainLines() → compositor
899
+ ├── image → ctx.call("render:image") → compositor
900
+ └── raw → compositor.surface("agent").write(escape)
901
+ ```
902
+
903
+ Extensions can redirect any stream to a different surface (e.g. a floating panel):
904
+
905
+ ```typescript
906
+ // Redirect agent output to a panel
907
+ const restore = ctx.shell.compositor.redirect("agent", panelSurface);
908
+ // ... later ...
909
+ restore(); // back to stdout
910
+ ```
911
+
912
+ Streams are hierarchical: `"agent:diff"` falls back to `"agent"` if no override exists. See [TUI Composition](tui-composition.md) for the full compositor design, surface API, and examples.
913
+
914
+ Rendering components follow a **return lines, don't write** convention — each returns its output (`string[]`, or a single `string` for one-line components) instead of writing it, making them testable in isolation:
915
+
916
+ - `renderBoxFrame(content, opts)` → `string[]`
917
+ - `renderDiff(diff, opts)` → `string[]`
918
+ - `renderToolCall(tool, width)` → `string[]`
919
+ - `renderSpinnerLine(state, label, opts)` → `string`
920
+ - `MarkdownRenderer.drainLines()` → `string[]`
921
+
922
+ ## Yolo Mode
923
+
924
+ By default, agent-sh runs in **yolo mode** — all tool calls and file writes are auto-approved. To add permission prompts, load the example extension:
925
+
926
+ ```bash
927
+ agent-sh -e ./examples/extensions/interactive-prompts.ts
928
+
929
+ # Or install permanently:
930
+ cp examples/extensions/interactive-prompts.ts ~/.agent-sh/extensions/
931
+ ```
932
+
933
+ ## Theming
934
+
935
+ agent-sh uses a semantic color palette (~10 base roles). Override any slot via `setPalette()`:
936
+
937
+ ```typescript
938
+ import type { ShellContext } from "agent-sh/types";
939
+
940
+ export default function activate(ctx: ShellContext) {
941
+ ctx.shell.setPalette({
942
+ accent: "\x1b[38;2;38;139;210m", // solarized blue
943
+ success: "\x1b[38;2;133;153;0m", // solarized green
944
+ warning: "\x1b[38;2;181;137;0m", // solarized yellow
945
+ error: "\x1b[38;2;220;50;47m", // solarized red
946
+ muted: "\x1b[38;2;88;110;117m", // solarized base01
947
+ });
948
+ }
949
+ ```
950
+
951
+ Load a theme like any other extension: `agent-sh -e ./my-theme.ts`