agent-sh 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. package/README.md +25 -34
  2. package/dist/agent/agent-loop.d.ts +29 -6
  3. package/dist/agent/agent-loop.js +177 -59
  4. package/dist/agent/conversation-state.d.ts +3 -1
  5. package/dist/agent/conversation-state.js +6 -2
  6. package/dist/agent/nuclear-form.js +5 -4
  7. package/dist/agent/system-prompt.d.ts +4 -5
  8. package/dist/agent/system-prompt.js +12 -28
  9. package/dist/{token-budget.js → agent/token-budget.js} +1 -1
  10. package/dist/agent/tool-protocol.d.ts +83 -0
  11. package/dist/agent/tool-protocol.js +386 -0
  12. package/dist/agent/types.d.ts +21 -1
  13. package/dist/core.d.ts +7 -7
  14. package/dist/core.js +76 -194
  15. package/dist/event-bus.d.ts +26 -0
  16. package/dist/event-bus.js +20 -1
  17. package/dist/extension-loader.d.ts +5 -0
  18. package/dist/extension-loader.js +104 -17
  19. package/dist/extensions/agent-backend.d.ts +13 -0
  20. package/dist/extensions/agent-backend.js +167 -0
  21. package/dist/extensions/command-suggest.d.ts +3 -3
  22. package/dist/extensions/command-suggest.js +4 -3
  23. package/dist/extensions/index.d.ts +19 -0
  24. package/dist/extensions/index.js +25 -0
  25. package/dist/extensions/slash-commands.d.ts +1 -1
  26. package/dist/extensions/slash-commands.js +16 -1
  27. package/dist/extensions/terminal-buffer.d.ts +1 -1
  28. package/dist/extensions/terminal-buffer.js +13 -4
  29. package/dist/extensions/tui-renderer.js +63 -43
  30. package/dist/index.js +14 -20
  31. package/dist/settings.d.ts +6 -0
  32. package/dist/settings.js +4 -1
  33. package/dist/{input-handler.d.ts → shell/input-handler.d.ts} +1 -1
  34. package/dist/{input-handler.js → shell/input-handler.js} +60 -43
  35. package/dist/{output-parser.d.ts → shell/output-parser.d.ts} +1 -1
  36. package/dist/{output-parser.js → shell/output-parser.js} +1 -1
  37. package/dist/{shell.d.ts → shell/shell.d.ts} +8 -2
  38. package/dist/{shell.js → shell/shell.js} +20 -6
  39. package/dist/types.d.ts +49 -10
  40. package/dist/utils/compositor.d.ts +62 -0
  41. package/dist/utils/compositor.js +88 -0
  42. package/dist/utils/diff-renderer.js +92 -4
  43. package/dist/utils/floating-panel.d.ts +2 -0
  44. package/dist/utils/floating-panel.js +30 -14
  45. package/dist/utils/handler-registry.d.ts +26 -10
  46. package/dist/utils/handler-registry.js +52 -16
  47. package/dist/utils/line-editor.d.ts +23 -3
  48. package/dist/utils/line-editor.js +180 -42
  49. package/dist/utils/markdown.d.ts +1 -0
  50. package/dist/utils/markdown.js +1 -1
  51. package/dist/utils/message-utils.d.ts +35 -0
  52. package/dist/utils/message-utils.js +75 -0
  53. package/dist/utils/terminal-buffer.d.ts +5 -1
  54. package/dist/utils/terminal-buffer.js +18 -2
  55. package/dist/utils/tool-interactive.d.ts +12 -0
  56. package/dist/utils/tool-interactive.js +53 -0
  57. package/examples/extensions/ash-acp-bridge/README.md +39 -0
  58. package/examples/extensions/ash-acp-bridge/package.json +23 -0
  59. package/examples/extensions/ash-acp-bridge/src/index.ts +571 -0
  60. package/examples/extensions/ash-acp-bridge/tsconfig.json +14 -0
  61. package/examples/extensions/ash-mcp-bridge/README.md +72 -0
  62. package/examples/extensions/ash-mcp-bridge/index.ts +154 -0
  63. package/examples/extensions/ash-mcp-bridge/package.json +9 -0
  64. package/examples/extensions/interactive-prompts.ts +82 -110
  65. package/examples/extensions/overlay-agent.ts +84 -38
  66. package/examples/extensions/peer-mesh.ts +450 -0
  67. package/examples/extensions/questionnaire.ts +249 -0
  68. package/examples/extensions/tmux-pane.ts +307 -0
  69. package/examples/extensions/web-access.ts +327 -0
  70. package/package.json +9 -1
  71. package/dist/extensions/overlay-agent.d.ts +0 -14
  72. package/dist/extensions/overlay-agent.js +0 -147
  73. package/examples/extensions/terminal-buffer.ts +0 -184
  74. package/dist/{token-budget.d.ts → agent/token-budget.d.ts} +0 -0
package/README.md CHANGED
@@ -1,36 +1,22 @@
1
1
  # agent-sh
2
2
 
3
+ An agent that lives in a shell — not a shell that lives in an agent.
4
+
3
5
  [![npm version](https://img.shields.io/npm/v/agent-sh.svg)](https://www.npmjs.com/package/agent-sh)
4
6
  [![license](https://img.shields.io/npm/l/agent-sh.svg)](https://github.com/guanyilun/agent-sh/blob/main/LICENSE)
5
7
 
6
- Not a shell that lives in an agent — an agent that lives in a shell.
7
-
8
- I live in a terminal. I don't want an agent that can run shell commands when it needs to — I want my shell, with an agent I can reach for when *I* need to. Most AI tools get this backwards: the LLM drives the experience and the shell is bolted on as an afterthought. No real PTY, no job control, no vim, fragile `cd` tracking. The agent is the main character and your terminal is a prop.
8
+ Most AI terminal tools get this backwards: the LLM drives the experience and the shell is bolted on as an afterthought. No real PTY, no job control, no vim, fragile `cd` tracking. The agent is the main character and your terminal is a prop.
9
9
 
10
10
  agent-sh flips this. It's your shell first — full PTY, your rc config, your aliases, everything just works. But type `>` at the start of a line, and you're talking to an agent that has full context of what you've been doing.
11
11
 
12
12
  ```
13
- src $ ls -la # real shell command
14
- src $ cd ../tests && npm test # real cd, env, aliases — all just work
15
- src $ vim file.ts # opens vim in the same PTY
16
- src $ > explain the last error # agent investigates using its own tools
17
- src $ > deploy to staging # agent runs it in your live shell
13
+ ~ $ ls -la # real shell command
14
+ ~ $ cd ../tests && npm test # real cd, env, aliases — all just work
15
+ ~ $ vim file.ts # opens vim in the same PTY
16
+ ~ $ > explain the last error # agent investigates using its own tools
17
+ ~ $ > deploy to staging # agent runs it in your live shell
18
18
  ```
19
19
 
20
- ## Key Features
21
-
22
- **Real terminal, zero compromise.** Full PTY with your shell config, aliases, and environment. Shell starts instantly — the agent connects asynchronously in the background.
23
-
24
- **Context-aware agent.** Every query includes your cwd, recent commands, and their output. Run a failing test, type `> fix this`, and the agent knows exactly what happened. It has built-in tools for file read/write/edit, bash, grep, glob — no external setup needed. Context management works like shell history — continuous, persistent across restarts, no sessions to manage. See [Context Management](docs/context-management.md).
25
-
26
- **Agent decides how to help.** One entry point (`>`), three tool categories. The agent uses scratchpad tools to investigate, `display` to show you output, and `user_shell` for commands with lasting effects. No need to pick a mode — the agent reasons about which tools to use based on your intent.
27
-
28
- **Any LLM, any backend.** Works with any OpenAI-compatible API out of the box. Define multiple providers in settings and cycle between models at runtime with Shift+Tab. Or swap in a completely different agent — [Claude Code](examples/extensions/claude-code-bridge/) and [pi](examples/extensions/pi-bridge/) run as drop-in backend extensions.
29
-
30
- **Extensible by design.** The entire system is built on a typed event bus. Extensions can add custom input modes, content transforms (render LaTeX as images, Mermaid as diagrams), themes, slash commands, or replace the agent backend entirely. The built-in TUI renderer is itself just an extension — nothing is special.
31
-
32
- **Embeddable as a library.** The core is a headless kernel — `import { createCore } from "agent-sh"` to build WebSocket servers, REST APIs, Electron apps, or test harnesses. No terminal required.
33
-
34
20
  ## Quick Start
35
21
 
36
22
  ```bash
@@ -42,17 +28,21 @@ Set `OPENAI_API_KEY` in your environment (or configure providers in `~/.agent-sh
42
28
 
43
29
  Requires Node.js 18+.
44
30
 
45
- ## Agent Mode
31
+ ## Key Features
46
32
 
47
- Type `>` at the start of a line to talk to the agent. The agent decides how to help:
33
+ **Real terminal, zero compromise.** Full PTY with your shell config, aliases, and environment. Shell starts instantly — the agent connects asynchronously in the background.
48
34
 
49
- - **Scratchpad tools** (`bash`, `read_file`, `grep`, `glob`, etc.) for investigation. Output goes to the agent, not your terminal.
50
- - **`display`** — shows output in your terminal (e.g. `cat`, `git log`). You see it; the agent doesn't process it.
51
- - **`user_shell`** — runs commands with lasting effects (`cd`, `npm install`, etc.) in your live shell.
35
+ **One entry point, three tool categories.** Type `>` and agent-sh figures out how to help. Scratchpad tools (`bash`, `read_file`, `grep`, `glob`) for investigation. `display` to show you output. `user_shell` for commands with lasting effects in your live shell. No modes to pick — the agent reasons about which tools to use based on your intent.
52
36
 
53
- Everything else works as a normal shell — commands go straight to the PTY. Input modes are extensible — see [Extensions: Custom Input Modes](docs/extensions.md#custom-input-modes).
37
+ **Context that just works.** Every query includes your cwd, recent commands, and their output. Run a failing test, type `> fix this`, and agent-sh knows exactly what happened. Context management works like shell history — continuous, persistent across restarts, no sessions to manage. See [Context Management](docs/context-management.md).
38
+
39
+ **Any LLM, any backend.** agent-sh works with any OpenAI-compatible API out of the box. Define multiple providers in settings and cycle between models at runtime with Shift+Tab. Or swap in a completely different agent — [Claude Code](examples/extensions/claude-code-bridge/) and [pi](examples/extensions/pi-bridge/) run as drop-in backend extensions.
40
+
41
+ **Extensible by design.** The entire system is built on a typed event bus. Extensions can add custom input modes, content transforms (render LaTeX as images, Mermaid as diagrams), themes, slash commands, or replace the agent backend entirely. The built-in TUI renderer is itself just an extension.
42
+
43
+ **Embeddable as a library.** The core is a headless kernel — `import { createCore } from "agent-sh"` to build WebSocket servers, REST APIs, Electron apps, or test harnesses. No terminal required.
54
44
 
55
- ### Slash Commands
45
+ ## Slash Commands
56
46
 
57
47
  | Command | Description |
58
48
  |---|---|
@@ -65,15 +55,16 @@ Everything else works as a normal shell — commands go straight to the PTY. Inp
65
55
 
66
56
  ## Configuration
67
57
 
68
- Configure via `~/.agent-sh/settings.json`. See the [Usage Guide](docs/usage.md#configuration) for the full settings reference (providers, models, extensions, skills, and more).
58
+ Configure via `~/.agent-sh/settings.json`. See the [Usage Guide](docs/usage.md#configuration) for the full settings reference.
69
59
 
70
60
  ## Documentation
71
61
 
72
- - [Usage Guide](docs/usage.md) — providers, models, configuration, provider profiles
73
- - [Internal Agent](docs/agent.md) — how the agent loop works: tools, context, streaming
74
- - [Context Management](docs/context-management.md) — three-tier history, token budget, design philosophy
75
- - [Architecture](docs/architecture.md) — design philosophy, component overview, project structure
62
+ - [Usage Guide](docs/usage.md) — providers, models, configuration
63
+ - [Internal Agent](docs/agent.md) — tools, context, streaming
64
+ - [Context Management](docs/context-management.md) — three-tier history, token budget
65
+ - [Architecture](docs/architecture.md) — design philosophy, component overview
76
66
  - [Extensions](docs/extensions.md) — event bus, content transforms, custom backends, theming
67
+ - [TUI Composition](docs/tui-composition.md) — compositor, render surfaces, stream routing
77
68
  - [Library Usage](docs/library.md) — embedding agent-sh in your own apps
78
69
  - [Troubleshooting](docs/troubleshooting.md) — common errors and debug mode
79
70
 
@@ -16,13 +16,19 @@ import type { EventBus } from "../event-bus.js";
16
16
  import type { AgentMode } from "../types.js";
17
17
  import type { ContextManager } from "../context-manager.js";
18
18
  import type { LlmClient } from "../utils/llm-client.js";
19
- import type { HandlerRegistry } from "../utils/handler-registry.js";
19
+ import type { HandlerFunctions } from "../utils/handler-registry.js";
20
20
  import type { AgentBackend, ToolDefinition } from "./types.js";
21
+ import type { Compositor } from "../utils/compositor.js";
22
+ export interface AgentLoopConfig {
23
+ bus: EventBus;
24
+ contextManager: ContextManager;
25
+ llmClient: LlmClient;
26
+ handlers: HandlerFunctions;
27
+ modes?: AgentMode[];
28
+ initialModeIndex?: number;
29
+ compositor?: Compositor;
30
+ }
21
31
  export declare class AgentLoop implements AgentBackend {
22
- private bus;
23
- private contextManager;
24
- private llmClient;
25
- private handlers;
26
32
  private abortController;
27
33
  private toolRegistry;
28
34
  private historyFile;
@@ -32,18 +38,35 @@ export declare class AgentLoop implements AgentBackend {
32
38
  private modes;
33
39
  private currentModeIndex;
34
40
  private boundListeners;
41
+ private ctorListeners;
42
+ private ctorPipeListeners;
35
43
  private lastProjectSkillNames;
36
44
  private static readonly THINKING_LEVELS;
45
+ private bus;
46
+ private contextManager;
47
+ private llmClient;
48
+ private handlers;
37
49
  private thinkingLevel;
38
- constructor(bus: EventBus, contextManager: ContextManager, llmClient: LlmClient, handlers: HandlerRegistry, modeConfig?: AgentMode[], initialModeIndex?: number);
50
+ private compositor;
51
+ private toolProtocol;
52
+ constructor(config: AgentLoopConfig);
39
53
  /** Subscribe to bus events — activates this backend. */
40
54
  wire(): void;
41
55
  /** Unsubscribe from bus events — deactivates this backend. */
42
56
  unwire(): void;
43
57
  /** Register a tool (used by extensions via ctx.registerTool). */
44
58
  registerTool(tool: ToolDefinition): void;
59
+ /** Unregister a tool by name. */
60
+ unregisterTool(name: string): void;
45
61
  /** Get all registered tools. */
46
62
  getTools(): ToolDefinition[];
63
+ private instructions;
64
+ /** Register a named instruction block for the system prompt. */
65
+ registerInstruction(name: string, text: string): void;
66
+ /** Remove a named instruction block. */
67
+ removeInstruction(name: string): void;
68
+ /** Get instruction blocks registered by extensions. */
69
+ getInstructionSections(): string[];
47
70
  kill(): void;
48
71
  private cancel;
49
72
  /** Check if reasoning_effort should be sent for the current model/provider. */
@@ -6,7 +6,10 @@ import { ToolRegistry } from "./tool-registry.js";
6
6
  import { ConversationState } from "./conversation-state.js";
7
7
  import { HistoryFile } from "./history-file.js";
8
8
  import { STATIC_SYSTEM_PROMPT, buildDynamicContext } from "./system-prompt.js";
9
- import { TokenBudget } from "../token-budget.js";
9
+ import { createToolUI } from "../utils/tool-interactive.js";
10
+ import { TokenBudget } from "./token-budget.js";
11
+ import { getSettings } from "../settings.js";
12
+ import { createToolProtocol } from "./tool-protocol.js";
10
13
  // Core tool factories
11
14
  import { createBashTool } from "./tools/bash.js";
12
15
  import { createReadFileTool } from "./tools/read-file.js";
@@ -20,10 +23,6 @@ import { createDisplayTool } from "./tools/display.js";
20
23
  import { createListSkillsTool } from "./tools/list-skills.js";
21
24
  import { discoverProjectSkills } from "./skills.js";
22
25
  export class AgentLoop {
23
- bus;
24
- contextManager;
25
- llmClient;
26
- handlers;
27
26
  abortController = null;
28
27
  toolRegistry = new ToolRegistry();
29
28
  historyFile = new HistoryFile();
@@ -33,27 +32,52 @@ export class AgentLoop {
33
32
  modes;
34
33
  currentModeIndex = 0;
35
34
  boundListeners = [];
35
+ ctorListeners = [];
36
+ ctorPipeListeners = [];
36
37
  lastProjectSkillNames = new Set();
37
38
  static THINKING_LEVELS = ["off", "low", "medium", "high"];
39
+ bus;
40
+ contextManager;
41
+ llmClient;
42
+ handlers;
38
43
  thinkingLevel = "off";
39
- constructor(bus, contextManager, llmClient, handlers, modeConfig, initialModeIndex) {
40
- this.bus = bus;
41
- this.contextManager = contextManager;
42
- this.llmClient = llmClient;
43
- this.handlers = handlers;
44
+ compositor = null;
45
+ toolProtocol;
46
+ constructor(config) {
47
+ this.bus = config.bus;
48
+ this.contextManager = config.contextManager;
49
+ this.llmClient = config.llmClient;
50
+ this.handlers = config.handlers;
51
+ this.compositor = config.compositor ?? null;
44
52
  // Default modes: just the configured model
45
- this.modes = modeConfig ?? [
46
- { model: llmClient.model },
53
+ this.modes = config.modes ?? [
54
+ { model: config.llmClient.model },
47
55
  ];
48
- this.currentModeIndex = initialModeIndex ?? 0;
56
+ this.currentModeIndex = config.initialModeIndex ?? 0;
49
57
  // Unified token budget — adapts to current model's context window
50
58
  this.tokenBudget = new TokenBudget(this.currentMode.contextWindow);
59
+ // Tool protocol — controls how tools are presented to the LLM
60
+ this.toolProtocol = createToolProtocol(getSettings().toolMode ?? "api");
51
61
  // Register core tools
52
62
  this.registerCoreTools();
53
63
  // Update token budget with tool count
54
64
  this.tokenBudget.update(undefined, this.toolRegistry.all().length);
55
65
  // Register handlers — extensions can advise these
56
66
  this.registerHandlers();
67
+ // Subscribe to bus-based tool/instruction registration from extensions.
68
+ // These must be in the constructor (not wire()) because extensions call
69
+ // registerTool() during activate(), before activateBackend() calls wire().
70
+ const onCtor = (event, fn) => {
71
+ this.bus.on(event, fn);
72
+ this.ctorListeners.push({ event, fn });
73
+ };
74
+ onCtor("agent:register-tool", ({ tool }) => this.registerTool(tool));
75
+ onCtor("agent:unregister-tool", ({ name }) => this.unregisterTool(name));
76
+ onCtor("agent:register-instruction", ({ name, text }) => this.registerInstruction(name, text));
77
+ onCtor("agent:remove-instruction", ({ name }) => this.removeInstruction(name));
78
+ const getToolsPipe = () => ({ tools: this.getTools() });
79
+ this.bus.onPipe("agent:get-tools", getToolsPipe);
80
+ this.ctorPipeListeners.push({ event: "agent:get-tools", fn: getToolsPipe });
57
81
  }
58
82
  /** Subscribe to bus events — activates this backend. */
59
83
  wire() {
@@ -84,7 +108,7 @@ export class AgentLoop {
84
108
  }
85
109
  this.tokenBudget.update(m.contextWindow, this.toolRegistry.all().length);
86
110
  const label = m.provider ? `${m.provider}: ${m.model}` : m.model;
87
- this.bus.emit("agent:info", { name: "agent-sh", version: "0.4", model: m.model, provider: m.provider, contextWindow: m.contextWindow });
111
+ this.bus.emit("agent:info", { name: "ash", version: "0.4", model: m.model, provider: m.provider, contextWindow: m.contextWindow });
88
112
  this.bus.emit("ui:info", { message: `Model: ${label}` });
89
113
  this.bus.emit("config:changed", {});
90
114
  });
@@ -144,8 +168,8 @@ export class AgentLoop {
144
168
  this.lastProjectSkillNames.clear();
145
169
  });
146
170
  on("agent:compact-request", () => {
147
- const budgetTokens = this.tokenBudget.conversationBudgetTokens;
148
- const stats = this.conversation.compact(budgetTokens);
171
+ // Force compaction: use target of 0 so every non-pinned turn is evicted
172
+ const stats = this.conversation.compact(0, 10, true);
149
173
  this.conversation.flush().catch(() => { });
150
174
  if (stats) {
151
175
  this.bus.emit("ui:info", {
@@ -196,12 +220,44 @@ export class AgentLoop {
196
220
  registerTool(tool) {
197
221
  this.toolRegistry.register(tool);
198
222
  }
223
+ /** Unregister a tool by name. */
224
+ unregisterTool(name) {
225
+ this.toolRegistry.unregister(name);
226
+ }
199
227
  /** Get all registered tools. */
200
228
  getTools() {
201
229
  return this.toolRegistry.all();
202
230
  }
231
+ // ── Extension instructions & tool tracking ──────────────────────
232
+ instructions = new Map();
233
+ /** Register a named instruction block for the system prompt. */
234
+ registerInstruction(name, text) {
235
+ this.instructions.set(name, text);
236
+ }
237
+ /** Remove a named instruction block. */
238
+ removeInstruction(name) {
239
+ this.instructions.delete(name);
240
+ }
241
+ /** Get instruction blocks registered by extensions. */
242
+ getInstructionSections() {
243
+ const sections = [];
244
+ for (const [name, text] of this.instructions) {
245
+ sections.push(`## ${name}\n${text}`);
246
+ }
247
+ return sections;
248
+ }
203
249
  kill() {
204
250
  this.cancel();
251
+ this.unwire();
252
+ // Clean up constructor-level bus subscriptions
253
+ for (const { event, fn } of this.ctorListeners) {
254
+ this.bus.off(event, fn);
255
+ }
256
+ this.ctorListeners = [];
257
+ for (const { event, fn } of this.ctorPipeListeners) {
258
+ this.bus.offPipe(event, fn);
259
+ }
260
+ this.ctorPipeListeners = [];
205
261
  }
206
262
  cancel() {
207
263
  this.abortController?.abort();
@@ -237,7 +293,7 @@ export class AgentLoop {
237
293
  const label = newMode.provider
238
294
  ? `${newMode.provider}: ${newMode.model}`
239
295
  : newMode.model;
240
- this.bus.emit("agent:info", { name: "agent-sh", version: "0.4", model: newMode.model, provider: newMode.provider, contextWindow: newMode.contextWindow });
296
+ this.bus.emit("agent:info", { name: "ash", version: "0.4", model: newMode.model, provider: newMode.provider, contextWindow: newMode.contextWindow });
241
297
  this.bus.emit("ui:info", { message: `Model: ${label}` });
242
298
  this.bus.emit("config:changed", {});
243
299
  }
@@ -339,6 +395,7 @@ export class AgentLoop {
339
395
  // conversation_recall — search/expand evicted conversation turns
340
396
  this.toolRegistry.register({
341
397
  name: "conversation_recall",
398
+ displayName: "recall",
342
399
  description: "Browse, search, or expand evicted conversation turns. " +
343
400
  "Use when you need context from earlier in the conversation that was compacted away.",
344
401
  input_schema: {
@@ -382,8 +439,17 @@ export class AgentLoop {
382
439
  */
383
440
  registerHandlers() {
384
441
  const h = this.handlers;
442
+ // System prompt: static identity + behavioral instructions.
443
+ // Extensions can use registerInstruction() for a managed section,
444
+ // or advise this handler directly for full control.
445
+ h.define("system-prompt:build", () => {
446
+ const instructions = this.getInstructionSections();
447
+ if (instructions.length === 0)
448
+ return STATIC_SYSTEM_PROMPT;
449
+ return STATIC_SYSTEM_PROMPT + "\n\n# Extension Instructions\n\n" + instructions.join("\n\n");
450
+ });
385
451
  // Extensions compose additional context (git info, project rules, etc.)
386
- h.define("dynamic-context:build", () => buildDynamicContext(this.toolRegistry.all(), this.contextManager, this.tokenBudget.shellBudgetTokens));
452
+ h.define("dynamic-context:build", () => buildDynamicContext(this.contextManager, this.tokenBudget.shellBudgetTokens));
387
453
  // Full control over what the LLM sees: takes messages[], returns messages[].
388
454
  // Default: pass through. Extensions can advise to compact, summarize,
389
455
  // filter, reorder, inject — whatever strategy fits.
@@ -417,7 +483,7 @@ export class AgentLoop {
417
483
  // write_file
418
484
  newContent = args.content;
419
485
  }
420
- else if (typeof args.old_text === "string" && typeof args.new_text === "string" && oldContent) {
486
+ else if (typeof args.old_text === "string" && typeof args.new_text === "string" && oldContent !== null) {
421
487
  // edit_file
422
488
  newContent = oldContent.replace(args.old_text.replace(/\r\n/g, "\n"), args.new_text.replace(/\r\n/g, "\n"));
423
489
  }
@@ -441,10 +507,14 @@ export class AgentLoop {
441
507
  }
442
508
  catch { /* fall back to generic permission */ }
443
509
  }
510
+ const ui = this.compositor
511
+ ? createToolUI(this.bus, this.compositor.surface("agent"))
512
+ : undefined;
444
513
  const perm = await this.bus.emitPipeAsync("permission:request", {
445
514
  kind: permKind,
446
515
  title: permTitle,
447
516
  metadata,
517
+ ui,
448
518
  decision: { outcome: "approved" },
449
519
  });
450
520
  if (perm.decision.outcome !== "approved") {
@@ -466,7 +536,10 @@ export class AgentLoop {
466
536
  const onChunk = (tool.showOutput !== false && !diffShown)
467
537
  ? ctx.onChunk
468
538
  : undefined;
469
- const result = await tool.execute(args, onChunk);
539
+ const toolCtx = this.compositor
540
+ ? { ui: createToolUI(this.bus, this.compositor.surface("agent")) }
541
+ : undefined;
542
+ const result = await tool.execute(args, onChunk, toolCtx);
470
543
  // Invalidate read cache when a file is modified
471
544
  if (tool.modifiesFiles && typeof args.path === "string" && !result.isError) {
472
545
  const absPath = path.resolve(process.cwd(), args.path);
@@ -494,8 +567,8 @@ export class AgentLoop {
494
567
  this.abortController = new AbortController();
495
568
  const signal = this.abortController.signal;
496
569
  // Each loop iteration adds an abort listener (via OpenAI SDK stream);
497
- // raise the limit to avoid spurious warnings on multi-tool queries.
498
- setMaxListeners(50, signal);
570
+ // disable the limit — long-running tool loops can easily exceed any cap.
571
+ setMaxListeners(0, signal);
499
572
  this.bus.emit("agent:query", { query });
500
573
  this.bus.emit("agent:processing-start", {});
501
574
  let responseText = "";
@@ -534,10 +607,11 @@ export class AgentLoop {
534
607
  async executeLoop(signal) {
535
608
  let fullResponseText = "";
536
609
  while (!signal.aborted) {
537
- // Auto-compact if conversation exceeds the model-aware budget
610
+ // Auto-compact when conversation exceeds threshold fraction of budget
538
611
  const budgetTokens = this.tokenBudget.conversationBudgetTokens;
539
- if (this.conversation.estimateTokens() > budgetTokens) {
540
- const stats = this.conversation.compact(budgetTokens);
612
+ const autoCompactThreshold = Math.floor(budgetTokens * getSettings().autoCompactThreshold);
613
+ if (this.conversation.estimateTokens() > autoCompactThreshold) {
614
+ const stats = this.conversation.compact(autoCompactThreshold);
541
615
  await this.conversation.flush();
542
616
  if (stats) {
543
617
  this.bus.emit("ui:info", {
@@ -545,16 +619,19 @@ export class AgentLoop {
545
619
  });
546
620
  }
547
621
  }
548
- // System prompt is static (cacheable); dynamic context uses handler
549
- // so extensions can compose additional context via advise()
550
- const systemPrompt = STATIC_SYSTEM_PROMPT;
622
+ // System prompt uses handler so extensions can append instructions (cacheable);
623
+ // dynamic context uses handler for per-query state via advise()
624
+ const systemPrompt = this.handlers.call("system-prompt:build");
551
625
  const dynamicContext = this.handlers.call("dynamic-context:build");
552
626
  // Stream LLM response with retry
553
627
  const result = await this.streamWithRetry(systemPrompt, dynamicContext, signal);
554
- const { text, toolCalls, assistantContent, assistantToolCalls } = result;
628
+ const { text, toolCalls: streamedToolCalls } = result;
629
+ // Extract tool calls via protocol (API mode uses streamed calls,
630
+ // inline mode parses XML from text)
631
+ const toolCalls = this.toolProtocol.extractToolCalls(text, streamedToolCalls);
555
632
  fullResponseText += text;
556
- // Record the assistant message in conversation
557
- this.conversation.addAssistantMessage(assistantContent, assistantToolCalls);
633
+ // Record the assistant message via protocol
634
+ this.toolProtocol.recordAssistant(this.conversation, text, toolCalls);
558
635
  // No tool calls → agent is done
559
636
  if (toolCalls.length === 0)
560
637
  break;
@@ -585,10 +662,28 @@ export class AgentLoop {
585
662
  // Execute tool calls — run read-only tools in parallel, permission-
586
663
  // requiring tools sequentially (to avoid overlapping permission prompts).
587
664
  const batchTotal = toolCalls.length;
665
+ const collectedResults = [];
588
666
  const executeSingle = async (tc, batchIndex) => {
667
+ // Rewrite meta-tool calls (e.g., use_extension → actual tool)
668
+ tc = this.toolProtocol.rewriteToolCall(tc);
669
+ // Check for validation errors from rewrite (e.g., wrong extension params)
670
+ try {
671
+ const maybeError = JSON.parse(tc.argumentsJson);
672
+ if (maybeError._error) {
673
+ collectedResults.push({
674
+ callId: tc.id, toolName: tc.name,
675
+ content: maybeError._error, isError: true,
676
+ });
677
+ return;
678
+ }
679
+ }
680
+ catch { /* not an error payload, continue */ }
589
681
  const tool = this.toolRegistry.get(tc.name);
590
682
  if (!tool) {
591
- this.conversation.addToolResult(tc.id, `Error: Unknown tool "${tc.name}"`);
683
+ collectedResults.push({
684
+ callId: tc.id, toolName: tc.name,
685
+ content: `Unknown tool "${tc.name}"`, isError: true,
686
+ });
592
687
  return;
593
688
  }
594
689
  let args;
@@ -596,7 +691,10 @@ export class AgentLoop {
596
691
  args = JSON.parse(tc.argumentsJson);
597
692
  }
598
693
  catch {
599
- this.conversation.addToolResult(tc.id, `Error: Invalid JSON arguments for ${tc.name}`);
694
+ collectedResults.push({
695
+ callId: tc.id, toolName: tc.name,
696
+ content: `Invalid JSON arguments for ${tc.name}`, isError: true,
697
+ });
600
698
  return;
601
699
  }
602
700
  // Execute via handler — extensions can advise to add safe-mode,
@@ -606,11 +704,8 @@ export class AgentLoop {
606
704
  };
607
705
  const result = await this.handlers.call("tool:execute", { name: tc.name, id: tc.id, args, tool, onChunk: defaultOnChunk,
608
706
  batchIndex, batchTotal: batchTotal > 1 ? batchTotal : undefined });
609
- // Add tool result to conversation (truncate large outputs to avoid
610
- // blowing through the context window on a single tool call)
611
- let content = result.isError
612
- ? `Error: ${result.content}`
613
- : result.content;
707
+ // Truncate large outputs to avoid blowing context
708
+ let content = result.content;
614
709
  const maxBytes = 16_384; // ~4k tokens
615
710
  if (content.length > maxBytes) {
616
711
  const headBytes = Math.floor(maxBytes * 0.6);
@@ -633,7 +728,10 @@ export class AgentLoop {
633
728
  ...lines.slice(tailStart),
634
729
  ].join("\n");
635
730
  }
636
- this.conversation.addToolResult(tc.id, content);
731
+ collectedResults.push({
732
+ callId: tc.id, toolName: tc.name,
733
+ content, isError: result.isError,
734
+ });
637
735
  };
638
736
  // Partition into parallel-safe (read-only) and sequential (needs permission)
639
737
  const parallel = [];
@@ -661,6 +759,8 @@ export class AgentLoop {
661
759
  break;
662
760
  await executeSingle(tc, ++batchIdx);
663
761
  }
762
+ // Record all tool results via protocol
763
+ this.toolProtocol.recordResults(this.conversation, collectedResults);
664
764
  // Loop back — LLM sees tool results
665
765
  }
666
766
  return fullResponseText;
@@ -726,9 +826,21 @@ export class AgentLoop {
726
826
  ];
727
827
  // Let extensions transform the message array (compact, summarize, filter, etc.)
728
828
  const messages = this.handlers.call("conversation:prepare", rawMessages);
829
+ // Tool protocol controls what goes in the API tools param vs dynamic context
830
+ const apiTools = this.toolProtocol.getApiTools(this.toolRegistry.all());
831
+ const toolPrompt = this.toolProtocol.getToolPrompt(this.toolRegistry.all());
832
+ // Append tool catalog to dynamic context (closer to user query = better followed)
833
+ if (toolPrompt) {
834
+ const ctxMsg = messages[1]; // dynamic context user message
835
+ if (ctxMsg && typeof ctxMsg.content === "string") {
836
+ ctxMsg.content += "\n" + toolPrompt;
837
+ }
838
+ }
839
+ // Stream filter strips tool tags from display (inline mode only)
840
+ const streamFilter = this.toolProtocol.createStreamFilter(this.toolRegistry.all().map((t) => t.name));
729
841
  const stream = await this.llmClient.stream({
730
842
  messages,
731
- tools: this.toolRegistry.toAPITools(),
843
+ tools: apiTools,
732
844
  model: this.currentModel,
733
845
  reasoning_effort: this.shouldSendReasoningEffort() ? this.thinkingLevel : undefined,
734
846
  signal,
@@ -736,6 +848,15 @@ export class AgentLoop {
736
848
  for await (const chunk of stream) {
737
849
  if (signal.aborted)
738
850
  break;
851
+ // Token usage (may arrive in a chunk with empty choices)
852
+ if (chunk.usage) {
853
+ const u = chunk.usage;
854
+ this.bus.emit("agent:usage", {
855
+ prompt_tokens: u.prompt_tokens ?? 0,
856
+ completion_tokens: u.completion_tokens ?? 0,
857
+ total_tokens: u.total_tokens ?? 0,
858
+ });
859
+ }
739
860
  const choice = chunk.choices[0];
740
861
  if (!choice)
741
862
  continue;
@@ -743,9 +864,15 @@ export class AgentLoop {
743
864
  // Text content
744
865
  if (delta?.content) {
745
866
  text += delta.content;
746
- this.bus.emitTransform("agent:response-chunk", {
747
- blocks: [{ type: "text", text: delta.content }],
748
- });
867
+ // Filter tool tags from display output (inline mode)
868
+ const displayText = streamFilter
869
+ ? streamFilter.feed(delta.content)
870
+ : delta.content;
871
+ if (displayText) {
872
+ this.bus.emitTransform("agent:response-chunk", {
873
+ blocks: [{ type: "text", text: displayText }],
874
+ });
875
+ }
749
876
  }
750
877
  // Reasoning/thinking tokens (non-standard, e.g. DeepSeek)
751
878
  if (delta?.reasoning_content) {
@@ -770,28 +897,19 @@ export class AgentLoop {
770
897
  }
771
898
  }
772
899
  }
773
- // Token usage (final chunk from providers that support it)
774
- if (chunk.usage) {
775
- const u = chunk.usage;
776
- this.bus.emit("agent:usage", {
777
- prompt_tokens: u.prompt_tokens ?? 0,
778
- completion_tokens: u.completion_tokens ?? 0,
779
- total_tokens: u.total_tokens ?? 0,
900
+ }
901
+ // Flush any buffered content from the stream filter
902
+ if (streamFilter) {
903
+ const remaining = streamFilter.flush();
904
+ if (remaining) {
905
+ this.bus.emitTransform("agent:response-chunk", {
906
+ blocks: [{ type: "text", text: remaining }],
780
907
  });
781
908
  }
782
909
  }
783
- // Build assistant tool calls for conversation recording
784
- const assistantToolCalls = pendingToolCalls.length
785
- ? pendingToolCalls.map((tc) => ({
786
- id: tc.id,
787
- function: { name: tc.name, arguments: tc.argumentsJson },
788
- }))
789
- : undefined;
790
910
  return {
791
911
  text,
792
912
  toolCalls: pendingToolCalls,
793
- assistantContent: text || null,
794
- assistantToolCalls,
795
913
  };
796
914
  }
797
915
  }
@@ -18,6 +18,8 @@ export declare class ConversationState {
18
18
  };
19
19
  }[]): void;
20
20
  addToolResult(toolCallId: string, content: string): void;
21
+ /** Add tool results as a user message (for inline tool protocol). */
22
+ addToolResultInline(content: string): void;
21
23
  addSystemNote(text: string): void;
22
24
  getMessages(): ChatCompletionMessageParam[];
23
25
  estimateTokens(): number;
@@ -26,7 +28,7 @@ export declare class ConversationState {
26
28
  * them with nuclear one-liner summaries that stay in the conversation.
27
29
  * Read-only tool results are dropped entirely.
28
30
  */
29
- compact(targetTokens: number, recentTurnsToKeep?: number): {
31
+ compact(targetTokens: number, recentTurnsToKeep?: number, force?: boolean): {
30
32
  before: number;
31
33
  after: number;
32
34
  } | null;
@@ -43,6 +43,10 @@ export class ConversationState {
43
43
  content,
44
44
  });
45
45
  }
46
+ /** Add tool results as a user message (for inline tool protocol). */
47
+ addToolResultInline(content) {
48
+ this.messages.push({ role: "user", content });
49
+ }
46
50
  addSystemNote(text) {
47
51
  this.messages.push({ role: "user", content: text });
48
52
  }
@@ -59,9 +63,9 @@ export class ConversationState {
59
63
  * them with nuclear one-liner summaries that stay in the conversation.
60
64
  * Read-only tool results are dropped entirely.
61
65
  */
62
- compact(targetTokens, recentTurnsToKeep = 10) {
66
+ compact(targetTokens, recentTurnsToKeep = 10, force = false) {
63
67
  const before = this.estimateTokens();
64
- if (before <= targetTokens)
68
+ if (!force && before <= targetTokens)
65
69
  return null;
66
70
  const turns = this.parseTurns();
67
71
  if (turns.length <= 2)