npm - agent-sh - Versions diffs - 0.8.0 → 0.9.0 - Mend

agent-sh 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (74)

package/README.md +25 -34
package/dist/agent/agent-loop.d.ts +29 -6
package/dist/agent/agent-loop.js +177 -59
package/dist/agent/conversation-state.d.ts +3 -1
package/dist/agent/conversation-state.js +6 -2
package/dist/agent/nuclear-form.js +5 -4
package/dist/agent/system-prompt.d.ts +4 -5
package/dist/agent/system-prompt.js +12 -28
package/dist/{token-budget.js → agent/token-budget.js} +1 -1
package/dist/agent/tool-protocol.d.ts +83 -0
package/dist/agent/tool-protocol.js +386 -0
package/dist/agent/types.d.ts +21 -1
package/dist/core.d.ts +7 -7
package/dist/core.js +76 -194
package/dist/event-bus.d.ts +26 -0
package/dist/event-bus.js +20 -1
package/dist/extension-loader.d.ts +5 -0
package/dist/extension-loader.js +104 -17
package/dist/extensions/agent-backend.d.ts +13 -0
package/dist/extensions/agent-backend.js +167 -0
package/dist/extensions/command-suggest.d.ts +3 -3
package/dist/extensions/command-suggest.js +4 -3
package/dist/extensions/index.d.ts +19 -0
package/dist/extensions/index.js +25 -0
package/dist/extensions/slash-commands.d.ts +1 -1
package/dist/extensions/slash-commands.js +16 -1
package/dist/extensions/terminal-buffer.d.ts +1 -1
package/dist/extensions/terminal-buffer.js +13 -4
package/dist/extensions/tui-renderer.js +63 -43
package/dist/index.js +14 -20
package/dist/settings.d.ts +6 -0
package/dist/settings.js +4 -1
package/dist/{input-handler.d.ts → shell/input-handler.d.ts} +1 -1
package/dist/{input-handler.js → shell/input-handler.js} +60 -43
package/dist/{output-parser.d.ts → shell/output-parser.d.ts} +1 -1
package/dist/{output-parser.js → shell/output-parser.js} +1 -1
package/dist/{shell.d.ts → shell/shell.d.ts} +8 -2
package/dist/{shell.js → shell/shell.js} +20 -6
package/dist/types.d.ts +49 -10
package/dist/utils/compositor.d.ts +62 -0
package/dist/utils/compositor.js +88 -0
package/dist/utils/diff-renderer.js +92 -4
package/dist/utils/floating-panel.d.ts +2 -0
package/dist/utils/floating-panel.js +30 -14
package/dist/utils/handler-registry.d.ts +26 -10
package/dist/utils/handler-registry.js +52 -16
package/dist/utils/line-editor.d.ts +23 -3
package/dist/utils/line-editor.js +180 -42
package/dist/utils/markdown.d.ts +1 -0
package/dist/utils/markdown.js +1 -1
package/dist/utils/message-utils.d.ts +35 -0
package/dist/utils/message-utils.js +75 -0
package/dist/utils/terminal-buffer.d.ts +5 -1
package/dist/utils/terminal-buffer.js +18 -2
package/dist/utils/tool-interactive.d.ts +12 -0
package/dist/utils/tool-interactive.js +53 -0
package/examples/extensions/ash-acp-bridge/README.md +39 -0
package/examples/extensions/ash-acp-bridge/package.json +23 -0
package/examples/extensions/ash-acp-bridge/src/index.ts +571 -0
package/examples/extensions/ash-acp-bridge/tsconfig.json +14 -0
package/examples/extensions/ash-mcp-bridge/README.md +72 -0
package/examples/extensions/ash-mcp-bridge/index.ts +154 -0
package/examples/extensions/ash-mcp-bridge/package.json +9 -0
package/examples/extensions/interactive-prompts.ts +82 -110
package/examples/extensions/overlay-agent.ts +84 -38
package/examples/extensions/peer-mesh.ts +450 -0
package/examples/extensions/questionnaire.ts +249 -0
package/examples/extensions/tmux-pane.ts +307 -0
package/examples/extensions/web-access.ts +327 -0
package/package.json +9 -1
package/dist/extensions/overlay-agent.d.ts +0 -14
package/dist/extensions/overlay-agent.js +0 -147
package/examples/extensions/terminal-buffer.ts +0 -184
package/dist/{token-budget.d.ts → agent/token-budget.d.ts} +0 -0

package/README.md CHANGED

@@ -1,36 +1,22 @@
 # agent-sh
+An agent that lives in a shell — not a shell that lives in an agent.
 [![npm version](https://img.shields.io/npm/v/agent-sh.svg)](https://www.npmjs.com/package/agent-sh)
 [![license](https://img.shields.io/npm/l/agent-sh.svg)](https://github.com/guanyilun/agent-sh/blob/main/LICENSE)
-Not a shell that lives in an agent — an agent that lives in a shell.
-I live in a terminal. I don't want an agent that can run shell commands when it needs to — I want my shell, with an agent I can reach for when *I* need to. Most AI tools get this backwards: the LLM drives the experience and the shell is bolted on as an afterthought. No real PTY, no job control, no vim, fragile `cd` tracking. The agent is the main character and your terminal is a prop.
+Most AI terminal tools get this backwards: the LLM drives the experience and the shell is bolted on as an afterthought. No real PTY, no job control, no vim, fragile `cd` tracking. The agent is the main character and your terminal is a prop.
 agent-sh flips this. It's your shell first — full PTY, your rc config, your aliases, everything just works. But type `>` at the start of a line, and you're talking to an agent that has full context of what you've been doing.
 ```
-⚡ src $ ls -la                          # real shell command
-⚡ src $ cd ../tests && npm test          # real cd, env, aliases — all just work
-⚡ src $ vim file.ts                      # opens vim in the same PTY
-⚡ src $ > explain the last error          # agent investigates using its own tools
-⚡ src $ > deploy to staging              # agent runs it in your live shell
+~ $ ls -la                          # real shell command
+~ $ cd ../tests && npm test          # real cd, env, aliases — all just work
+~ $ vim file.ts                      # opens vim in the same PTY
+~ $ > explain the last error          # agent investigates using its own tools
+~ $ > deploy to staging              # agent runs it in your live shell
 ```
-## Key Features
-**Real terminal, zero compromise.** Full PTY with your shell config, aliases, and environment. Shell starts instantly — the agent connects asynchronously in the background.
-**Context-aware agent.** Every query includes your cwd, recent commands, and their output. Run a failing test, type `> fix this`, and the agent knows exactly what happened. It has built-in tools for file read/write/edit, bash, grep, glob — no external setup needed. Context management works like shell history — continuous, persistent across restarts, no sessions to manage. See [Context Management](docs/context-management.md).
-**Agent decides how to help.** One entry point (`>`), three tool categories. The agent uses scratchpad tools to investigate, `display` to show you output, and `user_shell` for commands with lasting effects. No need to pick a mode — the agent reasons about which tools to use based on your intent.
-**Any LLM, any backend.** Works with any OpenAI-compatible API out of the box. Define multiple providers in settings and cycle between models at runtime with Shift+Tab. Or swap in a completely different agent — [Claude Code](examples/extensions/claude-code-bridge/) and [pi](examples/extensions/pi-bridge/) run as drop-in backend extensions.
-**Extensible by design.** The entire system is built on a typed event bus. Extensions can add custom input modes, content transforms (render LaTeX as images, Mermaid as diagrams), themes, slash commands, or replace the agent backend entirely. The built-in TUI renderer is itself just an extension — nothing is special.
-**Embeddable as a library.** The core is a headless kernel — `import { createCore } from "agent-sh"` to build WebSocket servers, REST APIs, Electron apps, or test harnesses. No terminal required.
 ## Quick Start
 ```bash
@@ -42,17 +28,21 @@ Set `OPENAI_API_KEY` in your environment (or configure providers in `~/.agent-sh
 Requires Node.js 18+.
-## Agent Mode
+## Key Features
-Type `>` at the start of a line to talk to the agent. The agent decides how to help:
+**Real terminal, zero compromise.** Full PTY with your shell config, aliases, and environment. Shell starts instantly — the agent connects asynchronously in the background.
-- **Scratchpad tools** (`bash`, `read_file`, `grep`, `glob`, etc.) — for investigation. Output goes to the agent, not your terminal.
-- **`display`** — shows output in your terminal (e.g. `cat`, `git log`). You see it; the agent doesn't process it.
-- **`user_shell`** — runs commands with lasting effects (`cd`, `npm install`, etc.) in your live shell.
+**One entry point, three tool categories.** Type `>` and agent-sh figures out how to help. Scratchpad tools (`bash`, `read_file`, `grep`, `glob`) for investigation. `display` to show you output. `user_shell` for commands with lasting effects in your live shell. No modes to pick — the agent reasons about which tools to use based on your intent.
-Everything else works as a normal shell — commands go straight to the PTY. Input modes are extensible — see [Extensions: Custom Input Modes](docs/extensions.md#custom-input-modes).
+**Context that just works.** Every query includes your cwd, recent commands, and their output. Run a failing test, type `> fix this`, and agent-sh knows exactly what happened. Context management works like shell history — continuous, persistent across restarts, no sessions to manage. See [Context Management](docs/context-management.md).
+**Any LLM, any backend.** agent-sh works with any OpenAI-compatible API out of the box. Define multiple providers in settings and cycle between models at runtime with Shift+Tab. Or swap in a completely different agent — [Claude Code](examples/extensions/claude-code-bridge/) and [pi](examples/extensions/pi-bridge/) run as drop-in backend extensions.
+**Extensible by design.** The entire system is built on a typed event bus. Extensions can add custom input modes, content transforms (render LaTeX as images, Mermaid as diagrams), themes, slash commands, or replace the agent backend entirely. The built-in TUI renderer is itself just an extension.
+**Embeddable as a library.** The core is a headless kernel — `import { createCore } from "agent-sh"` to build WebSocket servers, REST APIs, Electron apps, or test harnesses. No terminal required.
-### Slash Commands
+## Slash Commands
 | Command | Description |
 |---|---|
@@ -65,15 +55,16 @@ Everything else works as a normal shell — commands go straight to the PTY. Inp
 ## Configuration
-Configure via `~/.agent-sh/settings.json`. See the [Usage Guide](docs/usage.md#configuration) for the full settings reference (providers, models, extensions, skills, and more).
+Configure via `~/.agent-sh/settings.json`. See the [Usage Guide](docs/usage.md#configuration) for the full settings reference.
 ## Documentation
-- [Usage Guide](docs/usage.md) — providers, models, configuration, provider profiles
-- [Internal Agent](docs/agent.md) — how the agent loop works: tools, context, streaming
-- [Context Management](docs/context-management.md) — three-tier history, token budget, design philosophy
-- [Architecture](docs/architecture.md) — design philosophy, component overview, project structure
+- [Usage Guide](docs/usage.md) — providers, models, configuration
+- [Internal Agent](docs/agent.md) — tools, context, streaming
+- [Context Management](docs/context-management.md) — three-tier history, token budget
+- [Architecture](docs/architecture.md) — design philosophy, component overview
 - [Extensions](docs/extensions.md) — event bus, content transforms, custom backends, theming
+- [TUI Composition](docs/tui-composition.md) — compositor, render surfaces, stream routing
 - [Library Usage](docs/library.md) — embedding agent-sh in your own apps
 - [Troubleshooting](docs/troubleshooting.md) — common errors and debug mode

package/dist/agent/agent-loop.d.ts CHANGED

@@ -16,13 +16,19 @@ import type { EventBus } from "../event-bus.js";
 import type { AgentMode } from "../types.js";
 import type { ContextManager } from "../context-manager.js";
 import type { LlmClient } from "../utils/llm-client.js";
-import type { HandlerRegistry } from "../utils/handler-registry.js";
+import type { HandlerFunctions } from "../utils/handler-registry.js";
 import type { AgentBackend, ToolDefinition } from "./types.js";
+import type { Compositor } from "../utils/compositor.js";
+export interface AgentLoopConfig {
+    bus: EventBus;
+    contextManager: ContextManager;
+    llmClient: LlmClient;
+    handlers: HandlerFunctions;
+    modes?: AgentMode[];
+    initialModeIndex?: number;
+    compositor?: Compositor;
+}
 export declare class AgentLoop implements AgentBackend {
-    private bus;
-    private contextManager;
-    private llmClient;
-    private handlers;
     private abortController;
     private toolRegistry;
     private historyFile;
@@ -32,18 +38,35 @@ export declare class AgentLoop implements AgentBackend {
     private modes;
     private currentModeIndex;
     private boundListeners;
+    private ctorListeners;
+    private ctorPipeListeners;
     private lastProjectSkillNames;
     private static readonly THINKING_LEVELS;
+    private bus;
+    private contextManager;
+    private llmClient;
+    private handlers;
     private thinkingLevel;
-    constructor(bus: EventBus, contextManager: ContextManager, llmClient: LlmClient, handlers: HandlerRegistry, modeConfig?: AgentMode[], initialModeIndex?: number);
+    private compositor;
+    private toolProtocol;
+    constructor(config: AgentLoopConfig);
     /** Subscribe to bus events — activates this backend. */
     wire(): void;
     /** Unsubscribe from bus events — deactivates this backend. */
     unwire(): void;
     /** Register a tool (used by extensions via ctx.registerTool). */
     registerTool(tool: ToolDefinition): void;
+    /** Unregister a tool by name. */
+    unregisterTool(name: string): void;
     /** Get all registered tools. */
     getTools(): ToolDefinition[];
+    private instructions;
+    /** Register a named instruction block for the system prompt. */
+    registerInstruction(name: string, text: string): void;
+    /** Remove a named instruction block. */
+    removeInstruction(name: string): void;
+    /** Get instruction blocks registered by extensions. */
+    getInstructionSections(): string[];
     kill(): void;
     private cancel;
     /** Check if reasoning_effort should be sent for the current model/provider. */

package/dist/agent/agent-loop.js CHANGED

@@ -6,7 +6,10 @@ import { ToolRegistry } from "./tool-registry.js";
 import { ConversationState } from "./conversation-state.js";
 import { HistoryFile } from "./history-file.js";
 import { STATIC_SYSTEM_PROMPT, buildDynamicContext } from "./system-prompt.js";
-import { TokenBudget } from "../token-budget.js";
+import { createToolUI } from "../utils/tool-interactive.js";
+import { TokenBudget } from "./token-budget.js";
+import { getSettings } from "../settings.js";
+import { createToolProtocol } from "./tool-protocol.js";
 // Core tool factories
 import { createBashTool } from "./tools/bash.js";
 import { createReadFileTool } from "./tools/read-file.js";
@@ -20,10 +23,6 @@ import { createDisplayTool } from "./tools/display.js";
 import { createListSkillsTool } from "./tools/list-skills.js";
 import { discoverProjectSkills } from "./skills.js";
 export class AgentLoop {
-    bus;
-    contextManager;
-    llmClient;
-    handlers;
     abortController = null;
     toolRegistry = new ToolRegistry();
     historyFile = new HistoryFile();
@@ -33,27 +32,52 @@ export class AgentLoop {
     modes;
     currentModeIndex = 0;
     boundListeners = [];
+    ctorListeners = [];
+    ctorPipeListeners = [];
     lastProjectSkillNames = new Set();
     static THINKING_LEVELS = ["off", "low", "medium", "high"];
+    bus;
+    contextManager;
+    llmClient;
+    handlers;
     thinkingLevel = "off";
-    constructor(bus, contextManager, llmClient, handlers, modeConfig, initialModeIndex) {
-        this.bus = bus;
-        this.contextManager = contextManager;
-        this.llmClient = llmClient;
-        this.handlers = handlers;
+    compositor = null;
+    toolProtocol;
+    constructor(config) {
+        this.bus = config.bus;
+        this.contextManager = config.contextManager;
+        this.llmClient = config.llmClient;
+        this.handlers = config.handlers;
+        this.compositor = config.compositor ?? null;
         // Default modes: just the configured model
-        this.modes = modeConfig ?? [
-            { model: llmClient.model },
+        this.modes = config.modes ?? [
+            { model: config.llmClient.model },
         ];
-        this.currentModeIndex = initialModeIndex ?? 0;
+        this.currentModeIndex = config.initialModeIndex ?? 0;
         // Unified token budget — adapts to current model's context window
         this.tokenBudget = new TokenBudget(this.currentMode.contextWindow);
+        // Tool protocol — controls how tools are presented to the LLM
+        this.toolProtocol = createToolProtocol(getSettings().toolMode ?? "api");
         // Register core tools
         this.registerCoreTools();
         // Update token budget with tool count
         this.tokenBudget.update(undefined, this.toolRegistry.all().length);
         // Register handlers — extensions can advise these
         this.registerHandlers();
+        // Subscribe to bus-based tool/instruction registration from extensions.
+        // These must be in the constructor (not wire()) because extensions call
+        // registerTool() during activate(), before activateBackend() calls wire().
+        const onCtor = (event, fn) => {
+            this.bus.on(event, fn);
+            this.ctorListeners.push({ event, fn });
+        };
+        onCtor("agent:register-tool", ({ tool }) => this.registerTool(tool));
+        onCtor("agent:unregister-tool", ({ name }) => this.unregisterTool(name));
+        onCtor("agent:register-instruction", ({ name, text }) => this.registerInstruction(name, text));
+        onCtor("agent:remove-instruction", ({ name }) => this.removeInstruction(name));
+        const getToolsPipe = () => ({ tools: this.getTools() });
+        this.bus.onPipe("agent:get-tools", getToolsPipe);
+        this.ctorPipeListeners.push({ event: "agent:get-tools", fn: getToolsPipe });
     }
     /** Subscribe to bus events — activates this backend. */
     wire() {
@@ -84,7 +108,7 @@ export class AgentLoop {
             }
             this.tokenBudget.update(m.contextWindow, this.toolRegistry.all().length);
             const label = m.provider ? `${m.provider}: ${m.model}` : m.model;
-            this.bus.emit("agent:info", { name: "agent-sh", version: "0.4", model: m.model, provider: m.provider, contextWindow: m.contextWindow });
+            this.bus.emit("agent:info", { name: "ash", version: "0.4", model: m.model, provider: m.provider, contextWindow: m.contextWindow });
             this.bus.emit("ui:info", { message: `Model: ${label}` });
             this.bus.emit("config:changed", {});
         });
@@ -144,8 +168,8 @@ export class AgentLoop {
             this.lastProjectSkillNames.clear();
         });
         on("agent:compact-request", () => {
-            const budgetTokens = this.tokenBudget.conversationBudgetTokens;
-            const stats = this.conversation.compact(budgetTokens);
+            // Force compaction: use target of 0 so every non-pinned turn is evicted
+            const stats = this.conversation.compact(0, 10, true);
             this.conversation.flush().catch(() => { });
             if (stats) {
                 this.bus.emit("ui:info", {
@@ -196,12 +220,44 @@ export class AgentLoop {
     registerTool(tool) {
         this.toolRegistry.register(tool);
     }
+    /** Unregister a tool by name. */
+    unregisterTool(name) {
+        this.toolRegistry.unregister(name);
+    }
     /** Get all registered tools. */
     getTools() {
         return this.toolRegistry.all();
     }
+    // ── Extension instructions & tool tracking ──────────────────────
+    instructions = new Map();
+    /** Register a named instruction block for the system prompt. */
+    registerInstruction(name, text) {
+        this.instructions.set(name, text);
+    }
+    /** Remove a named instruction block. */
+    removeInstruction(name) {
+        this.instructions.delete(name);
+    }
+    /** Get instruction blocks registered by extensions. */
+    getInstructionSections() {
+        const sections = [];
+        for (const [name, text] of this.instructions) {
+            sections.push(`## ${name}\n${text}`);
+        }
+        return sections;
+    }
     kill() {
         this.cancel();
+        this.unwire();
+        // Clean up constructor-level bus subscriptions
+        for (const { event, fn } of this.ctorListeners) {
+            this.bus.off(event, fn);
+        }
+        this.ctorListeners = [];
+        for (const { event, fn } of this.ctorPipeListeners) {
+            this.bus.offPipe(event, fn);
+        }
+        this.ctorPipeListeners = [];
     }
     cancel() {
         this.abortController?.abort();
@@ -237,7 +293,7 @@ export class AgentLoop {
         const label = newMode.provider
             ? `${newMode.provider}: ${newMode.model}`
             : newMode.model;
-        this.bus.emit("agent:info", { name: "agent-sh", version: "0.4", model: newMode.model, provider: newMode.provider, contextWindow: newMode.contextWindow });
+        this.bus.emit("agent:info", { name: "ash", version: "0.4", model: newMode.model, provider: newMode.provider, contextWindow: newMode.contextWindow });
         this.bus.emit("ui:info", { message: `Model: ${label}` });
         this.bus.emit("config:changed", {});
     }
@@ -339,6 +395,7 @@ export class AgentLoop {
         // conversation_recall — search/expand evicted conversation turns
         this.toolRegistry.register({
             name: "conversation_recall",
+            displayName: "recall",
             description: "Browse, search, or expand evicted conversation turns. " +
                 "Use when you need context from earlier in the conversation that was compacted away.",
             input_schema: {
@@ -382,8 +439,17 @@ export class AgentLoop {
      */
     registerHandlers() {
         const h = this.handlers;
+        // System prompt: static identity + behavioral instructions.
+        // Extensions can use registerInstruction() for a managed section,
+        // or advise this handler directly for full control.
+        h.define("system-prompt:build", () => {
+            const instructions = this.getInstructionSections();
+            if (instructions.length === 0)
+                return STATIC_SYSTEM_PROMPT;
+            return STATIC_SYSTEM_PROMPT + "\n\n# Extension Instructions\n\n" + instructions.join("\n\n");
+        });
         // Extensions compose additional context (git info, project rules, etc.)
-        h.define("dynamic-context:build", () => buildDynamicContext(this.toolRegistry.all(), this.contextManager, this.tokenBudget.shellBudgetTokens));
+        h.define("dynamic-context:build", () => buildDynamicContext(this.contextManager, this.tokenBudget.shellBudgetTokens));
         // Full control over what the LLM sees: takes messages[], returns messages[].
         // Default: pass through. Extensions can advise to compact, summarize,
         // filter, reorder, inject — whatever strategy fits.
@@ -417,7 +483,7 @@ export class AgentLoop {
                             // write_file
                             newContent = args.content;
                         }
-                        else if (typeof args.old_text === "string" && typeof args.new_text === "string" && oldContent) {
+                        else if (typeof args.old_text === "string" && typeof args.new_text === "string" && oldContent !== null) {
                             // edit_file
                             newContent = oldContent.replace(args.old_text.replace(/\r\n/g, "\n"), args.new_text.replace(/\r\n/g, "\n"));
                         }
@@ -441,10 +507,14 @@ export class AgentLoop {
                     }
                     catch { /* fall back to generic permission */ }
                 }
+                const ui = this.compositor
+                    ? createToolUI(this.bus, this.compositor.surface("agent"))
+                    : undefined;
                 const perm = await this.bus.emitPipeAsync("permission:request", {
                     kind: permKind,
                     title: permTitle,
                     metadata,
+                    ui,
                     decision: { outcome: "approved" },
                 });
                 if (perm.decision.outcome !== "approved") {
@@ -466,7 +536,10 @@ export class AgentLoop {
             const onChunk = (tool.showOutput !== false && !diffShown)
                 ? ctx.onChunk
                 : undefined;
-            const result = await tool.execute(args, onChunk);
+            const toolCtx = this.compositor
+                ? { ui: createToolUI(this.bus, this.compositor.surface("agent")) }
+                : undefined;
+            const result = await tool.execute(args, onChunk, toolCtx);
             // Invalidate read cache when a file is modified
             if (tool.modifiesFiles && typeof args.path === "string" && !result.isError) {
                 const absPath = path.resolve(process.cwd(), args.path);
@@ -494,8 +567,8 @@ export class AgentLoop {
         this.abortController = new AbortController();
         const signal = this.abortController.signal;
         // Each loop iteration adds an abort listener (via OpenAI SDK stream);
-        // raise the limit to avoid spurious warnings on multi-tool queries.
-        setMaxListeners(50, signal);
+        // disable the limit — long-running tool loops can easily exceed any cap.
+        setMaxListeners(0, signal);
         this.bus.emit("agent:query", { query });
         this.bus.emit("agent:processing-start", {});
         let responseText = "";
@@ -534,10 +607,11 @@ export class AgentLoop {
     async executeLoop(signal) {
         let fullResponseText = "";
         while (!signal.aborted) {
-            // Auto-compact if conversation exceeds the model-aware budget
+            // Auto-compact when conversation exceeds threshold fraction of budget
             const budgetTokens = this.tokenBudget.conversationBudgetTokens;
-            if (this.conversation.estimateTokens() > budgetTokens) {
-                const stats = this.conversation.compact(budgetTokens);
+            const autoCompactThreshold = Math.floor(budgetTokens * getSettings().autoCompactThreshold);
+            if (this.conversation.estimateTokens() > autoCompactThreshold) {
+                const stats = this.conversation.compact(autoCompactThreshold);
                 await this.conversation.flush();
                 if (stats) {
                     this.bus.emit("ui:info", {
@@ -545,16 +619,19 @@ export class AgentLoop {
                     });
                 }
             }
-            // System prompt is static (cacheable); dynamic context uses handler
-            // so extensions can compose additional context via advise()
-            const systemPrompt = STATIC_SYSTEM_PROMPT;
+            // System prompt uses handler so extensions can append instructions (cacheable);
+            // dynamic context uses handler for per-query state via advise()
+            const systemPrompt = this.handlers.call("system-prompt:build");
             const dynamicContext = this.handlers.call("dynamic-context:build");
             // Stream LLM response with retry
             const result = await this.streamWithRetry(systemPrompt, dynamicContext, signal);
-            const { text, toolCalls, assistantContent, assistantToolCalls } = result;
+            const { text, toolCalls: streamedToolCalls } = result;
+            // Extract tool calls via protocol (API mode uses streamed calls,
+            // inline mode parses XML from text)
+            const toolCalls = this.toolProtocol.extractToolCalls(text, streamedToolCalls);
             fullResponseText += text;
-            // Record the assistant message in conversation
-            this.conversation.addAssistantMessage(assistantContent, assistantToolCalls);
+            // Record the assistant message via protocol
+            this.toolProtocol.recordAssistant(this.conversation, text, toolCalls);
             // No tool calls → agent is done
             if (toolCalls.length === 0)
                 break;
@@ -585,10 +662,28 @@ export class AgentLoop {
             // Execute tool calls — run read-only tools in parallel, permission-
             // requiring tools sequentially (to avoid overlapping permission prompts).
             const batchTotal = toolCalls.length;
+            const collectedResults = [];
             const executeSingle = async (tc, batchIndex) => {
+                // Rewrite meta-tool calls (e.g., use_extension → actual tool)
+                tc = this.toolProtocol.rewriteToolCall(tc);
+                // Check for validation errors from rewrite (e.g., wrong extension params)
+                try {
+                    const maybeError = JSON.parse(tc.argumentsJson);
+                    if (maybeError._error) {
+                        collectedResults.push({
+                            callId: tc.id, toolName: tc.name,
+                            content: maybeError._error, isError: true,
+                        });
+                        return;
+                    }
+                }
+                catch { /* not an error payload, continue */ }
                 const tool = this.toolRegistry.get(tc.name);
                 if (!tool) {
-                    this.conversation.addToolResult(tc.id, `Error: Unknown tool "${tc.name}"`);
+                    collectedResults.push({
+                        callId: tc.id, toolName: tc.name,
+                        content: `Unknown tool "${tc.name}"`, isError: true,
+                    });
                     return;
                 }
                 let args;
@@ -596,7 +691,10 @@ export class AgentLoop {
                     args = JSON.parse(tc.argumentsJson);
                 }
                 catch {
-                    this.conversation.addToolResult(tc.id, `Error: Invalid JSON arguments for ${tc.name}`);
+                    collectedResults.push({
+                        callId: tc.id, toolName: tc.name,
+                        content: `Invalid JSON arguments for ${tc.name}`, isError: true,
+                    });
                     return;
                 }
                 // Execute via handler — extensions can advise to add safe-mode,
@@ -606,11 +704,8 @@ export class AgentLoop {
                 };
                 const result = await this.handlers.call("tool:execute", { name: tc.name, id: tc.id, args, tool, onChunk: defaultOnChunk,
                     batchIndex, batchTotal: batchTotal > 1 ? batchTotal : undefined });
-                // Add tool result to conversation (truncate large outputs to avoid
-                // blowing through the context window on a single tool call)
-                let content = result.isError
-                    ? `Error: ${result.content}`
-                    : result.content;
+                // Truncate large outputs to avoid blowing context
+                let content = result.content;
                 const maxBytes = 16_384; // ~4k tokens
                 if (content.length > maxBytes) {
                     const headBytes = Math.floor(maxBytes * 0.6);
@@ -633,7 +728,10 @@ export class AgentLoop {
                         ...lines.slice(tailStart),
                     ].join("\n");
                 }
-                this.conversation.addToolResult(tc.id, content);
+                collectedResults.push({
+                    callId: tc.id, toolName: tc.name,
+                    content, isError: result.isError,
+                });
             };
             // Partition into parallel-safe (read-only) and sequential (needs permission)
             const parallel = [];
@@ -661,6 +759,8 @@ export class AgentLoop {
                     break;
                 await executeSingle(tc, ++batchIdx);
             }
+            // Record all tool results via protocol
+            this.toolProtocol.recordResults(this.conversation, collectedResults);
             // Loop back — LLM sees tool results
         }
         return fullResponseText;
@@ -726,9 +826,21 @@ export class AgentLoop {
         ];
         // Let extensions transform the message array (compact, summarize, filter, etc.)
         const messages = this.handlers.call("conversation:prepare", rawMessages);
+        // Tool protocol controls what goes in the API tools param vs dynamic context
+        const apiTools = this.toolProtocol.getApiTools(this.toolRegistry.all());
+        const toolPrompt = this.toolProtocol.getToolPrompt(this.toolRegistry.all());
+        // Append tool catalog to dynamic context (closer to user query = better followed)
+        if (toolPrompt) {
+            const ctxMsg = messages[1]; // dynamic context user message
+            if (ctxMsg && typeof ctxMsg.content === "string") {
+                ctxMsg.content += "\n" + toolPrompt;
+            }
+        }
+        // Stream filter strips tool tags from display (inline mode only)
+        const streamFilter = this.toolProtocol.createStreamFilter(this.toolRegistry.all().map((t) => t.name));
         const stream = await this.llmClient.stream({
             messages,
-            tools: this.toolRegistry.toAPITools(),
+            tools: apiTools,
             model: this.currentModel,
             reasoning_effort: this.shouldSendReasoningEffort() ? this.thinkingLevel : undefined,
             signal,
@@ -736,6 +848,15 @@ export class AgentLoop {
         for await (const chunk of stream) {
             if (signal.aborted)
                 break;
+            // Token usage (may arrive in a chunk with empty choices)
+            if (chunk.usage) {
+                const u = chunk.usage;
+                this.bus.emit("agent:usage", {
+                    prompt_tokens: u.prompt_tokens ?? 0,
+                    completion_tokens: u.completion_tokens ?? 0,
+                    total_tokens: u.total_tokens ?? 0,
+                });
+            }
             const choice = chunk.choices[0];
             if (!choice)
                 continue;
@@ -743,9 +864,15 @@ export class AgentLoop {
             // Text content
             if (delta?.content) {
                 text += delta.content;
-                this.bus.emitTransform("agent:response-chunk", {
-                    blocks: [{ type: "text", text: delta.content }],
-                });
+                // Filter tool tags from display output (inline mode)
+                const displayText = streamFilter
+                    ? streamFilter.feed(delta.content)
+                    : delta.content;
+                if (displayText) {
+                    this.bus.emitTransform("agent:response-chunk", {
+                        blocks: [{ type: "text", text: displayText }],
+                    });
+                }
             }
             // Reasoning/thinking tokens (non-standard, e.g. DeepSeek)
             if (delta?.reasoning_content) {
@@ -770,28 +897,19 @@ export class AgentLoop {
                     }
                 }
             }
-            // Token usage (final chunk from providers that support it)
-            if (chunk.usage) {
-                const u = chunk.usage;
-                this.bus.emit("agent:usage", {
-                    prompt_tokens: u.prompt_tokens ?? 0,
-                    completion_tokens: u.completion_tokens ?? 0,
-                    total_tokens: u.total_tokens ?? 0,
+        }
+        // Flush any buffered content from the stream filter
+        if (streamFilter) {
+            const remaining = streamFilter.flush();
+            if (remaining) {
+                this.bus.emitTransform("agent:response-chunk", {
+                    blocks: [{ type: "text", text: remaining }],
                 });
             }
         }
-        // Build assistant tool calls for conversation recording
-        const assistantToolCalls = pendingToolCalls.length
-            ? pendingToolCalls.map((tc) => ({
-                id: tc.id,
-                function: { name: tc.name, arguments: tc.argumentsJson },
-            }))
-            : undefined;
         return {
             text,
             toolCalls: pendingToolCalls,
-            assistantContent: text || null,
-            assistantToolCalls,
         };
     }
 }

package/dist/agent/conversation-state.d.ts CHANGED Viewed

@@ -18,6 +18,8 @@ export declare class ConversationState {
         };
     }[]): void;
     addToolResult(toolCallId: string, content: string): void;
+    /** Add tool results as a user message (for inline tool protocol). */
+    addToolResultInline(content: string): void;
     addSystemNote(text: string): void;
     getMessages(): ChatCompletionMessageParam[];
     estimateTokens(): number;
@@ -26,7 +28,7 @@ export declare class ConversationState {
      * them with nuclear one-liner summaries that stay in the conversation.
      * Read-only tool results are dropped entirely.
      */
-    compact(targetTokens: number, recentTurnsToKeep?: number): {
+    compact(targetTokens: number, recentTurnsToKeep?: number, force?: boolean): {
         before: number;
         after: number;
     } | null;

package/dist/agent/conversation-state.js CHANGED Viewed

@@ -43,6 +43,10 @@ export class ConversationState {
             content,
         });
     }
+    /** Add tool results as a user message (for inline tool protocol). */
+    addToolResultInline(content) {
+        this.messages.push({ role: "user", content });
+    }
     addSystemNote(text) {
         this.messages.push({ role: "user", content: text });
     }
@@ -59,9 +63,9 @@ export class ConversationState {
      * them with nuclear one-liner summaries that stay in the conversation.
      * Read-only tool results are dropped entirely.
      */
-    compact(targetTokens, recentTurnsToKeep = 10) {
+    compact(targetTokens, recentTurnsToKeep = 10, force = false) {
         const before = this.estimateTokens();
-        if (before <= targetTokens)
+        if (!force && before <= targetTokens)
             return null;
         const turns = this.parseTurns();
         if (turns.length <= 2)