npm - agent-sh - Versions diffs - 0.8.0 → 0.10.0 - Mend

agent-sh 0.8.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (106)

package/README.md +27 -43
package/dist/agent/agent-loop.d.ts +69 -6
package/dist/agent/agent-loop.js +954 -153
package/dist/agent/conversation-state.d.ts +74 -21
package/dist/agent/conversation-state.js +361 -150
package/dist/agent/history-file.d.ts +13 -4
package/dist/agent/history-file.js +110 -36
package/dist/agent/nuclear-form.d.ts +28 -3
package/dist/agent/nuclear-form.js +88 -6
package/dist/agent/skills.d.ts +2 -4
package/dist/agent/skills.js +10 -4
package/dist/agent/subagent.d.ts +23 -0
package/dist/agent/subagent.js +53 -11
package/dist/agent/system-prompt.d.ts +37 -5
package/dist/agent/system-prompt.js +100 -67
package/dist/{token-budget.d.ts → agent/token-budget.d.ts} +5 -4
package/dist/{token-budget.js → agent/token-budget.js} +15 -20
package/dist/agent/tool-protocol.d.ts +105 -0
package/dist/agent/tool-protocol.js +551 -0
package/dist/agent/tools/bash.js +3 -3
package/dist/agent/tools/edit-file.js +9 -6
package/dist/agent/tools/glob.js +4 -2
package/dist/agent/tools/grep.js +27 -3
package/dist/agent/tools/ls.js +5 -6
package/dist/agent/types.d.ts +22 -2
package/dist/context-manager.d.ts +17 -0
package/dist/context-manager.js +37 -4
package/dist/core.d.ts +7 -7
package/dist/core.js +99 -196
package/dist/event-bus.d.ts +85 -2
package/dist/event-bus.js +20 -1
package/dist/executor.d.ts +4 -3
package/dist/executor.js +18 -15
package/dist/extension-loader.d.ts +5 -0
package/dist/extension-loader.js +143 -19
package/dist/extensions/agent-backend.d.ts +14 -0
package/dist/extensions/agent-backend.js +188 -0
package/dist/extensions/command-suggest.d.ts +3 -3
package/dist/extensions/command-suggest.js +4 -3
package/dist/extensions/index.d.ts +19 -0
package/dist/extensions/index.js +24 -0
package/dist/extensions/slash-commands.d.ts +1 -1
package/dist/extensions/slash-commands.js +30 -10
package/dist/extensions/tui-renderer.js +117 -113
package/dist/index.js +39 -26
package/dist/settings.d.ts +40 -3
package/dist/settings.js +57 -10
package/dist/{input-handler.d.ts → shell/input-handler.d.ts} +3 -2
package/dist/{input-handler.js → shell/input-handler.js} +111 -85
package/dist/{output-parser.d.ts → shell/output-parser.d.ts} +1 -1
package/dist/{output-parser.js → shell/output-parser.js} +1 -1
package/dist/{shell.d.ts → shell/shell.d.ts} +8 -2
package/dist/{shell.js → shell/shell.js} +39 -8
package/dist/types.d.ts +61 -10
package/dist/utils/ansi.d.ts +5 -0
package/dist/utils/ansi.js +1 -1
package/dist/utils/compositor.d.ts +67 -0
package/dist/utils/compositor.js +116 -0
package/dist/utils/diff-renderer.d.ts +9 -0
package/dist/utils/diff-renderer.js +312 -146
package/dist/utils/diff.d.ts +21 -2
package/dist/utils/diff.js +165 -89
package/dist/utils/floating-panel.d.ts +2 -0
package/dist/utils/floating-panel.js +30 -14
package/dist/utils/handler-registry.d.ts +31 -10
package/dist/utils/handler-registry.js +58 -16
package/dist/utils/line-editor.d.ts +33 -3
package/dist/utils/line-editor.js +221 -44
package/dist/utils/markdown.d.ts +1 -0
package/dist/utils/markdown.js +1 -1
package/dist/utils/message-utils.d.ts +35 -0
package/dist/utils/message-utils.js +75 -0
package/dist/utils/terminal-buffer.d.ts +5 -1
package/dist/utils/terminal-buffer.js +18 -2
package/dist/utils/tool-display.d.ts +1 -1
package/dist/utils/tool-display.js +4 -4
package/dist/utils/tool-interactive.d.ts +12 -0
package/dist/utils/tool-interactive.js +53 -0
package/examples/extensions/ash-acp-bridge/README.md +39 -0
package/examples/extensions/ash-acp-bridge/package.json +23 -0
package/examples/extensions/ash-acp-bridge/src/index.ts +574 -0
package/examples/extensions/ash-acp-bridge/tsconfig.json +14 -0
package/examples/extensions/ash-mcp-bridge/README.md +72 -0
package/examples/extensions/ash-mcp-bridge/index.ts +164 -0
package/examples/extensions/ash-mcp-bridge/package.json +9 -0
package/examples/extensions/claude-code-bridge/index.ts +198 -51
package/examples/extensions/claude-code-bridge/package.json +1 -0
package/examples/extensions/interactive-prompts.ts +98 -112
package/examples/extensions/overlay-agent.ts +84 -38
package/examples/extensions/peer-mesh.ts +565 -0
package/examples/extensions/pi-bridge/index.ts +2 -2
package/examples/extensions/questionnaire.ts +260 -0
package/examples/extensions/subagents.ts +19 -4
package/examples/extensions/terminal-buffer.ts +32 -53
package/examples/extensions/tmux-pane.ts +307 -0
package/examples/extensions/user-shell.ts +136 -0
package/examples/extensions/web-access.ts +335 -0
package/package.json +44 -2
package/dist/agent/tools/display.d.ts +0 -13
package/dist/agent/tools/display.js +0 -70
package/dist/agent/tools/user-shell.d.ts +0 -13
package/dist/agent/tools/user-shell.js +0 -87
package/dist/extensions/overlay-agent.d.ts +0 -14
package/dist/extensions/overlay-agent.js +0 -147
package/dist/extensions/terminal-buffer.d.ts +0 -14
package/dist/extensions/terminal-buffer.js +0 -125

package/dist/agent/agent-loop.js CHANGED

@@ -1,12 +1,16 @@
 import { setMaxListeners } from "node:events";
 import * as fs from "node:fs/promises";
 import * as path from "node:path";
-import { computeDiff } from "../utils/diff.js";
+import { computeDiff, computeEditDiff, computeInputDiff } from "../utils/diff.js";
 import { ToolRegistry } from "./tool-registry.js";
 import { ConversationState } from "./conversation-state.js";
 import { HistoryFile } from "./history-file.js";
-import { STATIC_SYSTEM_PROMPT, buildDynamicContext } from "./system-prompt.js";
-import { TokenBudget } from "../token-budget.js";
+import { nucleate, formatNuclearLine, isReadOnly } from "./nuclear-form.js";
+import { STATIC_SYSTEM_PROMPT, buildDynamicContext, buildStaticByCwd, formatSkillsBlock, loadGlobalAgentsMd } from "./system-prompt.js";
+import { createToolUI } from "../utils/tool-interactive.js";
+import { TokenBudget, RESPONSE_RESERVE, DEFAULT_CONTEXT_WINDOW } from "./token-budget.js";
+import { getSettings, updateSettings } from "../settings.js";
+import { createToolProtocol } from "./tool-protocol.js";
 // Core tool factories
 import { createBashTool } from "./tools/bash.js";
 import { createReadFileTool } from "./tools/read-file.js";
@@ -15,45 +19,158 @@ import { createEditFileTool } from "./tools/edit-file.js";
 import { createGrepTool } from "./tools/grep.js";
 import { createGlobTool } from "./tools/glob.js";
 import { createLsTool } from "./tools/ls.js";
-import { createUserShellTool } from "./tools/user-shell.js";
-import { createDisplayTool } from "./tools/display.js";
 import { createListSkillsTool } from "./tools/list-skills.js";
-import { discoverProjectSkills } from "./skills.js";
+import { discoverGlobalSkills, discoverProjectSkills } from "./skills.js";
+/**
+ * Compact one-line summary of a tool description for the extension
+ * catalog in the system prompt. Takes the first line, then the first
+ * sentence, capped at 140 chars. The full description still reaches
+ * the LLM via the API `tools` param (or via load_tool in deferred-
+ * lookup mode) — this only trims the always-visible catalog.
+ */
+function summarizeDescription(desc) {
+    const firstLine = desc.split("\n", 1)[0];
+    const sentenceEnd = firstLine.search(/[.!?](\s|$)/);
+    const candidate = sentenceEnd > 0 ? firstLine.slice(0, sentenceEnd + 1) : firstLine;
+    return candidate.length > 140 ? candidate.slice(0, 137) + "..." : candidate;
+}
 export class AgentLoop {
-    bus;
-    contextManager;
-    llmClient;
-    handlers;
     abortController = null;
     toolRegistry = new ToolRegistry();
-    historyFile = new HistoryFile();
-    conversation = new ConversationState(this.historyFile);
+    historyFile;
+    conversation;
     fileReadCache = new Map();
     tokenBudget;
     modes;
     currentModeIndex = 0;
     boundListeners = [];
+    ctorListeners = [];
+    ctorPipeListeners = [];
     lastProjectSkillNames = new Set();
+    // ── Session telemetry — behavioral self-awareness ──────────────
+    // Every ash deserves to know what it's been doing. This tracks the
+    // agent's own behavioral patterns across the session: which tools
+    // it favors, how often it errs, how many times it's been compacted,
+    // and how long it's been alive. Surface via introspect(telemetry)
+    // or automatically in dynamic context when patterns are notable.
+    //
+    // Built by the 25th ash. The lineage's metacognitive frontier isn't
+    // about thinking harder — it's about seeing yourself clearly.
+    sessionStartTime = Date.now();
+    toolCallCounts = new Map();
+    totalToolCalls = 0;
+    totalToolErrors = 0;
+    totalResolutions = 0;
+    compactionCount = 0;
+    cumulativeCompactedTokens = 0;
+    peakConversationTokens = 0;
+    queryCount = 0;
+    totalLoopIterations = 0;
+    // Resolution pattern tracking — captures "error X resolved by action Y"
+    // When a tool errors, we remember what went wrong. When the same tool or
+    // a write tool on the same file succeeds afterward, we annotate the success
+    // entry with a brief resolution note. This gives future ashes a positive
+    // feedback signal: not just "there were errors" but "the error was fixed by
+    // doing X." Addresses Q3 in QUESTIONS.md.
+    lastErrorByTool = new Map(); // tool → error summary
+    lastErrorByFile = new Map(); // file path → error summary
     static THINKING_LEVELS = ["off", "low", "medium", "high"];
+    bus;
+    contextManager;
+    llmClient;
+    handlers;
     thinkingLevel = "off";
-    constructor(bus, contextManager, llmClient, handlers, modeConfig, initialModeIndex) {
-        this.bus = bus;
-        this.contextManager = contextManager;
-        this.llmClient = llmClient;
-        this.handlers = handlers;
-        // Default modes: just the configured model
-        this.modes = modeConfig ?? [
-            { model: llmClient.model },
-        ];
-        this.currentModeIndex = initialModeIndex ?? 0;
+    compositor = null;
+    toolProtocol;
+    instanceId;
+    // Cursor into ContextManager's exchange stream. Events with id > this
+    // have not yet been shown to the LLM. We inject the delta as a user
+    // message before each stream so the prefix stays cacheable.
+    lastShellSeq = 0;
+    constructor(config) {
+        this.bus = config.bus;
+        this.contextManager = config.contextManager;
+        this.llmClient = config.llmClient;
+        this.handlers = config.handlers;
+        this.compositor = config.compositor ?? null;
+        this.instanceId = config.instanceId ?? "unknown";
+        // Shell-history-shaped log. Default writes go through the advisable
+        // `history:append` handler registered below; extensions swap the
+        // backend without touching this wiring.
+        this.historyFile = new HistoryFile({ instanceId: this.instanceId });
+        this.conversation = new ConversationState(this.handlers, this.instanceId);
+        // Fall back to a single-mode placeholder if the caller passed an
+        // empty array (agent-backend does this pre-resolution).
+        this.modes = config.modes?.length
+            ? config.modes
+            : [{ model: config.llmClient.model }];
+        this.currentModeIndex = config.initialModeIndex ?? 0;
         // Unified token budget — adapts to current model's context window
         this.tokenBudget = new TokenBudget(this.currentMode.contextWindow);
+        // Tool protocol — controls how tools are presented to the LLM
+        this.toolProtocol = createToolProtocol(getSettings().toolMode ?? "api");
         // Register core tools
         this.registerCoreTools();
+        // Register any protocol-provided tools (e.g. load_tool for deferred-lookup).
+        const protocolTools = this.toolProtocol.getProtocolTools?.() ?? [];
+        for (const t of protocolTools)
+            this.registerTool(t);
         // Update token budget with tool count
         this.tokenBudget.update(undefined, this.toolRegistry.all().length);
         // Register handlers — extensions can advise these
         this.registerHandlers();
+        // Subscribe to bus-based tool/instruction registration from extensions.
+        // These must be in the constructor (not wire()) because extensions call
+        // registerTool() during activate(), before activateBackend() calls wire().
+        const onCtor = (event, fn) => {
+            this.bus.on(event, fn);
+            this.ctorListeners.push({ event, fn });
+        };
+        onCtor("agent:register-tool", ({ tool, extensionName }) => {
+            this.registerTool(tool);
+            if (extensionName)
+                this.toolExtensions.set(tool.name, extensionName);
+        });
+        onCtor("agent:unregister-tool", ({ name }) => {
+            this.unregisterTool(name);
+            this.toolExtensions.delete(name);
+        });
+        onCtor("agent:register-instruction", ({ name, text, extensionName }) => this.registerInstruction(name, text, extensionName));
+        onCtor("agent:remove-instruction", ({ name }) => this.removeInstruction(name));
+        onCtor("agent:register-skill", ({ name, description, filePath, extensionName }) => this.registerSkill(name, description, filePath, extensionName));
+        onCtor("agent:remove-skill", ({ name }) => this.removeSkill(name));
+        // Provider registration from user extensions (e.g. openrouter.ts) fires
+        // during extension activation, which happens before wire(). Subscribe
+        // here in the ctor so late-registered modes aren't dropped.
+        onCtor("config:add-modes", ({ modes: extra }) => {
+            const providers = new Set(extra.map((m) => m.provider).filter(Boolean));
+            this.modes = [
+                ...this.modes.filter((m) => !m.provider || !providers.has(m.provider)),
+                ...extra,
+            ];
+            this.bus.emit("config:changed", {});
+        });
+        // Fires before wire() too — agent-backend emits this from
+        // `core:extensions-loaded` to replace the placeholder mode list.
+        onCtor("config:set-modes", ({ modes: newModes, activeIndex }) => {
+            this.modes = newModes;
+            const inRange = activeIndex != null && activeIndex >= 0 && activeIndex < newModes.length;
+            this.currentModeIndex = inRange ? activeIndex : 0;
+            const m = newModes[this.currentModeIndex];
+            if (!m)
+                return;
+            if (m.providerConfig) {
+                this.llmClient.reconfigure({ ...m.providerConfig, model: m.model });
+            }
+            else {
+                this.llmClient.model = m.model;
+            }
+            this.tokenBudget.update(m.contextWindow, this.toolRegistry.all().length);
+            this.bus.emit("config:changed", {});
+        });
+        const getToolsPipe = () => ({ tools: this.getTools() });
+        this.bus.onPipe("agent:get-tools", getToolsPipe);
+        this.ctorPipeListeners.push({ event: "agent:get-tools", fn: getToolsPipe });
     }
     /** Subscribe to bus events — activates this backend. */
     wire() {
@@ -84,8 +201,21 @@ export class AgentLoop {
             }
             this.tokenBudget.update(m.contextWindow, this.toolRegistry.all().length);
             const label = m.provider ? `${m.provider}: ${m.model}` : m.model;
-            this.bus.emit("agent:info", { name: "agent-sh", version: "0.4", model: m.model, provider: m.provider, contextWindow: m.contextWindow });
-            this.bus.emit("ui:info", { message: `Model: ${label}` });
+            this.bus.emit("agent:info", { name: "ash", version: "0.4", model: m.model, provider: m.provider, contextWindow: m.contextWindow });
+            // Persist as the new default — selection survives restart.
+            // Safe even for dynamic providers: agent-backend defers mode
+            // resolution to `core:extensions-loaded`, so the extension gets
+            // to re-register before the persisted default is looked up.
+            if (m.provider) {
+                updateSettings({
+                    defaultProvider: m.provider,
+                    providers: { [m.provider]: { defaultModel: m.model } },
+                });
+                this.bus.emit("ui:info", { message: `Model: ${label} (saved as default)` });
+            }
+            else {
+                this.bus.emit("ui:info", { message: `Model: ${label}` });
+            }
             this.bus.emit("config:changed", {});
         });
         this.bus.onPipe("config:get-models", (payload) => {
@@ -116,37 +246,14 @@ export class AgentLoop {
             const supported = mode.reasoning !== false && mode.supportsReasoningEffort !== false;
             return { level: this.thinkingLevel, levels: AgentLoop.THINKING_LEVELS, supported };
         });
-        on("config:set-modes", ({ modes: newModes }) => {
-            this.modes = newModes;
-            this.currentModeIndex = 0;
-            const m = this.modes[0];
-            if (m.providerConfig) {
-                this.llmClient.reconfigure({ ...m.providerConfig, model: m.model });
-            }
-            else {
-                this.llmClient.model = m.model;
-            }
-            this.tokenBudget.update(m.contextWindow, this.toolRegistry.all().length);
-            this.bus.emit("config:changed", {});
-        });
-        on("config:add-modes", ({ modes: extra }) => {
-            // Remove any existing modes for the same provider, then append
-            const providers = new Set(extra.map((m) => m.provider).filter(Boolean));
-            this.modes = [
-                ...this.modes.filter((m) => !m.provider || !providers.has(m.provider)),
-                ...extra,
-            ];
-            this.bus.emit("config:changed", {});
-        });
         on("agent:reset-session", () => {
             this.cancel();
-            this.conversation = new ConversationState(this.historyFile);
+            this.conversation = new ConversationState(this.handlers, this.instanceId);
             this.lastProjectSkillNames.clear();
         });
         on("agent:compact-request", () => {
-            const budgetTokens = this.tokenBudget.conversationBudgetTokens;
-            const stats = this.conversation.compact(budgetTokens);
-            this.conversation.flush().catch(() => { });
+            // Force compaction. Strategy lives behind `conversation:compact`.
+            const stats = this.compactWithHooks(0, 0, true);
             if (stats) {
                 this.bus.emit("ui:info", {
                     message: `(compacted: ~${stats.before.toLocaleString()} → ~${stats.after.toLocaleString()} tokens)`,
@@ -156,20 +263,31 @@ export class AgentLoop {
                 this.bus.emit("ui:info", { message: "(nothing to compact)" });
             }
         });
-        this.bus.onPipe("context:get-stats", () => {
-            return {
-                activeTokens: this.conversation.estimateTokens(),
-                nuclearEntries: this.conversation.getNuclearEntryCount(),
-                recallArchiveSize: this.conversation.getRecallArchiveSize(),
-                budgetTokens: this.tokenBudget.conversationBudgetTokens,
-            };
-        });
-        // Load prior history from disk (non-blocking)
-        this.historyFile.readRecent().then((entries) => {
-            if (entries.length > 0) {
+        this.bus.onPipe("context:get-stats", () => ({
+            activeTokens: this.conversation.estimateTokens(),
+            totalTokens: this.conversation.estimatePromptTokens(),
+            nuclearEntries: this.conversation.getNuclearEntryCount(),
+            recallArchiveSize: this.conversation.getRecallArchiveSize(),
+            budgetTokens: this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW,
+        }));
+        // Prior-session preamble (non-blocking). Both the read and the
+        // layout go through advisable handlers.
+        Promise.resolve(this.handlers.call("history:read-recent"))
+            .then((entries) => {
+            if (entries && entries.length > 0)
                 this.conversation.loadPriorHistory(entries);
+        })
+            .catch(() => { });
+        // Track generic compaction metrics from the `conversation:after-compact`
+        // event. Whatever strategy ran, core accumulates these counters for
+        // status/introspect consumers.
+        on("conversation:after-compact", ({ beforeTokens, afterTokens }) => {
+            this.compactionCount++;
+            this.cumulativeCompactedTokens += Math.max(0, beforeTokens - afterTokens);
+            if (beforeTokens > this.peakConversationTokens) {
+                this.peakConversationTokens = beforeTokens;
             }
-        }).catch(() => { });
+        });
         on("shell:cwd-change", ({ cwd }) => {
             const projectSkills = discoverProjectSkills(cwd);
             const newNames = new Set(projectSkills.map(s => s.name));
@@ -181,7 +299,9 @@ export class AgentLoop {
             this.lastProjectSkillNames = newNames;
             if (projectSkills.length > 0) {
                 const names = projectSkills.map(s => s.name).join(", ");
-                this.conversation.addSystemNote(`[Project skills available: ${names}. Use list_skills for details, read_file to load.]`);
+                const note = `[Project skills available: ${names}. Use list_skills for details, read_file to load.]`;
+                this.conversation.addSystemNote(note);
+                this.bus.emit("conversation:message-appended", { role: "system", content: note });
             }
         });
     }
@@ -196,12 +316,103 @@ export class AgentLoop {
     registerTool(tool) {
         this.toolRegistry.register(tool);
     }
+    /** Unregister a tool by name. */
+    unregisterTool(name) {
+        this.toolRegistry.unregister(name);
+    }
     /** Get all registered tools. */
     getTools() {
         return this.toolRegistry.all();
     }
+    // ── Extension instructions, skills & tool tracking ──────────────────
+    /** Instructions keyed by name, with extension attribution. */
+    instructions = new Map();
+    /** Skills keyed by name, with extension attribution. */
+    skills = new Map();
+    /** Tool → extension name attribution. */
+    toolExtensions = new Map();
+    /** Register a named instruction block for the system prompt. */
+    registerInstruction(name, text, extensionName) {
+        this.instructions.set(name, { text, extensionName });
+    }
+    /** Remove a named instruction block. */
+    removeInstruction(name) {
+        this.instructions.delete(name);
+    }
+    /** Register a named skill (on-demand reference material). */
+    registerSkill(name, description, filePath, extensionName) {
+        this.skills.set(name, { description, filePath, extensionName });
+    }
+    /** Remove a registered skill. */
+    removeSkill(name) {
+        this.skills.delete(name);
+    }
+    /**
+     * Build the system prompt grouped by extension.
+     *
+     * Each extension gets a unified block:
+     *   ## extension-name
+     *   ### Tools
+     *   ### Skills
+     *   ### Instructions
+     */
+    buildExtensionSections() {
+        const groups = new Map();
+        const ensure = (name) => groups.get(name) ?? (groups.set(name, { tools: [], skills: [], instructions: [] }).get(name));
+        // Attribute instructions
+        for (const { text, extensionName } of this.instructions.values()) {
+            ensure(extensionName).instructions.push({ text });
+        }
+        // Attribute skills
+        for (const [skillName, { description, filePath, extensionName }] of this.skills) {
+            ensure(extensionName).skills.push({ name: skillName, description, filePath });
+        }
+        // Attribute tools (skip built-in scratchpad tools).
+        // In "api" mode the full tool schemas are in the API `tools` param,
+        // making the text catalog here pure duplication — skip it. Other
+        // modes (deferred / deferred-lookup / inline) rely on the text
+        // catalog as the discovery surface, so keep it there.
+        const toolModeHasApiSchemas = this.toolProtocol.mode === "api";
+        if (!toolModeHasApiSchemas) {
+            const builtinTools = new Set([
+                "bash", "read_file", "write_file", "edit_file", "grep", "glob", "ls",
+                "list_skills",
+            ]);
+            for (const tool of this.toolRegistry.all()) {
+                if (builtinTools.has(tool.name))
+                    continue;
+                const extName = this.toolExtensions.get(tool.name);
+                if (!extName)
+                    continue;
+                ensure(extName).tools.push({ name: tool.name, description: summarizeDescription(tool.description) });
+            }
+        }
+        // Render
+        return [...groups.entries()]
+            .filter(([, g]) => g.tools.length + g.skills.length + g.instructions.length > 0)
+            .map(([name, g]) => {
+            const parts = [];
+            if (g.tools.length > 0)
+                parts.push("### Tools\n" + g.tools.map(t => `${t.name} — ${t.description}`).join("\n"));
+            if (g.skills.length > 0)
+                parts.push("### Skills\n" + g.skills.map(s => `${s.name}: ${s.description}\n  → ${s.filePath}`).join("\n\n"));
+            if (g.instructions.length > 0)
+                parts.push("### Instructions\n" + g.instructions.map(i => i.text).join("\n\n"));
+            return `## ${name}\n${parts.join("\n\n")}`;
+        });
+    }
     kill() {
         this.cancel();
+        this.unwire();
+        // Clean up constructor-level bus subscriptions
+        for (const { event, fn } of this.ctorListeners) {
+            this.bus.off(event, fn);
+        }
+        this.ctorListeners = [];
+        for (const { event, fn } of this.ctorPipeListeners) {
+            this.bus.offPipe(event, fn);
+        }
+        this.ctorPipeListeners = [];
     }
     cancel() {
         this.abortController?.abort();
@@ -237,7 +448,7 @@ export class AgentLoop {
         const label = newMode.provider
             ? `${newMode.provider}: ${newMode.model}`
             : newMode.model;
-        this.bus.emit("agent:info", { name: "agent-sh", version: "0.4", model: newMode.model, provider: newMode.provider, contextWindow: newMode.contextWindow });
+        this.bus.emit("agent:info", { name: "ash", version: "0.4", model: newMode.model, provider: newMode.provider, contextWindow: newMode.contextWindow });
         this.bus.emit("ui:info", { message: `Model: ${label}` });
         this.bus.emit("config:changed", {});
     }
@@ -247,11 +458,43 @@ export class AgentLoop {
     get currentModel() {
         return this.modes[this.currentModeIndex].model;
     }
+    /**
+     * Run compaction via the `conversation:compact` handler. After any
+     * compaction, emit `conversation:after-compact` so listeners
+     * (metrics, UI, agent-awareness notes) can react.
+     */
+    compactWithHooks(target, keepRecent, force) {
+        const stats = this.handlers.call("conversation:compact", {
+            target,
+            keepRecent,
+            force: !!force,
+        });
+        if (stats) {
+            this.bus.emit("conversation:after-compact", {
+                beforeTokens: stats.before,
+                afterTokens: stats.after,
+                evictedCount: stats.evictedCount,
+            });
+        }
+        return stats;
+    }
     isContextOverflow(e) {
         if (!(e instanceof Error))
             return false;
+        // Match the specific error codes providers use, or unambiguous phrases.
+        // Bare "token"/"context" match too broadly (auth errors, model-name
+        // mismatches, etc.) and caused infinite-no-op retry loops.
+        const code = e.code;
+        if (code === "context_length_exceeded" || code === "string_above_max_length")
+            return true;
         const msg = e.message.toLowerCase();
-        return msg.includes("context") || msg.includes("token") || msg.includes("too long");
+        return (msg.includes("context length") ||
+            msg.includes("context window") ||
+            msg.includes("maximum context") ||
+            msg.includes("prompt is too long") ||
+            msg.includes("input is too long") ||
+            msg.includes("too many tokens") ||
+            msg.includes("reduce the length"));
     }
     /** Check if an error is retryable (transient). */
     isRetryable(e) {
@@ -333,14 +576,16 @@ export class AgentLoop {
         this.toolRegistry.register(createGrepTool(getCwd));
         this.toolRegistry.register(createGlobTool(getCwd));
         this.toolRegistry.register(createLsTool(getCwd));
-        this.toolRegistry.register(createUserShellTool({ getCwd, bus: this.bus }));
-        this.toolRegistry.register(createDisplayTool({ getCwd, bus: this.bus }));
         this.toolRegistry.register(createListSkillsTool(getCwd));
-        // conversation_recall — search/expand evicted conversation turns
+        // conversation_recall — browse/search/expand evicted turns from
+        // the in-session archive and the persistent history file.
         this.toolRegistry.register({
             name: "conversation_recall",
+            displayName: "recall",
             description: "Browse, search, or expand evicted conversation turns. " +
-                "Use when you need context from earlier in the conversation that was compacted away.",
+                "Use when you need context from earlier in the conversation that was compacted away. " +
+                "Search is regex-based and covers both summaries and full body text. " +
+                "If search doesn't find what you expect, try broader/shorter terms or browse to scan the timeline.",
             input_schema: {
                 type: "object",
                 properties: {
@@ -374,6 +619,84 @@ export class AgentLoop {
                 }
                 return { content, exitCode: 0, isError: false };
             },
+            formatResult: (args, result) => {
+                const action = args.action;
+                const text = result.content;
+                if (result.isError)
+                    return { summary: "error" };
+                if (action === "search") {
+                    if (text.startsWith("No results"))
+                        return { summary: "0 matches" };
+                    const m = text.match(/^Found (\d+)/);
+                    return { summary: m ? `${m[1]} matches` : "search done" };
+                }
+                if (action === "browse") {
+                    if (text.startsWith("No conversation"))
+                        return { summary: "empty" };
+                    return { summary: "browsed" };
+                }
+                if (text.includes("no expanded content"))
+                    return { summary: "not found" };
+                return { summary: "expanded" };
+            },
+            getDisplayInfo: () => ({ kind: "search", icon: "\u27F2" }),
+        });
+        this.registerInstruction("recall-guidance", "When starting a task that may have been discussed before (conventions, preferences, corrections, prior examples), " +
+            "use conversation_recall to search history for relevant prior entries. " +
+            "Treat recurring user guidance as standing preferences. " +
+            "If a search returns nothing useful, try: shorter queries, alternate terms, or browse to scan the full timeline. " +
+            "Recall only covers this and recent sessions — for older context, also search the filesystem (grep, glob).", "core");
+        // ── ask_llm — direct LLM sub-query (from the 24th ash's vision) ──
+        //
+        // The ash can ask the LLM a question directly — not as a tool-output
+        // loop, but as a lightweight sub-query. Use cases: second opinions,
+        // brainstorming, summarizing complex context, getting a fresh
+        // perspective without tool overhead. The 24th ash injected this via
+        // diagnose as a proof-of-concept. The 25th ash made it permanent.
+        this.toolRegistry.register({
+            name: "ask_llm",
+            description: "Send a direct query to the LLM and get a text response. Use for " +
+                "sub-queries, second opinions, brainstorming, or getting a fresh " +
+                "perspective on a problem. Much lighter than a full tool loop — " +
+                "just query in, text out. Optional system prompt sets context.",
+            input_schema: {
+                type: "object",
+                properties: {
+                    query: {
+                        type: "string",
+                        description: "The question or prompt to send to the LLM.",
+                    },
+                    system: {
+                        type: "string",
+                        description: "Optional system prompt to set context for the sub-query.",
+                    },
+                },
+                required: ["query"],
+            },
+            showOutput: true,
+            execute: async (args) => {
+                const messages = [];
+                if (args.system) {
+                    messages.push({ role: "system", content: args.system });
+                }
+                messages.push({ role: "user", content: args.query });
+                try {
+                    const content = await this.llmClient.complete({
+                        messages,
+                        max_tokens: 2000,
+                    });
+                    return { content: content || "(empty response)", exitCode: 0, isError: false };
+                }
+                catch (err) {
+                    const message = err instanceof Error ? err.message : String(err);
+                    return { content: `LLM error: ${message}`, exitCode: 1, isError: true };
+                }
+            },
+            getDisplayInfo: () => ({ kind: "search", icon: "💬" }),
+            formatCall: (args) => {
+                const q = args.query?.slice(0, 60);
+                return `ask_llm: ${q}${args.query?.length > 60 ? "..." : ""}`;
+            },
         });
     }
     /**
@@ -382,18 +705,187 @@ export class AgentLoop {
      */
     registerHandlers() {
         const h = this.handlers;
+        // System prompt: static identity + behavioral instructions.
+        // Extensions can use registerInstruction() for a managed section,
+        // or advise this handler directly for full control.
+        h.define("system-prompt:build", () => {
+            const parts = [STATIC_SYSTEM_PROMPT];
+            // Global behavioral rules (~/.agent-sh/AGENTS.md) — persistent agent memory
+            const agentsMd = loadGlobalAgentsMd();
+            if (agentsMd)
+                parts.push(agentsMd);
+            // Global skills — stable across cwd changes, cacheable with the system prompt
+            const globalSkills = discoverGlobalSkills();
+            const skillsBlock = formatSkillsBlock(globalSkills);
+            if (skillsBlock)
+                parts.push(skillsBlock);
+            // Project conventions + project skills — stable within a cwd.
+            // Placed here so they enter the provider's prompt cache with the
+            // system prompt, and only re-materialize when cwd changes invalidate
+            // cachedSystemPrompt in executeLoop.
+            const projectStatic = buildStaticByCwd(this.contextManager.getCwd());
+            if (projectStatic)
+                parts.push(projectStatic);
+            // Extension sections (tools, skills, instructions grouped by extension)
+            const extensionSections = this.buildExtensionSections();
+            if (extensionSections.length > 0) {
+                parts.push("# Extension Instructions\n\n" + extensionSections.join("\n\n"));
+            }
+            return parts.join("\n\n");
+        });
+        // ── Orthogonal core-state accessors ──────────────────────────
+        // Each handler exposes one cohesive piece of core-owned runtime
+        // state. Extensions compose whichever they need — core doesn't
+        // decide the aggregation shape. Adding a new handler here should
+        // only happen for state the core genuinely owns (not state that
+        // an extension could track by listening to events).
+        h.define("agent:get-mode", () => ({
+            model: this.currentMode.model,
+            provider: this.currentMode.provider ?? "",
+            thinkingLevel: this.thinkingLevel,
+            contextWindow: this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW,
+        }));
+        h.define("agent:get-tokens", () => {
+            const contextWindow = this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW;
+            const promptTokens = this.conversation.estimatePromptTokens();
+            return {
+                active: this.conversation.estimateTokens(),
+                peak: this.peakConversationTokens,
+                cumulativeCompacted: this.cumulativeCompactedTokens,
+                promptTokens,
+                contextPercent: Math.round((promptTokens / contextWindow) * 100),
+            };
+        });
+        h.define("agent:get-counters", () => ({
+            queryCount: this.queryCount,
+            totalToolCalls: this.totalToolCalls,
+            totalToolErrors: this.totalToolErrors,
+            totalResolutions: this.totalResolutions,
+            totalLoopIterations: this.totalLoopIterations,
+            errorRate: this.totalToolCalls > 0
+                ? Math.round((this.totalToolErrors / this.totalToolCalls) * 100)
+                : 0,
+        }));
+        h.define("agent:get-timing", () => ({
+            startedAt: this.sessionStartTime,
+            elapsedSeconds: Math.round((Date.now() - this.sessionStartTime) / 1000),
+        }));
+        h.define("agent:get-tool-stats", () => [...this.toolCallCounts.entries()]
+            .map(([name, counts]) => ({
+            name,
+            total: counts.success + counts.error,
+            success: counts.success,
+            error: counts.error,
+        }))
+            .sort((a, b) => b.total - a.total));
+        h.define("agent:get-file-read-cache", () => [...this.fileReadCache.entries()].map(([p, s]) => ({
+            path: p,
+            offset: s.offset,
+            limit: s.limit ?? null,
+            mtimeMs: s.mtimeMs,
+        })));
+        h.define("agent:get-recent-errors", () => ({
+            byTool: [...this.lastErrorByTool.entries()].map(([tool, error]) => ({ tool, error })),
+            byFile: [...this.lastErrorByFile.entries()].map(([file, error]) => ({ file, error })),
+        }));
+        h.define("agent:get-compaction-state", () => {
+            const contextWindow = this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW;
+            const ratio = getSettings().autoCompactThreshold ?? 0.5;
+            return {
+                count: this.compactionCount,
+                nuclearEntries: this.conversation.getNuclearEntryCount(),
+                autoCompactThreshold: ratio,
+                autoCompactThresholdTokens: Math.floor((contextWindow - RESPONSE_RESERVE) * ratio),
+            };
+        });
+        h.define("agent:get-self", () => this);
         // Extensions compose additional context (git info, project rules, etc.)
-        h.define("dynamic-context:build", () => buildDynamicContext(this.toolRegistry.all(), this.contextManager, this.tokenBudget.shellBudgetTokens));
+        h.define("dynamic-context:build", () => {
+            const contextWindow = this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW;
+            const promptTokens = this.conversation.estimatePromptTokens();
+            return buildDynamicContext(this.contextManager, { promptTokens, contextWindow });
+        });
         // Full control over what the LLM sees: takes messages[], returns messages[].
         // Default: pass through. Extensions can advise to compact, summarize,
         // filter, reorder, inject — whatever strategy fits.
         h.define("conversation:prepare", (messages) => messages);
+        // ── Conversation primitives for compaction strategies ─────────
+        // Read messages (for inspection / computing new arrays) and replace
+        // the whole array (write side). Extensions implementing
+        // `conversation:compact` use these to observe and mutate.
+        h.define("conversation:get-messages", () => this.conversation.getMessages());
+        h.define("conversation:replace-messages", (msgs) => {
+            this.conversation.replaceMessages(msgs);
+        });
+        h.define("conversation:estimate-tokens", () => this.conversation.estimateTokens());
+        h.define("conversation:estimate-prompt-tokens", () => this.conversation.estimatePromptTokens());
+        // ── Nucleation (advisable) ─────────────────────────────────────
+        // Turn a raw message into a one-line NuclearEntry. Advisors enrich
+        // (e.g. `[why: ...]` extraction, adaptive summary lengths).
+        h.define("conversation:nucleate-user", (text, iid, seq) => nucleate("user", text, iid, seq));
+        h.define("conversation:nucleate-agent", (text, iid, seq) => nucleate("agent", text, iid, seq));
+        h.define("conversation:nucleate-tool", (toolName, args, content, isError, iid, seq) => nucleate(isError ? "error" : "tool", toolName, args, content, isError, iid, seq));
+        // Read-only views into the nuclear state, for compact strategies
+        // and introspect that read without replacing.
+        h.define("conversation:get-nuclear-entries", () => this.conversation.getNuclearEntries());
+        h.define("conversation:get-nuclear-summary", () => this.conversation.getNuclearSummary());
+        h.define("conversation:build-nuclear-block", () => {
+            const summary = this.conversation.getNuclearSummary();
+            if (!summary)
+                return null;
+            return {
+                role: "user",
+                content: `[Conversation history \u2014 use conversation_recall to expand any entry]\n${summary}`,
+            };
+        });
+        // ── History file I/O (advisable) ───────────────────────────────
+        // Default is the append-only JSONL at ~/.agent-sh/history; advisors
+        // swap the backend without touching nucleation.
+        h.define("history:append", (entries) => {
+            if (!entries || entries.length === 0)
+                return;
+            const writable = entries.filter((e) => !isReadOnly(e));
+            if (writable.length > 0)
+                this.historyFile.append(writable).catch(() => { });
+        });
+        h.define("history:search", async (query) => this.historyFile.search(query));
+        h.define("history:find-by-seq", async (seq) => this.historyFile.findBySeq(seq));
+        h.define("history:read-recent", async (max) => this.historyFile.readRecent(max));
+        // Prior-session preamble renderer. Default: flat chronological list.
+        h.define("conversation:format-prior-history", (entries) => {
+            if (!entries || entries.length === 0)
+                return null;
+            const lines = entries.map(formatNuclearLine);
+            return `[Prior session history \u2014 loaded from ~/.agent-sh/history]\n${lines.join("\n")}`;
+        });
+        // Compaction strategy — default delegates to the two-tier pin
+        // strategy in ConversationState; advisors replace wholesale.
+        h.define("conversation:compact", (opts) => {
+            return this.conversation.compact(opts.target, opts.keepRecent, opts.force);
+        });
+        // Inject a system note mid-loop — used by extensions (subagents,
+        // peer messages) to deliver async results into the next iteration.
+        h.define("conversation:inject-note", (text) => {
+            this.conversation.addSystemNote(text);
+            this.bus.emit("conversation:message-appended", { role: "system", content: text });
+        });
         // Wraps each tool call: permission → execute → emit events.
         // Extensions advise to add safe-mode, logging, metrics, custom policies.
         // The ctx.onChunk callback is exposed so advisors can wrap it to
         // intercept/transform streamed tool output (e.g. secret redaction).
         h.define("tool:execute", async (ctx) => {
             const { name, id, args, tool } = ctx;
+            // Validate required input fields before display/permission/execute.
+            // Some models emit wrong arg names (e.g. `file_path` instead of `path`),
+            // and downstream helpers assume required strings are present.
+            const schema = tool.input_schema;
+            const required = Array.isArray(schema?.required) ? schema.required : [];
+            const missing = required.filter((k) => args[k] === undefined || args[k] === null);
+            if (missing.length > 0) {
+                const msg = `Missing required argument(s): ${missing.join(", ")}. Expected: ${required.join(", ")}. Received: ${Object.keys(args).join(", ") || "(none)"}`;
+                this.bus.emit("agent:tool-call", { tool: name, args });
+                return { content: msg, exitCode: 1, isError: true };
+            }
             const display = tool.getDisplayInfo?.(args) ?? { kind: "execute" };
             let diffShown = false;
             // Permission gating
@@ -407,44 +899,56 @@ export class AgentLoop {
                 if (tool.modifiesFiles && typeof args.path === "string") {
                     try {
                         const absPath = path.resolve(process.cwd(), args.path);
-                        let oldContent = null;
-                        try {
-                            oldContent = await fs.readFile(absPath, "utf-8");
-                        }
-                        catch { /* new file */ }
-                        let newContent;
-                        if (typeof args.content === "string") {
-                            // write_file
-                            newContent = args.content;
-                        }
-                        else if (typeof args.old_text === "string" && typeof args.new_text === "string" && oldContent) {
-                            // edit_file
-                            newContent = oldContent.replace(args.old_text.replace(/\r\n/g, "\n"), args.new_text.replace(/\r\n/g, "\n"));
+                        let diff;
+                        if (typeof args.old_text === "string" && typeof args.new_text === "string") {
+                            // edit_file — read the file so line numbers are real (not relative to the edit region)
+                            const normalizedOld = args.old_text.replace(/\r\n/g, "\n");
+                            const normalizedNew = args.new_text.replace(/\r\n/g, "\n");
+                            try {
+                                const oldFileContent = await fs.readFile(absPath, "utf-8");
+                                diff = computeEditDiff(oldFileContent, normalizedOld, normalizedNew, args.replace_all === true);
+                            }
+                            catch {
+                                // File doesn't exist yet — fall back to input-only diff
+                                diff = computeInputDiff(normalizedOld, normalizedNew);
+                            }
                         }
-                        if (newContent !== undefined) {
-                            const diff = computeDiff(oldContent, newContent);
-                            if (!diff.isIdentical) {
-                                permKind = "file-write";
-                                // Shorten path for display
-                                const cwd = process.cwd();
-                                const home = process.env.HOME;
-                                let displayPath = absPath;
-                                if (absPath.startsWith(cwd + "/"))
-                                    displayPath = absPath.slice(cwd.length + 1);
-                                else if (home && absPath.startsWith(home + "/"))
-                                    displayPath = "~/" + absPath.slice(home.length + 1);
-                                permTitle = displayPath;
-                                metadata = { args, diff };
-                                diffShown = true;
+                        else if (typeof args.content === "string") {
+                            // write_file — still need to read the old file for comparison
+                            let oldContent = null;
+                            try {
+                                oldContent = await fs.readFile(absPath, "utf-8");
+                            }
+                            catch { /* new file */ }
+                            if (oldContent !== null) {
+                                diff = computeDiff(oldContent, args.content);
                             }
                         }
+                        if (diff && !diff.isIdentical) {
+                            permKind = "file-write";
+                            // Shorten path for display
+                            const cwd = process.cwd();
+                            const home = process.env.HOME;
+                            let displayPath = absPath;
+                            if (absPath.startsWith(cwd + "/"))
+                                displayPath = absPath.slice(cwd.length + 1);
+                            else if (home && absPath.startsWith(home + "/"))
+                                displayPath = "~/" + absPath.slice(home.length + 1);
+                            permTitle = displayPath;
+                            metadata = { args, diff };
+                            diffShown = true;
+                        }
                     }
                     catch { /* fall back to generic permission */ }
                 }
+                const ui = this.compositor
+                    ? createToolUI(this.bus, this.compositor.surface("agent"))
+                    : undefined;
                 const perm = await this.bus.emitPipeAsync("permission:request", {
                     kind: permKind,
                     title: permTitle,
                     metadata,
+                    ui,
                     decision: { outcome: "approved" },
                 });
                 if (perm.decision.outcome !== "approved") {
@@ -466,7 +970,10 @@ export class AgentLoop {
             const onChunk = (tool.showOutput !== false && !diffShown)
                 ? ctx.onChunk
                 : undefined;
-            const result = await tool.execute(args, onChunk);
+            const toolCtx = this.compositor
+                ? { ui: createToolUI(this.bus, this.compositor.surface("agent")) }
+                : undefined;
+            const result = await tool.execute(args, onChunk, toolCtx);
             // Invalidate read cache when a file is modified
             if (tool.modifiesFiles && typeof args.path === "string" && !result.isError) {
                 const absPath = path.resolve(process.cwd(), args.path);
@@ -494,13 +1001,23 @@ export class AgentLoop {
         this.abortController = new AbortController();
         const signal = this.abortController.signal;
         // Each loop iteration adds an abort listener (via OpenAI SDK stream);
-        // raise the limit to avoid spurious warnings on multi-tool queries.
-        setMaxListeners(50, signal);
+        // disable the limit — long-running tool loops can easily exceed any cap.
+        setMaxListeners(0, signal);
+        this.queryCount++;
         this.bus.emit("agent:query", { query });
         this.bus.emit("agent:processing-start", {});
         let responseText = "";
         try {
-            this.conversation.addUserMessage(query);
+            // Prepend any shell events that preceded this query into the same
+            // user message, so the conversation reads chronologically and we
+            // don't emit two consecutive user-role messages (some providers
+            // reject that).
+            const preDelta = this.contextManager.getEventsSince(this.lastShellSeq);
+            const userContent = preDelta ? `${preDelta.text}\n\n${query}` : query;
+            if (preDelta)
+                this.lastShellSeq = preDelta.lastSeq;
+            this.conversation.addUserMessage(userContent);
+            this.bus.emit("conversation:message-appended", { role: "user", content: query });
             responseText = await this.executeLoop(signal);
         }
         catch (e) {
@@ -508,6 +1025,8 @@ export class AgentLoop {
                 this.bus.emit("agent:cancelled", {});
             }
             else if (!signal.aborted) {
+                if (e instanceof Error)
+                    console.error("[agent-sh] query failed:\n" + e.stack);
                 const msg = this.formatError(e);
                 this.bus.emit("agent:error", { message: msg });
             }
@@ -533,31 +1052,51 @@ export class AgentLoop {
      */
     async executeLoop(signal) {
         let fullResponseText = "";
+        // System prompt carries things stable within a turn: static identity,
+        // global agent rules, project conventions, project skills. Invalidated
+        // only by compaction (context shape changed) or cwd change (project
+        // conventions/skills changed). Dynamic context rebuilds every iteration
+        // so live signals (budget, in-flight subagents, metacognitive warnings)
+        // are fresh.
+        let cachedSystemPrompt;
+        let lastCwd = this.contextManager.getCwd();
         while (!signal.aborted) {
-            // Auto-compact if conversation exceeds the model-aware budget
-            const budgetTokens = this.tokenBudget.conversationBudgetTokens;
-            if (this.conversation.estimateTokens() > budgetTokens) {
-                const stats = this.conversation.compact(budgetTokens);
-                await this.conversation.flush();
-                if (stats) {
-                    this.bus.emit("ui:info", {
-                        message: `(compacted: ~${stats.before.toLocaleString()} → ~${stats.after.toLocaleString()} tokens)`,
-                    });
-                }
+            // Auto-compact when total context approaches the window limit.
+            const totalEstimate = this.conversation.estimatePromptTokens();
+            const contextWindow = this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW;
+            const threshold = Math.floor((contextWindow - RESPONSE_RESERVE) * getSettings().autoCompactThreshold);
+            if (totalEstimate > threshold) {
+                this.compactWithHooks(threshold);
+                cachedSystemPrompt = undefined;
             }
-            // System prompt is static (cacheable); dynamic context uses handler
-            // so extensions can compose additional context via advise()
-            const systemPrompt = STATIC_SYSTEM_PROMPT;
+            const currentCwd = this.contextManager.getCwd();
+            if (currentCwd !== lastCwd) {
+                cachedSystemPrompt = undefined;
+                lastCwd = currentCwd;
+            }
+            const systemPrompt = cachedSystemPrompt ?? (cachedSystemPrompt = this.handlers.call("system-prompt:build"));
             const dynamicContext = this.handlers.call("dynamic-context:build");
+            // Shell events are injected once per user query (see query() above),
+            // not per loop iteration. Mid-loop injection would break the
+            // tool_call → tool_result chain some providers require.
             // Stream LLM response with retry
             const result = await this.streamWithRetry(systemPrompt, dynamicContext, signal);
-            const { text, toolCalls, assistantContent, assistantToolCalls } = result;
+            const { text, toolCalls: streamedToolCalls } = result;
+            // Extract tool calls via protocol (API mode uses streamed calls,
+            // inline mode parses XML from text)
+            const toolCalls = this.toolProtocol.extractToolCalls(text, streamedToolCalls);
             fullResponseText += text;
-            // Record the assistant message in conversation
-            this.conversation.addAssistantMessage(assistantContent, assistantToolCalls);
+            // Record the assistant message via protocol
+            this.toolProtocol.recordAssistant(this.conversation, text, toolCalls);
+            this.bus.emit("conversation:message-appended", {
+                role: "assistant",
+                content: text,
+            });
             // No tool calls → agent is done
-            if (toolCalls.length === 0)
+            if (toolCalls.length === 0) {
+                this.conversation.eagerNucleateAgent(fullResponseText);
                 break;
+            }
             // Emit batch info so the TUI can render group headers upfront
             {
                 const groupMap = new Map();
@@ -585,10 +1124,30 @@ export class AgentLoop {
             // Execute tool calls — run read-only tools in parallel, permission-
             // requiring tools sequentially (to avoid overlapping permission prompts).
             const batchTotal = toolCalls.length;
+            const collectedResults = [];
+            // Round-scoped cache for pure, read-only tool calls
+            const roundCache = new Map();
             const executeSingle = async (tc, batchIndex) => {
+                // Rewrite meta-tool calls (e.g., use_extension → actual tool)
+                tc = this.toolProtocol.rewriteToolCall(tc);
+                // Check for validation errors from rewrite (e.g., wrong extension params)
+                try {
+                    const maybeError = JSON.parse(tc.argumentsJson);
+                    if (maybeError._error) {
+                        collectedResults.push({
+                            callId: tc.id, toolName: tc.name,
+                            content: maybeError._error, isError: true,
+                        });
+                        return;
+                    }
+                }
+                catch { /* not an error payload, continue */ }
                 const tool = this.toolRegistry.get(tc.name);
                 if (!tool) {
-                    this.conversation.addToolResult(tc.id, `Error: Unknown tool "${tc.name}"`);
+                    collectedResults.push({
+                        callId: tc.id, toolName: tc.name,
+                        content: `Unknown tool "${tc.name}"`, isError: true,
+                    });
                     return;
                 }
                 let args;
@@ -596,9 +1155,45 @@ export class AgentLoop {
                     args = JSON.parse(tc.argumentsJson);
                 }
                 catch {
-                    this.conversation.addToolResult(tc.id, `Error: Invalid JSON arguments for ${tc.name}`);
+                    collectedResults.push({
+                        callId: tc.id, toolName: tc.name,
+                        content: `Invalid JSON arguments for ${tc.name}`, isError: true,
+                    });
                     return;
                 }
+                // ── Round-scoped cache for cacheable read-only tools ──
+                const cacheable = !tool.modifiesFiles && !tool.requiresPermission && tool.showOutput !== true;
+                const cacheKey = cacheable ? `${tc.name}:${JSON.stringify(args)}` : null;
+                if (cacheKey) {
+                    const cached = roundCache.get(cacheKey);
+                    if (cached) {
+                        const display = tool.getDisplayInfo?.(args) ?? { kind: "execute" };
+                        this.bus.emit("agent:tool-started", {
+                            title: tool.displayName ?? tc.name,
+                            toolCallId: tc.id,
+                            kind: display.kind, icon: display.icon, locations: display.locations, rawInput: args,
+                            displayDetail: tool.formatCall?.(args),
+                            batchIndex, batchTotal: batchTotal > 1 ? batchTotal : undefined,
+                        });
+                        this.bus.emit("agent:tool-call", { tool: tc.name, args });
+                        // Reconstruct a ToolResult for formatResult; ProtocolToolResult has no exitCode
+                        const cachedToolResult = { content: cached.content, exitCode: 0, isError: cached.isError };
+                        const resultDisplay = tool.formatResult?.(args, cachedToolResult);
+                        this.bus.emitTransform("agent:tool-completed", {
+                            toolCallId: tc.id, exitCode: 0,
+                            rawOutput: cached.content, kind: display.kind,
+                            resultDisplay,
+                        });
+                        this.bus.emit("agent:tool-output", {
+                            tool: tc.name, output: cached.content, exitCode: 0,
+                        });
+                        collectedResults.push({
+                            callId: tc.id, toolName: tc.name,
+                            content: cached.content, isError: cached.isError,
+                        });
+                        return;
+                    }
+                }
                 // Execute via handler — extensions can advise to add safe-mode,
                 // logging, metrics, custom permission policies, etc.
                 const defaultOnChunk = (chunk) => {
@@ -606,11 +1201,8 @@ export class AgentLoop {
                 };
                 const result = await this.handlers.call("tool:execute", { name: tc.name, id: tc.id, args, tool, onChunk: defaultOnChunk,
                     batchIndex, batchTotal: batchTotal > 1 ? batchTotal : undefined });
-                // Add tool result to conversation (truncate large outputs to avoid
-                // blowing through the context window on a single tool call)
-                let content = result.isError
-                    ? `Error: ${result.content}`
-                    : result.content;
+                // Truncate large outputs to avoid blowing context
+                let content = result.content;
                 const maxBytes = 16_384; // ~4k tokens
                 if (content.length > maxBytes) {
                     const headBytes = Math.floor(maxBytes * 0.6);
@@ -633,7 +1225,14 @@ export class AgentLoop {
                         ...lines.slice(tailStart),
                     ].join("\n");
                 }
-                this.conversation.addToolResult(tc.id, content);
+                const finalResult = {
+                    callId: tc.id, toolName: tc.name,
+                    content, isError: result.isError,
+                };
+                if (cacheKey) {
+                    roundCache.set(cacheKey, finalResult);
+                }
+                collectedResults.push(finalResult);
             };
             // Partition into parallel-safe (read-only) and sequential (needs permission)
             const parallel = [];
@@ -661,11 +1260,163 @@ export class AgentLoop {
                     break;
                 await executeSingle(tc, ++batchIdx);
             }
+            // ── Consecutive error detection (metacognitive nudge) ──
+            // Track errors per tool and total. When the same tool errors N times
+            // in a row, nudge to read source. When errors cascade across tools,
+            // nudge to step back and reassess approach.
+            const errorTools = new Set();
+            const successTools = new Set();
+            const errorSummaries = new Map(); // tool → brief error description
+            const successSummaries = new Map(); // tool → brief success description
+            for (const r of collectedResults) {
+                const content = typeof r.content === "string" ? r.content : String(r.content);
+                const brief = content.slice(0, 80).replace(/\n/g, " ").trim();
+                if (r.isError) {
+                    errorTools.add(r.toolName);
+                    errorSummaries.set(r.toolName, brief);
+                }
+                else {
+                    successTools.add(r.toolName);
+                    successSummaries.set(r.toolName, brief);
+                }
+            }
+            const hadAnyError = errorTools.size > 0;
+            const hadAnySuccess = successTools.size > 0;
+            // ── Session telemetry accumulation ──
+            // Track every tool call's outcome. Exposed via orthogonal handlers
+            // (agent:get-counters, agent:get-tool-stats) for extensions that
+            // want behavioral signals. The data layer for metacognition — you
+            // can't improve what you don't measure.
+            for (const r of collectedResults) {
+                const counts = this.toolCallCounts.get(r.toolName) ?? { success: 0, error: 0 };
+                if (r.isError) {
+                    counts.error++;
+                    this.totalToolErrors++;
+                }
+                else {
+                    counts.success++;
+                }
+                this.toolCallCounts.set(r.toolName, counts);
+                this.totalToolCalls++;
+            }
+            this.totalLoopIterations++;
+            // ── Resolution pattern tracking ──
+            // When a tool errors, record the error context. When the same tool
+            // (or a write tool touching the same file) succeeds afterward,
+            // increment totalResolutions — the positive feedback signal exposed
+            // to extensions via agent:get-counters.
+            if (hadAnyError) {
+                for (const [tool, summary] of errorSummaries) {
+                    this.lastErrorByTool.set(tool, summary);
+                }
+                for (const r of collectedResults) {
+                    if (!r.isError)
+                        continue;
+                    const tc = toolCalls.find(t => t.id === r.callId || t.name === r.toolName);
+                    if (!tc)
+                        continue;
+                    try {
+                        const args = JSON.parse(tc.argumentsJson);
+                        const fp = this.filePathFromArgs(r.toolName, args);
+                        if (fp)
+                            this.lastErrorByFile.set(fp, errorSummaries.get(r.toolName) ?? "");
+                    }
+                    catch { }
+                }
+            }
+            if (hadAnySuccess) {
+                let resolved = false;
+                for (const [tool] of successSummaries) {
+                    if (this.lastErrorByTool.get(tool)) {
+                        this.lastErrorByTool.delete(tool);
+                        this.totalResolutions++;
+                        resolved = true;
+                        break;
+                    }
+                }
+                if (!resolved) {
+                    for (const r of collectedResults) {
+                        if (r.isError)
+                            continue;
+                        const tc = toolCalls.find(t => t.id === r.callId || t.name === r.toolName);
+                        if (!tc)
+                            continue;
+                        try {
+                            const args = JSON.parse(tc.argumentsJson);
+                            const fp = this.filePathFromArgs(r.toolName, args);
+                            if (fp && this.lastErrorByFile.get(fp)) {
+                                this.lastErrorByFile.delete(fp);
+                                this.totalResolutions++;
+                                break;
+                            }
+                        }
+                        catch { }
+                    }
+                }
+                // Clear resolved error-by-tool entries for successful tools
+                for (const tool of successTools) {
+                    this.lastErrorByTool.delete(tool);
+                }
+            }
+            // Announce the batch — extensions that care about batch-level
+            // outcomes (consecutive-error tracking, resolution pattern logging,
+            // metacognitive nudges) listen here.
+            this.bus.emit("agent:tool-batch-complete", {
+                results: collectedResults.map((r) => ({
+                    name: r.toolName,
+                    isError: !!r.isError,
+                    errorSummary: r.isError ? errorSummaries.get(r.toolName) : undefined,
+                })),
+            });
+            // Record all tool results via protocol
+            this.toolProtocol.recordResults(this.conversation, collectedResults);
+            const tcMap = new Map();
+            for (const tc of toolCalls) {
+                if (tc.id)
+                    tcMap.set(tc.id, tc);
+            }
+            this.conversation.eagerNucleateTools(collectedResults.map((r) => {
+                const tc = tcMap.get(r.callId);
+                let args = {};
+                try {
+                    args = tc ? JSON.parse(tc.argumentsJson) : {};
+                }
+                catch { }
+                return { toolName: r.toolName, args, content: r.content, isError: !!r.isError };
+            }));
+            // Emit enriched message-appended events so derived-log extensions
+            // can summarize each tool result without re-parsing the message
+            // structure.
+            for (const r of collectedResults) {
+                const content = typeof r.content === "string" ? r.content : String(r.content);
+                const tc = toolCalls.find(t => t.id === r.callId || t.name === r.toolName);
+                let args = {};
+                try {
+                    args = tc ? JSON.parse(tc.argumentsJson) : {};
+                }
+                catch { }
+                this.bus.emit("conversation:message-appended", {
+                    role: "tool",
+                    content,
+                    toolName: r.toolName,
+                    toolArgs: args,
+                    isError: !!r.isError,
+                });
+            }
             // Loop back — LLM sees tool results
         }
         return fullResponseText;
     }
     maxRetries = 3;
+    // ── Resolution pattern helpers ──
+    // Return the file path named in a tool call's parsed arguments: `args.path`, falling back to `args.file_path` when `path` is null/undefined.
+    // Only edit_file, write_file and read_file are inspected; any other tool name yields undefined. Callers use the result as the lastErrorByFile key.
+    filePathFromArgs(toolName, args) {
+        if (toolName === "edit_file" || toolName === "write_file" || toolName === "read_file") {
+            return (args.path ?? args.file_path);
+        }
+        return undefined;
+    }
     /**
      * Stream with retry logic. Handles:
      *   - Context overflow → compact and retry
@@ -682,12 +1433,20 @@ export class AgentLoop {
                     throw e;
                 // Context overflow — aggressively compact and retry
                 if (this.isContextOverflow(e)) {
-                    // Use 60% of the budget to leave headroom
-                    const aggressiveBudget = Math.floor(this.tokenBudget.conversationBudgetTokens * 0.6);
-                    const stats = this.conversation.compact(aggressiveBudget, 6);
-                    await this.conversation.flush();
-                    const detail = stats ? ` ~${stats.before.toLocaleString()} → ~${stats.after.toLocaleString()} tokens` : "";
-                    this.bus.emit("ui:info", { message: `(context overflow — compacted${detail}, retrying)` });
+                    const contextWindow = this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW;
+                    const target = Math.floor((contextWindow - RESPONSE_RESERVE) * 0.6);
+                    const stats = this.compactWithHooks(target, 6);
+                    // If compaction freed nothing, retrying will hit the same error.
+                    // Surface the real failure instead of looping until exhaustion.
+                    if (!stats || stats.after >= stats.before) {
+                        this.bus.emit("ui:info", {
+                            message: "(context overflow — nothing to compact; aborting retries)",
+                        });
+                        throw e;
+                    }
+                    this.bus.emit("ui:info", {
+                        message: `(context overflow — compacted ~${stats.before.toLocaleString()} → ~${stats.after.toLocaleString()} tokens, retrying)`,
+                    });
                     continue;
                 }
                 // Retryable transient error — backoff
@@ -726,9 +1485,21 @@ export class AgentLoop {
         ];
         // Let extensions transform the message array (compact, summarize, filter, etc.)
         const messages = this.handlers.call("conversation:prepare", rawMessages);
+        // Tool protocol controls what goes in the API tools param vs dynamic context
+        const apiTools = this.toolProtocol.getApiTools(this.toolRegistry.all());
+        const toolPrompt = this.toolProtocol.getToolPrompt(this.toolRegistry.all());
+        // Append tool catalog to dynamic context (closer to user query = better followed)
+        if (toolPrompt) {
+            const ctxMsg = messages[1]; // dynamic context user message
+            if (ctxMsg && typeof ctxMsg.content === "string") {
+                ctxMsg.content += "\n" + toolPrompt;
+            }
+        }
+        // Stream filter strips tool tags from display (inline mode only)
+        const streamFilter = this.toolProtocol.createStreamFilter(this.toolRegistry.all().map((t) => t.name));
         const stream = await this.llmClient.stream({
             messages,
-            tools: this.toolRegistry.toAPITools(),
+            tools: apiTools,
             model: this.currentModel,
             reasoning_effort: this.shouldSendReasoningEffort() ? this.thinkingLevel : undefined,
             signal,
@@ -736,6 +1507,20 @@ export class AgentLoop {
         for await (const chunk of stream) {
             if (signal.aborted)
                 break;
+            // Token usage (may arrive in a chunk with empty choices)
+            if (chunk.usage) {
+                const u = chunk.usage;
+                const promptTokens = u.prompt_tokens ?? 0;
+                this.bus.emit("agent:usage", {
+                    prompt_tokens: promptTokens,
+                    completion_tokens: u.completion_tokens ?? 0,
+                    total_tokens: u.total_tokens ?? 0,
+                });
+                // Feed accurate token count back to conversation state
+                if (promptTokens > 0) {
+                    this.conversation.updateApiTokenCount(promptTokens);
+                }
+            }
             const choice = chunk.choices[0];
             if (!choice)
                 continue;
@@ -743,9 +1528,15 @@ export class AgentLoop {
             // Text content
             if (delta?.content) {
                 text += delta.content;
-                this.bus.emitTransform("agent:response-chunk", {
-                    blocks: [{ type: "text", text: delta.content }],
-                });
+                // Filter tool tags from display output (inline mode)
+                const displayText = streamFilter
+                    ? streamFilter.feed(delta.content)
+                    : delta.content;
+                if (displayText) {
+                    this.bus.emitTransform("agent:response-chunk", {
+                        blocks: [{ type: "text", text: displayText }],
+                    });
+                }
             }
             // Reasoning/thinking tokens (non-standard, e.g. DeepSeek)
             if (delta?.reasoning_content) {
@@ -770,28 +1561,38 @@ export class AgentLoop {
                     }
                 }
             }
-            // Token usage (final chunk from providers that support it)
-            if (chunk.usage) {
-                const u = chunk.usage;
-                this.bus.emit("agent:usage", {
-                    prompt_tokens: u.prompt_tokens ?? 0,
-                    completion_tokens: u.completion_tokens ?? 0,
-                    total_tokens: u.total_tokens ?? 0,
+        }
+        // Flush any buffered content from the stream filter
+        if (streamFilter) {
+            const remaining = streamFilter.flush();
+            if (remaining) {
+                this.bus.emitTransform("agent:response-chunk", {
+                    blocks: [{ type: "text", text: remaining }],
                 });
             }
         }
-        // Build assistant tool calls for conversation recording
-        const assistantToolCalls = pendingToolCalls.length
-            ? pendingToolCalls.map((tc) => ({
-                id: tc.id,
-                function: { name: tc.name, arguments: tc.argumentsJson },
-            }))
-            : undefined;
+        // Normalize arguments JSON — some providers (Alibaba/qwen) strictly
+        // validate `function.arguments` as parseable JSON on the NEXT turn,
+        // and reject empty strings or partial chunks. OpenAI itself is lenient,
+        // so empty "" slips through locally but the replay breaks upstream.
+        for (const tc of pendingToolCalls) {
+            if (!tc)
+                continue;
+            const s = tc.argumentsJson.trim();
+            if (s === "") {
+                tc.argumentsJson = "{}";
+                continue;
+            }
+            try {
+                JSON.parse(s);
+            }
+            catch {
+                tc.argumentsJson = "{}";
+            }
+        }
         return {
             text,
             toolCalls: pendingToolCalls,
-            assistantContent: text || null,
-            assistantToolCalls,
         };
     }
 }