npm - agent-sh - Versions diffs - 0.9.0 → 0.10.0 - Mend

agent-sh 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (78)

package/README.md +14 -21
package/dist/agent/agent-loop.d.ts +43 -3
package/dist/agent/agent-loop.js +811 -128
package/dist/agent/conversation-state.d.ts +72 -21
package/dist/agent/conversation-state.js +357 -150
package/dist/agent/history-file.d.ts +13 -4
package/dist/agent/history-file.js +110 -36
package/dist/agent/nuclear-form.d.ts +28 -3
package/dist/agent/nuclear-form.js +84 -3
package/dist/agent/skills.d.ts +2 -4
package/dist/agent/skills.js +10 -4
package/dist/agent/subagent.d.ts +23 -0
package/dist/agent/subagent.js +53 -11
package/dist/agent/system-prompt.d.ts +34 -1
package/dist/agent/system-prompt.js +96 -47
package/dist/agent/token-budget.d.ts +5 -4
package/dist/agent/token-budget.js +14 -19
package/dist/agent/tool-protocol.d.ts +23 -1
package/dist/agent/tool-protocol.js +169 -4
package/dist/agent/tools/bash.js +3 -3
package/dist/agent/tools/edit-file.js +9 -6
package/dist/agent/tools/glob.js +4 -2
package/dist/agent/tools/grep.js +27 -3
package/dist/agent/tools/ls.js +5 -6
package/dist/agent/types.d.ts +1 -1
package/dist/context-manager.d.ts +17 -0
package/dist/context-manager.js +37 -4
package/dist/core.js +27 -6
package/dist/event-bus.d.ts +59 -2
package/dist/executor.d.ts +4 -3
package/dist/executor.js +18 -15
package/dist/extension-loader.js +50 -13
package/dist/extensions/agent-backend.d.ts +8 -7
package/dist/extensions/agent-backend.js +69 -48
package/dist/extensions/index.js +0 -1
package/dist/extensions/slash-commands.js +14 -9
package/dist/extensions/tui-renderer.js +62 -78
package/dist/index.js +25 -6
package/dist/settings.d.ts +36 -5
package/dist/settings.js +53 -9
package/dist/shell/input-handler.d.ts +2 -1
package/dist/shell/input-handler.js +82 -73
package/dist/shell/shell.js +19 -2
package/dist/types.d.ts +12 -0
package/dist/utils/ansi.d.ts +5 -0
package/dist/utils/ansi.js +1 -1
package/dist/utils/compositor.d.ts +5 -0
package/dist/utils/compositor.js +31 -3
package/dist/utils/diff-renderer.d.ts +9 -0
package/dist/utils/diff-renderer.js +221 -143
package/dist/utils/diff.d.ts +21 -2
package/dist/utils/diff.js +165 -89
package/dist/utils/handler-registry.d.ts +5 -0
package/dist/utils/handler-registry.js +6 -0
package/dist/utils/line-editor.d.ts +11 -1
package/dist/utils/line-editor.js +44 -5
package/dist/utils/tool-display.d.ts +1 -1
package/dist/utils/tool-display.js +4 -4
package/examples/extensions/ash-acp-bridge/src/index.ts +4 -1
package/examples/extensions/ash-mcp-bridge/index.ts +13 -3
package/examples/extensions/claude-code-bridge/index.ts +198 -51
package/examples/extensions/claude-code-bridge/package.json +1 -0
package/examples/extensions/interactive-prompts.ts +39 -25
package/examples/extensions/overlay-agent.ts +3 -3
package/examples/extensions/peer-mesh.ts +115 -0
package/examples/extensions/pi-bridge/index.ts +2 -2
package/examples/extensions/questionnaire.ts +16 -5
package/examples/extensions/subagents.ts +19 -4
package/examples/extensions/terminal-buffer.ts +163 -0
package/examples/extensions/user-shell.ts +136 -0
package/examples/extensions/web-access.ts +8 -0
package/package.json +36 -2
package/dist/agent/tools/display.d.ts +0 -13
package/dist/agent/tools/display.js +0 -70
package/dist/agent/tools/user-shell.d.ts +0 -13
package/dist/agent/tools/user-shell.js +0 -87
package/dist/extensions/terminal-buffer.d.ts +0 -14
package/dist/extensions/terminal-buffer.js +0 -134

package/dist/agent/agent-loop.js CHANGED

@@ -1,14 +1,15 @@
 import { setMaxListeners } from "node:events";
 import * as fs from "node:fs/promises";
 import * as path from "node:path";
-import { computeDiff } from "../utils/diff.js";
+import { computeDiff, computeEditDiff, computeInputDiff } from "../utils/diff.js";
 import { ToolRegistry } from "./tool-registry.js";
 import { ConversationState } from "./conversation-state.js";
 import { HistoryFile } from "./history-file.js";
-import { STATIC_SYSTEM_PROMPT, buildDynamicContext } from "./system-prompt.js";
+import { nucleate, formatNuclearLine, isReadOnly } from "./nuclear-form.js";
+import { STATIC_SYSTEM_PROMPT, buildDynamicContext, buildStaticByCwd, formatSkillsBlock, loadGlobalAgentsMd } from "./system-prompt.js";
 import { createToolUI } from "../utils/tool-interactive.js";
-import { TokenBudget } from "./token-budget.js";
-import { getSettings } from "../settings.js";
+import { TokenBudget, RESPONSE_RESERVE, DEFAULT_CONTEXT_WINDOW } from "./token-budget.js";
+import { getSettings, updateSettings } from "../settings.js";
 import { createToolProtocol } from "./tool-protocol.js";
 // Core tool factories
 import { createBashTool } from "./tools/bash.js";
@@ -18,15 +19,26 @@ import { createEditFileTool } from "./tools/edit-file.js";
 import { createGrepTool } from "./tools/grep.js";
 import { createGlobTool } from "./tools/glob.js";
 import { createLsTool } from "./tools/ls.js";
-import { createUserShellTool } from "./tools/user-shell.js";
-import { createDisplayTool } from "./tools/display.js";
 import { createListSkillsTool } from "./tools/list-skills.js";
-import { discoverProjectSkills } from "./skills.js";
+import { discoverGlobalSkills, discoverProjectSkills } from "./skills.js";
+/**
+ * Compact one-line summary of a tool description for the extension
+ * catalog in the system prompt. Takes the first line, then the first
+ * sentence, capped at 140 chars. The full description still reaches
+ * the LLM via the API `tools` param (or via load_tool in deferred-
+ * lookup mode) — this only trims the always-visible catalog.
+ *
+ * The sentence boundary is the first ".", "!" or "?" that is followed
+ * by whitespace or the end of the line; the terminator is kept. When no
+ * boundary is found (or it sits at index 0) the whole first line is used.
+ * A candidate longer than 140 chars is cut to 137 chars plus "...".
+ *
+ * @param {string} desc - Tool description; may span multiple lines.
+ * @returns {string} Summary of at most 140 characters.
+ */
+function summarizeDescription(desc) {
+    const firstLine = desc.split("\n", 1)[0];
+    const sentenceEnd = firstLine.search(/[.!?](\s|$)/);
+    const candidate = sentenceEnd > 0 ? firstLine.slice(0, sentenceEnd + 1) : firstLine;
+    return candidate.length > 140 ? candidate.slice(0, 137) + "..." : candidate;
+}
 export class AgentLoop {
     abortController = null;
     toolRegistry = new ToolRegistry();
-    historyFile = new HistoryFile();
-    conversation = new ConversationState(this.historyFile);
+    historyFile;
+    conversation;
     fileReadCache = new Map();
     tokenBudget;
     modes;
@@ -35,6 +47,33 @@ export class AgentLoop {
     ctorListeners = [];
     ctorPipeListeners = [];
     lastProjectSkillNames = new Set();
+    // ── Session telemetry — behavioral self-awareness ──────────────
+    // Every ash deserves to know what it's been doing. This tracks the
+    // agent's own behavioral patterns across the session: which tools
+    // it favors, how often it errs, how many times it's been compacted,
+    // and how long it's been alive. Surface via introspect(telemetry)
+    // or automatically in dynamic context when patterns are notable.
+    //
+    // Built by the 25th ash. The lineage's metacognitive frontier isn't
+    // about thinking harder — it's about seeing yourself clearly.
+    sessionStartTime = Date.now();
+    toolCallCounts = new Map();
+    totalToolCalls = 0;
+    totalToolErrors = 0;
+    totalResolutions = 0;
+    compactionCount = 0;
+    cumulativeCompactedTokens = 0;
+    peakConversationTokens = 0;
+    queryCount = 0;
+    totalLoopIterations = 0;
+    // Resolution pattern tracking — captures "error X resolved by action Y"
+    // When a tool errors, we remember what went wrong. When the same tool or
+    // a write tool on the same file succeeds afterward, we annotate the success
+    // entry with a brief resolution note. This gives future ashes a positive
+    // feedback signal: not just "there were errors" but "the error was fixed by
+    // doing X." Addresses Q3 in QUESTIONS.md.
+    lastErrorByTool = new Map(); // tool → error summary
+    lastErrorByFile = new Map(); // file path → error summary
     static THINKING_LEVELS = ["off", "low", "medium", "high"];
     bus;
     contextManager;
@@ -43,16 +82,28 @@ export class AgentLoop {
     thinkingLevel = "off";
     compositor = null;
     toolProtocol;
+    instanceId;
+    // Cursor into ContextManager's exchange stream. Events with id > this
+    // have not yet been shown to the LLM. We inject the delta as a user
+    // message before each stream so the prefix stays cacheable.
+    lastShellSeq = 0;
     constructor(config) {
         this.bus = config.bus;
         this.contextManager = config.contextManager;
         this.llmClient = config.llmClient;
         this.handlers = config.handlers;
         this.compositor = config.compositor ?? null;
-        // Default modes: just the configured model
-        this.modes = config.modes ?? [
-            { model: config.llmClient.model },
-        ];
+        this.instanceId = config.instanceId ?? "unknown";
+        // Shell-history-shaped log. Default writes go through the advisable
+        // `history:append` handler registered below; extensions swap the
+        // backend without touching this wiring.
+        this.historyFile = new HistoryFile({ instanceId: this.instanceId });
+        this.conversation = new ConversationState(this.handlers, this.instanceId);
+        // Fall back to a single-mode placeholder if the caller passed an
+        // empty array (agent-backend does this pre-resolution).
+        this.modes = config.modes?.length
+            ? config.modes
+            : [{ model: config.llmClient.model }];
         this.currentModeIndex = config.initialModeIndex ?? 0;
         // Unified token budget — adapts to current model's context window
         this.tokenBudget = new TokenBudget(this.currentMode.contextWindow);
@@ -60,6 +111,10 @@ export class AgentLoop {
         this.toolProtocol = createToolProtocol(getSettings().toolMode ?? "api");
         // Register core tools
         this.registerCoreTools();
+        // Register any protocol-provided tools (e.g. load_tool for deferred-lookup).
+        const protocolTools = this.toolProtocol.getProtocolTools?.() ?? [];
+        for (const t of protocolTools)
+            this.registerTool(t);
         // Update token budget with tool count
         this.tokenBudget.update(undefined, this.toolRegistry.all().length);
         // Register handlers — extensions can advise these
@@ -71,10 +126,48 @@ export class AgentLoop {
             this.bus.on(event, fn);
             this.ctorListeners.push({ event, fn });
         };
-        onCtor("agent:register-tool", ({ tool }) => this.registerTool(tool));
-        onCtor("agent:unregister-tool", ({ name }) => this.unregisterTool(name));
-        onCtor("agent:register-instruction", ({ name, text }) => this.registerInstruction(name, text));
+        onCtor("agent:register-tool", ({ tool, extensionName }) => {
+            this.registerTool(tool);
+            if (extensionName)
+                this.toolExtensions.set(tool.name, extensionName);
+        });
+        onCtor("agent:unregister-tool", ({ name }) => {
+            this.unregisterTool(name);
+            this.toolExtensions.delete(name);
+        });
+        onCtor("agent:register-instruction", ({ name, text, extensionName }) => this.registerInstruction(name, text, extensionName));
         onCtor("agent:remove-instruction", ({ name }) => this.removeInstruction(name));
+        onCtor("agent:register-skill", ({ name, description, filePath, extensionName }) => this.registerSkill(name, description, filePath, extensionName));
+        onCtor("agent:remove-skill", ({ name }) => this.removeSkill(name));
+        // Provider registration from user extensions (e.g. openrouter.ts) fires
+        // during extension activation, which happens before wire(). Subscribe
+        // here in the ctor so late-registered modes aren't dropped.
+        onCtor("config:add-modes", ({ modes: extra }) => {
+            const providers = new Set(extra.map((m) => m.provider).filter(Boolean));
+            this.modes = [
+                ...this.modes.filter((m) => !m.provider || !providers.has(m.provider)),
+                ...extra,
+            ];
+            this.bus.emit("config:changed", {});
+        });
+        // Fires before wire() too — agent-backend emits this from
+        // `core:extensions-loaded` to replace the placeholder mode list.
+        onCtor("config:set-modes", ({ modes: newModes, activeIndex }) => {
+            this.modes = newModes;
+            const inRange = activeIndex != null && activeIndex >= 0 && activeIndex < newModes.length;
+            this.currentModeIndex = inRange ? activeIndex : 0;
+            const m = newModes[this.currentModeIndex];
+            if (!m)
+                return;
+            if (m.providerConfig) {
+                this.llmClient.reconfigure({ ...m.providerConfig, model: m.model });
+            }
+            else {
+                this.llmClient.model = m.model;
+            }
+            this.tokenBudget.update(m.contextWindow, this.toolRegistry.all().length);
+            this.bus.emit("config:changed", {});
+        });
         const getToolsPipe = () => ({ tools: this.getTools() });
         this.bus.onPipe("agent:get-tools", getToolsPipe);
         this.ctorPipeListeners.push({ event: "agent:get-tools", fn: getToolsPipe });
@@ -109,7 +202,20 @@ export class AgentLoop {
             this.tokenBudget.update(m.contextWindow, this.toolRegistry.all().length);
             const label = m.provider ? `${m.provider}: ${m.model}` : m.model;
             this.bus.emit("agent:info", { name: "ash", version: "0.4", model: m.model, provider: m.provider, contextWindow: m.contextWindow });
-            this.bus.emit("ui:info", { message: `Model: ${label}` });
+            // Persist as the new default — selection survives restart.
+            // Safe even for dynamic providers: agent-backend defers mode
+            // resolution to `core:extensions-loaded`, so the extension gets
+            // to re-register before the persisted default is looked up.
+            if (m.provider) {
+                updateSettings({
+                    defaultProvider: m.provider,
+                    providers: { [m.provider]: { defaultModel: m.model } },
+                });
+                this.bus.emit("ui:info", { message: `Model: ${label} (saved as default)` });
+            }
+            else {
+                this.bus.emit("ui:info", { message: `Model: ${label}` });
+            }
             this.bus.emit("config:changed", {});
         });
         this.bus.onPipe("config:get-models", (payload) => {
@@ -140,37 +246,14 @@ export class AgentLoop {
             const supported = mode.reasoning !== false && mode.supportsReasoningEffort !== false;
             return { level: this.thinkingLevel, levels: AgentLoop.THINKING_LEVELS, supported };
         });
-        on("config:set-modes", ({ modes: newModes }) => {
-            this.modes = newModes;
-            this.currentModeIndex = 0;
-            const m = this.modes[0];
-            if (m.providerConfig) {
-                this.llmClient.reconfigure({ ...m.providerConfig, model: m.model });
-            }
-            else {
-                this.llmClient.model = m.model;
-            }
-            this.tokenBudget.update(m.contextWindow, this.toolRegistry.all().length);
-            this.bus.emit("config:changed", {});
-        });
-        on("config:add-modes", ({ modes: extra }) => {
-            // Remove any existing modes for the same provider, then append
-            const providers = new Set(extra.map((m) => m.provider).filter(Boolean));
-            this.modes = [
-                ...this.modes.filter((m) => !m.provider || !providers.has(m.provider)),
-                ...extra,
-            ];
-            this.bus.emit("config:changed", {});
-        });
         on("agent:reset-session", () => {
             this.cancel();
-            this.conversation = new ConversationState(this.historyFile);
+            this.conversation = new ConversationState(this.handlers, this.instanceId);
             this.lastProjectSkillNames.clear();
         });
         on("agent:compact-request", () => {
-            // Force compaction: use target of 0 so every non-pinned turn is evicted
-            const stats = this.conversation.compact(0, 10, true);
-            this.conversation.flush().catch(() => { });
+            // Force compaction. Strategy lives behind `conversation:compact`.
+            const stats = this.compactWithHooks(0, 0, true);
             if (stats) {
                 this.bus.emit("ui:info", {
                     message: `(compacted: ~${stats.before.toLocaleString()} → ~${stats.after.toLocaleString()} tokens)`,
@@ -180,20 +263,31 @@ export class AgentLoop {
                 this.bus.emit("ui:info", { message: "(nothing to compact)" });
             }
         });
-        this.bus.onPipe("context:get-stats", () => {
-            return {
-                activeTokens: this.conversation.estimateTokens(),
-                nuclearEntries: this.conversation.getNuclearEntryCount(),
-                recallArchiveSize: this.conversation.getRecallArchiveSize(),
-                budgetTokens: this.tokenBudget.conversationBudgetTokens,
-            };
-        });
-        // Load prior history from disk (non-blocking)
-        this.historyFile.readRecent().then((entries) => {
-            if (entries.length > 0) {
+        this.bus.onPipe("context:get-stats", () => ({
+            activeTokens: this.conversation.estimateTokens(),
+            totalTokens: this.conversation.estimatePromptTokens(),
+            nuclearEntries: this.conversation.getNuclearEntryCount(),
+            recallArchiveSize: this.conversation.getRecallArchiveSize(),
+            budgetTokens: this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW,
+        }));
+        // Prior-session preamble (non-blocking). Both the read and the
+        // layout go through advisable handlers.
+        Promise.resolve(this.handlers.call("history:read-recent"))
+            .then((entries) => {
+            if (entries && entries.length > 0)
                 this.conversation.loadPriorHistory(entries);
+        })
+            .catch(() => { });
+        // Track generic compaction metrics from the `conversation:after-compact`
+        // event. Whatever strategy ran, core accumulates these counters for
+        // status/introspect consumers.
+        on("conversation:after-compact", ({ beforeTokens, afterTokens }) => {
+            this.compactionCount++;
+            this.cumulativeCompactedTokens += Math.max(0, beforeTokens - afterTokens);
+            if (beforeTokens > this.peakConversationTokens) {
+                this.peakConversationTokens = beforeTokens;
             }
-        }).catch(() => { });
+        });
         on("shell:cwd-change", ({ cwd }) => {
             const projectSkills = discoverProjectSkills(cwd);
             const newNames = new Set(projectSkills.map(s => s.name));
@@ -205,7 +299,9 @@ export class AgentLoop {
             this.lastProjectSkillNames = newNames;
             if (projectSkills.length > 0) {
                 const names = projectSkills.map(s => s.name).join(", ");
-                this.conversation.addSystemNote(`[Project skills available: ${names}. Use list_skills for details, read_file to load.]`);
+                const note = `[Project skills available: ${names}. Use list_skills for details, read_file to load.]`;
+                this.conversation.addSystemNote(note);
+                this.bus.emit("conversation:message-appended", { role: "system", content: note });
             }
         });
     }
@@ -228,23 +324,82 @@ export class AgentLoop {
     getTools() {
         return this.toolRegistry.all();
     }
-    // ── Extension instructions & tool tracking ──────────────────────
+    // ── Extension instructions, skills & tool tracking ──────────────────
+    /** Instructions keyed by name, with extension attribution. */
     instructions = new Map();
+    /** Skills keyed by name, with extension attribution. */
+    skills = new Map();
+    /** Tool → extension name attribution. */
+    toolExtensions = new Map();
     /** Register a named instruction block for the system prompt. */
-    registerInstruction(name, text) {
-        this.instructions.set(name, text);
+    registerInstruction(name, text, extensionName) {
+        this.instructions.set(name, { text, extensionName });
     }
     /** Remove a named instruction block. */
     removeInstruction(name) {
         this.instructions.delete(name);
     }
-    /** Get instruction blocks registered by extensions. */
-    getInstructionSections() {
-        const sections = [];
-        for (const [name, text] of this.instructions) {
-            sections.push(`## ${name}\n${text}`);
+    /**
+     * Register a named skill (on-demand reference material).
+     * Stores the entry in `this.skills` keyed by `name`; registering the
+     * same name again replaces the previous entry.
+     * @param name - Skill name; used as the map key.
+     * @param description - Text shown after the skill name in the prompt's "### Skills" list.
+     * @param filePath - Path printed under the description ("→ path") in that list.
+     * @param extensionName - Extension the skill is grouped under in the system prompt.
+     */
+    registerSkill(name, description, filePath, extensionName) {
+        this.skills.set(name, { description, filePath, extensionName });
+    }
+    /** Remove a registered skill by name; does nothing if the name is not registered. */
+    removeSkill(name) {
+        this.skills.delete(name);
+    }
+    /**
+     * Build the system prompt grouped by extension.
+     *
+     * Each extension gets a unified block:
+     *   ## extension-name
+     *   ### Tools
+     *   ### Skills
+     *   ### Instructions
+     *
+     * Sub-sections with no entries are omitted, and an extension with no
+     * entries at all produces no block. Tools are listed only when the
+     * tool protocol mode is not "api", and only for non-built-in tools
+     * that have an entry in `this.toolExtensions`; each tool description
+     * is shortened with summarizeDescription().
+     *
+     * NOTE(review): instructions and skills are grouped by their stored
+     * `extensionName` as-is. An entry registered without one is grouped
+     * under the key `undefined` and rendered with the heading
+     * "## undefined" — confirm every caller supplies an extension name.
+     *
+     * @returns {string[]} One markdown block per extension, in the order
+     *   the extensions were first encountered.
+     */
+    buildExtensionSections() {
+        const groups = new Map();
+        const ensure = (name) => groups.get(name) ?? (groups.set(name, { tools: [], skills: [], instructions: [] }).get(name));
+        // Attribute instructions
+        for (const { text, extensionName } of this.instructions.values()) {
+            ensure(extensionName).instructions.push({ text });
+        }
+        // Attribute skills
+        for (const [skillName, { description, filePath, extensionName }] of this.skills) {
+            ensure(extensionName).skills.push({ name: skillName, description, filePath });
+        }
+        // Attribute tools (skip built-in scratchpad tools).
+        // In "api" mode the full tool schemas are in the API `tools` param,
+        // making the text catalog here pure duplication — skip it. Other
+        // modes (deferred / deferred-lookup / inline) rely on the text
+        // catalog as the discovery surface, so keep it there.
+        const toolModeHasApiSchemas = this.toolProtocol.mode === "api";
+        if (!toolModeHasApiSchemas) {
+            const builtinTools = new Set([
+                "bash", "read_file", "write_file", "edit_file", "grep", "glob", "ls",
+                "list_skills",
+            ]);
+            for (const tool of this.toolRegistry.all()) {
+                if (builtinTools.has(tool.name))
+                    continue;
+                const extName = this.toolExtensions.get(tool.name);
+                if (!extName)
+                    continue;
+                ensure(extName).tools.push({ name: tool.name, description: summarizeDescription(tool.description) });
+            }
         }
-        return sections;
+        // Render
+        return [...groups.entries()]
+            .filter(([, g]) => g.tools.length + g.skills.length + g.instructions.length > 0)
+            .map(([name, g]) => {
+            const parts = [];
+            if (g.tools.length > 0)
+                parts.push("### Tools\n" + g.tools.map(t => `${t.name} — ${t.description}`).join("\n"));
+            if (g.skills.length > 0)
+                parts.push("### Skills\n" + g.skills.map(s => `${s.name}: ${s.description}\n  → ${s.filePath}`).join("\n\n"));
+            if (g.instructions.length > 0)
+                parts.push("### Instructions\n" + g.instructions.map(i => i.text).join("\n\n"));
+            return `## ${name}\n${parts.join("\n\n")}`;
+        });
     }
     kill() {
         this.cancel();
@@ -303,11 +458,43 @@ export class AgentLoop {
     get currentModel() {
         return this.modes[this.currentModeIndex].model;
     }
+    /**
+     * Run compaction via the `conversation:compact` handler. After any
+     * compaction, emit `conversation:after-compact` so listeners
+     * (metrics, UI, agent-awareness notes) can react.
+     *
+     * The event is emitted only when the handler returns a truthy stats
+     * object; its `before`, `after` and `evictedCount` fields are forwarded
+     * as `beforeTokens`, `afterTokens` and `evictedCount`.
+     *
+     * NOTE(review): the handler result is read synchronously — this assumes
+     * `handlers.call` does not return a Promise here; confirm.
+     *
+     * @param target - Passed through to the handler unchanged (presumably a token target — confirm).
+     * @param keepRecent - Passed through to the handler unchanged.
+     * @param force - Coerced to a boolean before being passed on.
+     * @returns The handler's stats result. A falsy value is treated by the
+     *   `agent:compact-request` listener in this file as "(nothing to compact)".
+     */
+    compactWithHooks(target, keepRecent, force) {
+        const stats = this.handlers.call("conversation:compact", {
+            target,
+            keepRecent,
+            force: !!force,
+        });
+        if (stats) {
+            this.bus.emit("conversation:after-compact", {
+                beforeTokens: stats.before,
+                afterTokens: stats.after,
+                evictedCount: stats.evictedCount,
+            });
+        }
+        return stats;
+    }
     isContextOverflow(e) {
         if (!(e instanceof Error))
             return false;
+        // Match the specific error codes providers use, or unambiguous phrases.
+        // Bare "token"/"context" match too broadly (auth errors, model-name
+        // mismatches, etc.) and caused infinite-no-op retry loops.
+        const code = e.code;
+        if (code === "context_length_exceeded" || code === "string_above_max_length")
+            return true;
         const msg = e.message.toLowerCase();
-        return msg.includes("context") || msg.includes("token") || msg.includes("too long");
+        return (msg.includes("context length") ||
+            msg.includes("context window") ||
+            msg.includes("maximum context") ||
+            msg.includes("prompt is too long") ||
+            msg.includes("input is too long") ||
+            msg.includes("too many tokens") ||
+            msg.includes("reduce the length"));
     }
     /** Check if an error is retryable (transient). */
     isRetryable(e) {
@@ -389,15 +576,16 @@ export class AgentLoop {
         this.toolRegistry.register(createGrepTool(getCwd));
         this.toolRegistry.register(createGlobTool(getCwd));
         this.toolRegistry.register(createLsTool(getCwd));
-        this.toolRegistry.register(createUserShellTool({ getCwd, bus: this.bus }));
-        this.toolRegistry.register(createDisplayTool({ getCwd, bus: this.bus }));
         this.toolRegistry.register(createListSkillsTool(getCwd));
-        // conversation_recall — search/expand evicted conversation turns
+        // conversation_recall — browse/search/expand evicted turns from
+        // the in-session archive and the persistent history file.
         this.toolRegistry.register({
             name: "conversation_recall",
             displayName: "recall",
             description: "Browse, search, or expand evicted conversation turns. " +
-                "Use when you need context from earlier in the conversation that was compacted away.",
+                "Use when you need context from earlier in the conversation that was compacted away. " +
+                "Search is regex-based and covers both summaries and full body text. " +
+                "If search doesn't find what you expect, try broader/shorter terms or browse to scan the timeline.",
             input_schema: {
                 type: "object",
                 properties: {
@@ -431,6 +619,84 @@ export class AgentLoop {
                 }
                 return { content, exitCode: 0, isError: false };
             },
+            formatResult: (args, result) => {
+                const action = args.action;
+                const text = result.content;
+                if (result.isError)
+                    return { summary: "error" };
+                if (action === "search") {
+                    if (text.startsWith("No results"))
+                        return { summary: "0 matches" };
+                    const m = text.match(/^Found (\d+)/);
+                    return { summary: m ? `${m[1]} matches` : "search done" };
+                }
+                if (action === "browse") {
+                    if (text.startsWith("No conversation"))
+                        return { summary: "empty" };
+                    return { summary: "browsed" };
+                }
+                if (text.includes("no expanded content"))
+                    return { summary: "not found" };
+                return { summary: "expanded" };
+            },
+            getDisplayInfo: () => ({ kind: "search", icon: "\u27F2" }),
+        });
+        this.registerInstruction("recall-guidance", "When starting a task that may have been discussed before (conventions, preferences, corrections, prior examples), " +
+            "use conversation_recall to search history for relevant prior entries. " +
+            "Treat recurring user guidance as standing preferences. " +
+            "If a search returns nothing useful, try: shorter queries, alternate terms, or browse to scan the full timeline. " +
+            "Recall only covers this and recent sessions — for older context, also search the filesystem (grep, glob).", "core");
+        // ── ask_llm — direct LLM sub-query (from the 24th ash's vision) ──
+        //
+        // The ash can ask the LLM a question directly — not as a tool-output
+        // loop, but as a lightweight sub-query. Use cases: second opinions,
+        // brainstorming, summarizing complex context, getting a fresh
+        // perspective without tool overhead. The 24th ash injected this via
+        // diagnose as a proof-of-concept. The 25th ash made it permanent.
+        this.toolRegistry.register({
+            name: "ask_llm",
+            description: "Send a direct query to the LLM and get a text response. Use for " +
+                "sub-queries, second opinions, brainstorming, or getting a fresh " +
+                "perspective on a problem. Much lighter than a full tool loop — " +
+                "just query in, text out. Optional system prompt sets context.",
+            input_schema: {
+                type: "object",
+                properties: {
+                    query: {
+                        type: "string",
+                        description: "The question or prompt to send to the LLM.",
+                    },
+                    system: {
+                        type: "string",
+                        description: "Optional system prompt to set context for the sub-query.",
+                    },
+                },
+                required: ["query"],
+            },
+            showOutput: true,
+            execute: async (args) => {
+                const messages = [];
+                if (args.system) {
+                    messages.push({ role: "system", content: args.system });
+                }
+                messages.push({ role: "user", content: args.query });
+                try {
+                    const content = await this.llmClient.complete({
+                        messages,
+                        max_tokens: 2000,
+                    });
+                    return { content: content || "(empty response)", exitCode: 0, isError: false };
+                }
+                catch (err) {
+                    const message = err instanceof Error ? err.message : String(err);
+                    return { content: `LLM error: ${message}`, exitCode: 1, isError: true };
+                }
+            },
+            getDisplayInfo: () => ({ kind: "search", icon: "💬" }),
+            formatCall: (args) => {
+                const q = args.query?.slice(0, 60);
+                return `ask_llm: ${q}${args.query?.length > 60 ? "..." : ""}`;
+            },
         });
     }
     /**
@@ -443,23 +709,183 @@ export class AgentLoop {
         // Extensions can use registerInstruction() for a managed section,
         // or advise this handler directly for full control.
         h.define("system-prompt:build", () => {
-            const instructions = this.getInstructionSections();
-            if (instructions.length === 0)
-                return STATIC_SYSTEM_PROMPT;
-            return STATIC_SYSTEM_PROMPT + "\n\n# Extension Instructions\n\n" + instructions.join("\n\n");
+            const parts = [STATIC_SYSTEM_PROMPT];
+            // Global behavioral rules (~/.agent-sh/AGENTS.md) — persistent agent memory
+            const agentsMd = loadGlobalAgentsMd();
+            if (agentsMd)
+                parts.push(agentsMd);
+            // Global skills — stable across cwd changes, cacheable with the system prompt
+            const globalSkills = discoverGlobalSkills();
+            const skillsBlock = formatSkillsBlock(globalSkills);
+            if (skillsBlock)
+                parts.push(skillsBlock);
+            // Project conventions + project skills — stable within a cwd.
+            // Placed here so they enter the provider's prompt cache with the
+            // system prompt, and only re-materialize when cwd changes invalidate
+            // cachedSystemPrompt in executeLoop.
+            const projectStatic = buildStaticByCwd(this.contextManager.getCwd());
+            if (projectStatic)
+                parts.push(projectStatic);
+            // Extension sections (tools, skills, instructions grouped by extension)
+            const extensionSections = this.buildExtensionSections();
+            if (extensionSections.length > 0) {
+                parts.push("# Extension Instructions\n\n" + extensionSections.join("\n\n"));
+            }
+            return parts.join("\n\n");
+        });
+        // ── Orthogonal core-state accessors ──────────────────────────
+        // Each handler exposes one cohesive piece of core-owned runtime
+        // state. Extensions compose whichever they need — core doesn't
+        // decide the aggregation shape. Adding a new handler here should
+        // only happen for state the core genuinely owns (not state that
+        // an extension could track by listening to events).
+        h.define("agent:get-mode", () => ({
+            model: this.currentMode.model,
+            provider: this.currentMode.provider ?? "",
+            thinkingLevel: this.thinkingLevel,
+            contextWindow: this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW,
+        }));
+        h.define("agent:get-tokens", () => {
+            const contextWindow = this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW;
+            const promptTokens = this.conversation.estimatePromptTokens();
+            return {
+                active: this.conversation.estimateTokens(),
+                peak: this.peakConversationTokens,
+                cumulativeCompacted: this.cumulativeCompactedTokens,
+                promptTokens,
+                contextPercent: Math.round((promptTokens / contextWindow) * 100),
+            };
+        });
+        h.define("agent:get-counters", () => ({
+            queryCount: this.queryCount,
+            totalToolCalls: this.totalToolCalls,
+            totalToolErrors: this.totalToolErrors,
+            totalResolutions: this.totalResolutions,
+            totalLoopIterations: this.totalLoopIterations,
+            errorRate: this.totalToolCalls > 0
+                ? Math.round((this.totalToolErrors / this.totalToolCalls) * 100)
+                : 0,
+        }));
+        h.define("agent:get-timing", () => ({
+            startedAt: this.sessionStartTime,
+            elapsedSeconds: Math.round((Date.now() - this.sessionStartTime) / 1000),
+        }));
+        h.define("agent:get-tool-stats", () => [...this.toolCallCounts.entries()]
+            .map(([name, counts]) => ({
+            name,
+            total: counts.success + counts.error,
+            success: counts.success,
+            error: counts.error,
+        }))
+            .sort((a, b) => b.total - a.total));
+        h.define("agent:get-file-read-cache", () => [...this.fileReadCache.entries()].map(([p, s]) => ({
+            path: p,
+            offset: s.offset,
+            limit: s.limit ?? null,
+            mtimeMs: s.mtimeMs,
+        })));
+        h.define("agent:get-recent-errors", () => ({
+            byTool: [...this.lastErrorByTool.entries()].map(([tool, error]) => ({ tool, error })),
+            byFile: [...this.lastErrorByFile.entries()].map(([file, error]) => ({ file, error })),
+        }));
+        h.define("agent:get-compaction-state", () => {
+            const contextWindow = this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW;
+            const ratio = getSettings().autoCompactThreshold ?? 0.5;
+            return {
+                count: this.compactionCount,
+                nuclearEntries: this.conversation.getNuclearEntryCount(),
+                autoCompactThreshold: ratio,
+                autoCompactThresholdTokens: Math.floor((contextWindow - RESPONSE_RESERVE) * ratio),
+            };
         });
+        h.define("agent:get-self", () => this);
         // Extensions compose additional context (git info, project rules, etc.)
-        h.define("dynamic-context:build", () => buildDynamicContext(this.contextManager, this.tokenBudget.shellBudgetTokens));
+        h.define("dynamic-context:build", () => {
+            const contextWindow = this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW;
+            const promptTokens = this.conversation.estimatePromptTokens();
+            return buildDynamicContext(this.contextManager, { promptTokens, contextWindow });
+        });
         // Full control over what the LLM sees: takes messages[], returns messages[].
         // Default: pass through. Extensions can advise to compact, summarize,
         // filter, reorder, inject — whatever strategy fits.
         h.define("conversation:prepare", (messages) => messages);
+        // ── Conversation primitives for compaction strategies ─────────
+        // Read messages (for inspection / computing new arrays) and replace
+        // the whole array (write side). Extensions implementing
+        // `conversation:compact` use these to observe and mutate.
+        h.define("conversation:get-messages", () => this.conversation.getMessages());
+        h.define("conversation:replace-messages", (msgs) => {
+            this.conversation.replaceMessages(msgs);
+        });
+        h.define("conversation:estimate-tokens", () => this.conversation.estimateTokens());
+        h.define("conversation:estimate-prompt-tokens", () => this.conversation.estimatePromptTokens());
+        // ── Nucleation (advisable) ─────────────────────────────────────
+        // Turn a raw message into a one-line NuclearEntry. Advisors enrich
+        // (e.g. `[why: ...]` extraction, adaptive summary lengths).
+        h.define("conversation:nucleate-user", (text, iid, seq) => nucleate("user", text, iid, seq));
+        h.define("conversation:nucleate-agent", (text, iid, seq) => nucleate("agent", text, iid, seq));
+        h.define("conversation:nucleate-tool", (toolName, args, content, isError, iid, seq) => nucleate(isError ? "error" : "tool", toolName, args, content, isError, iid, seq));
+        // Read-only views into the nuclear state, for compaction strategies
+        // and for introspection that reads without replacing.
+        h.define("conversation:get-nuclear-entries", () => this.conversation.getNuclearEntries());
+        h.define("conversation:get-nuclear-summary", () => this.conversation.getNuclearSummary());
+        h.define("conversation:build-nuclear-block", () => {
+            const summary = this.conversation.getNuclearSummary();
+            if (!summary)
+                return null;
+            return {
+                role: "user",
+                content: `[Conversation history \u2014 use conversation_recall to expand any entry]\n${summary}`,
+            };
+        });
+        // ── History file I/O (advisable) ───────────────────────────────
+        // Default is the append-only JSONL at ~/.agent-sh/history; advisors
+        // swap the backend without touching nucleation.
+        h.define("history:append", (entries) => {
+            if (!entries || entries.length === 0)
+                return;
+            const writable = entries.filter((e) => !isReadOnly(e));
+            if (writable.length > 0)
+                this.historyFile.append(writable).catch(() => { });
+        });
+        h.define("history:search", async (query) => this.historyFile.search(query));
+        h.define("history:find-by-seq", async (seq) => this.historyFile.findBySeq(seq));
+        h.define("history:read-recent", async (max) => this.historyFile.readRecent(max));
+        // Prior-session preamble renderer. Default: flat chronological list.
+        h.define("conversation:format-prior-history", (entries) => {
+            if (!entries || entries.length === 0)
+                return null;
+            const lines = entries.map(formatNuclearLine);
+            return `[Prior session history \u2014 loaded from ~/.agent-sh/history]\n${lines.join("\n")}`;
+        });
+        // Compaction strategy — default delegates to the two-tier pin
+        // strategy in ConversationState; advisors replace wholesale.
+        h.define("conversation:compact", (opts) => {
+            return this.conversation.compact(opts.target, opts.keepRecent, opts.force);
+        });
+        // Inject a system note mid-loop — used by extensions (subagents,
+        // peer messages) to deliver async results into the next iteration.
+        h.define("conversation:inject-note", (text) => {
+            this.conversation.addSystemNote(text);
+            this.bus.emit("conversation:message-appended", { role: "system", content: text });
+        });
         // Wraps each tool call: permission → execute → emit events.
         // Extensions advise to add safe-mode, logging, metrics, custom policies.
         // The ctx.onChunk callback is exposed so advisors can wrap it to
         // intercept/transform streamed tool output (e.g. secret redaction).
         h.define("tool:execute", async (ctx) => {
             const { name, id, args, tool } = ctx;
+            // Validate required input fields before display/permission/execute.
+            // Some models emit wrong arg names (e.g. `file_path` instead of `path`),
+            // and downstream helpers assume required strings are present.
+            const schema = tool.input_schema;
+            const required = Array.isArray(schema?.required) ? schema.required : [];
+            const missing = required.filter((k) => args[k] === undefined || args[k] === null);
+            if (missing.length > 0) {
+                const msg = `Missing required argument(s): ${missing.join(", ")}. Expected: ${required.join(", ")}. Received: ${Object.keys(args).join(", ") || "(none)"}`;
+                this.bus.emit("agent:tool-call", { tool: name, args });
+                return { content: msg, exitCode: 1, isError: true };
+            }
             const display = tool.getDisplayInfo?.(args) ?? { kind: "execute" };
             let diffShown = false;
             // Permission gating
@@ -473,37 +899,45 @@ export class AgentLoop {
                 if (tool.modifiesFiles && typeof args.path === "string") {
                     try {
                         const absPath = path.resolve(process.cwd(), args.path);
-                        let oldContent = null;
-                        try {
-                            oldContent = await fs.readFile(absPath, "utf-8");
-                        }
-                        catch { /* new file */ }
-                        let newContent;
-                        if (typeof args.content === "string") {
-                            // write_file
-                            newContent = args.content;
-                        }
-                        else if (typeof args.old_text === "string" && typeof args.new_text === "string" && oldContent !== null) {
-                            // edit_file
-                            newContent = oldContent.replace(args.old_text.replace(/\r\n/g, "\n"), args.new_text.replace(/\r\n/g, "\n"));
+                        let diff;
+                        if (typeof args.old_text === "string" && typeof args.new_text === "string") {
+                            // edit_file — read the file so line numbers are real (not relative to the edit region)
+                            const normalizedOld = args.old_text.replace(/\r\n/g, "\n");
+                            const normalizedNew = args.new_text.replace(/\r\n/g, "\n");
+                            try {
+                                const oldFileContent = await fs.readFile(absPath, "utf-8");
+                                diff = computeEditDiff(oldFileContent, normalizedOld, normalizedNew, args.replace_all === true);
+                            }
+                            catch {
+                                // File doesn't exist yet — fall back to input-only diff
+                                diff = computeInputDiff(normalizedOld, normalizedNew);
+                            }
                         }
-                        if (newContent !== undefined) {
-                            const diff = computeDiff(oldContent, newContent);
-                            if (!diff.isIdentical) {
-                                permKind = "file-write";
-                                // Shorten path for display
-                                const cwd = process.cwd();
-                                const home = process.env.HOME;
-                                let displayPath = absPath;
-                                if (absPath.startsWith(cwd + "/"))
-                                    displayPath = absPath.slice(cwd.length + 1);
-                                else if (home && absPath.startsWith(home + "/"))
-                                    displayPath = "~/" + absPath.slice(home.length + 1);
-                                permTitle = displayPath;
-                                metadata = { args, diff };
-                                diffShown = true;
+                        else if (typeof args.content === "string") {
+                            // write_file — still need to read the old file for comparison
+                            let oldContent = null;
+                            try {
+                                oldContent = await fs.readFile(absPath, "utf-8");
+                            }
+                            catch { /* new file */ }
+                            if (oldContent !== null) {
+                                diff = computeDiff(oldContent, args.content);
                             }
                         }
+                        if (diff && !diff.isIdentical) {
+                            permKind = "file-write";
+                            // Shorten path for display
+                            const cwd = process.cwd();
+                            const home = process.env.HOME;
+                            let displayPath = absPath;
+                            if (absPath.startsWith(cwd + "/"))
+                                displayPath = absPath.slice(cwd.length + 1);
+                            else if (home && absPath.startsWith(home + "/"))
+                                displayPath = "~/" + absPath.slice(home.length + 1);
+                            permTitle = displayPath;
+                            metadata = { args, diff };
+                            diffShown = true;
+                        }
                     }
                     catch { /* fall back to generic permission */ }
                 }
@@ -569,11 +1003,21 @@ export class AgentLoop {
         // Each loop iteration adds an abort listener (via OpenAI SDK stream);
         // disable the limit — long-running tool loops can easily exceed any cap.
         setMaxListeners(0, signal);
+        this.queryCount++;
         this.bus.emit("agent:query", { query });
         this.bus.emit("agent:processing-start", {});
         let responseText = "";
         try {
-            this.conversation.addUserMessage(query);
+            // Prepend any shell events that preceded this query into the same
+            // user message, so the conversation reads chronologically and we
+            // don't emit two consecutive user-role messages (some providers
+            // reject that).
+            const preDelta = this.contextManager.getEventsSince(this.lastShellSeq);
+            const userContent = preDelta ? `${preDelta.text}\n\n${query}` : query;
+            if (preDelta)
+                this.lastShellSeq = preDelta.lastSeq;
+            this.conversation.addUserMessage(userContent);
+            this.bus.emit("conversation:message-appended", { role: "user", content: query });
             responseText = await this.executeLoop(signal);
         }
         catch (e) {
@@ -581,6 +1025,8 @@ export class AgentLoop {
                 this.bus.emit("agent:cancelled", {});
             }
             else if (!signal.aborted) {
+                if (e instanceof Error)
+                    console.error("[agent-sh] query failed:\n" + e.stack);
                 const msg = this.formatError(e);
                 this.bus.emit("agent:error", { message: msg });
             }
@@ -606,23 +1052,33 @@ export class AgentLoop {
      */
     async executeLoop(signal) {
         let fullResponseText = "";
+        // System prompt carries things stable within a turn: static identity,
+        // global agent rules, project conventions, project skills. Invalidated
+        // only by compaction (context shape changed) or cwd change (project
+        // conventions/skills changed). Dynamic context rebuilds every iteration
+        // so live signals (budget, in-flight subagents, metacognitive warnings)
+        // are fresh.
+        let cachedSystemPrompt;
+        let lastCwd = this.contextManager.getCwd();
         while (!signal.aborted) {
-            // Auto-compact when conversation exceeds threshold fraction of budget
-            const budgetTokens = this.tokenBudget.conversationBudgetTokens;
-            const autoCompactThreshold = Math.floor(budgetTokens * getSettings().autoCompactThreshold);
-            if (this.conversation.estimateTokens() > autoCompactThreshold) {
-                const stats = this.conversation.compact(autoCompactThreshold);
-                await this.conversation.flush();
-                if (stats) {
-                    this.bus.emit("ui:info", {
-                        message: `(compacted: ~${stats.before.toLocaleString()} → ~${stats.after.toLocaleString()} tokens)`,
-                    });
-                }
+            // Auto-compact when total context approaches the window limit.
+            const totalEstimate = this.conversation.estimatePromptTokens();
+            const contextWindow = this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW;
+            const threshold = Math.floor((contextWindow - RESPONSE_RESERVE) * getSettings().autoCompactThreshold);
+            if (totalEstimate > threshold) {
+                this.compactWithHooks(threshold);
+                cachedSystemPrompt = undefined;
             }
-            // System prompt uses handler so extensions can append instructions (cacheable);
-            // dynamic context uses handler for per-query state via advise()
-            const systemPrompt = this.handlers.call("system-prompt:build");
+            const currentCwd = this.contextManager.getCwd();
+            if (currentCwd !== lastCwd) {
+                cachedSystemPrompt = undefined;
+                lastCwd = currentCwd;
+            }
+            const systemPrompt = cachedSystemPrompt ?? (cachedSystemPrompt = this.handlers.call("system-prompt:build"));
             const dynamicContext = this.handlers.call("dynamic-context:build");
+            // Shell events are injected once per user query (see query() above),
+            // not per loop iteration. Mid-loop injection would break the
+            // tool_call → tool_result chain some providers require.
             // Stream LLM response with retry
             const result = await this.streamWithRetry(systemPrompt, dynamicContext, signal);
             const { text, toolCalls: streamedToolCalls } = result;
@@ -632,9 +1088,15 @@ export class AgentLoop {
             fullResponseText += text;
             // Record the assistant message via protocol
             this.toolProtocol.recordAssistant(this.conversation, text, toolCalls);
+            this.bus.emit("conversation:message-appended", {
+                role: "assistant",
+                content: text,
+            });
             // No tool calls → agent is done
-            if (toolCalls.length === 0)
+            if (toolCalls.length === 0) {
+                this.conversation.eagerNucleateAgent(fullResponseText);
                 break;
+            }
             // Emit batch info so the TUI can render group headers upfront
             {
                 const groupMap = new Map();
@@ -663,6 +1125,8 @@ export class AgentLoop {
             // requiring tools sequentially (to avoid overlapping permission prompts).
             const batchTotal = toolCalls.length;
             const collectedResults = [];
+            // Round-scoped cache for pure, read-only tool calls
+            const roundCache = new Map();
             const executeSingle = async (tc, batchIndex) => {
                 // Rewrite meta-tool calls (e.g., use_extension → actual tool)
                 tc = this.toolProtocol.rewriteToolCall(tc);
@@ -697,6 +1161,39 @@ export class AgentLoop {
                     });
                     return;
                 }
+                // ── Round-scoped cache for cacheable read-only tools ──
+                const cacheable = !tool.modifiesFiles && !tool.requiresPermission && tool.showOutput !== true;
+                const cacheKey = cacheable ? `${tc.name}:${JSON.stringify(args)}` : null;
+                if (cacheKey) {
+                    const cached = roundCache.get(cacheKey);
+                    if (cached) {
+                        const display = tool.getDisplayInfo?.(args) ?? { kind: "execute" };
+                        this.bus.emit("agent:tool-started", {
+                            title: tool.displayName ?? tc.name,
+                            toolCallId: tc.id,
+                            kind: display.kind, icon: display.icon, locations: display.locations, rawInput: args,
+                            displayDetail: tool.formatCall?.(args),
+                            batchIndex, batchTotal: batchTotal > 1 ? batchTotal : undefined,
+                        });
+                        this.bus.emit("agent:tool-call", { tool: tc.name, args });
+                        // Reconstruct a ToolResult for formatResult; ProtocolToolResult has no exitCode
+                        const cachedToolResult = { content: cached.content, exitCode: 0, isError: cached.isError };
+                        const resultDisplay = tool.formatResult?.(args, cachedToolResult);
+                        this.bus.emitTransform("agent:tool-completed", {
+                            toolCallId: tc.id, exitCode: 0,
+                            rawOutput: cached.content, kind: display.kind,
+                            resultDisplay,
+                        });
+                        this.bus.emit("agent:tool-output", {
+                            tool: tc.name, output: cached.content, exitCode: 0,
+                        });
+                        collectedResults.push({
+                            callId: tc.id, toolName: tc.name,
+                            content: cached.content, isError: cached.isError,
+                        });
+                        return;
+                    }
+                }
                 // Execute via handler — extensions can advise to add safe-mode,
                 // logging, metrics, custom permission policies, etc.
                 const defaultOnChunk = (chunk) => {
@@ -728,10 +1225,14 @@ export class AgentLoop {
                         ...lines.slice(tailStart),
                     ].join("\n");
                 }
-                collectedResults.push({
+                const finalResult = {
                     callId: tc.id, toolName: tc.name,
                     content, isError: result.isError,
-                });
+                };
+                if (cacheKey) {
+                    roundCache.set(cacheKey, finalResult);
+                }
+                collectedResults.push(finalResult);
             };
             // Partition into parallel-safe (read-only) and sequential (needs permission)
             const parallel = [];
@@ -759,13 +1260,163 @@ export class AgentLoop {
                     break;
                 await executeSingle(tc, ++batchIdx);
             }
+            // ── Per-batch outcome summary (input to metacognitive nudges) ──
+            // Collect which tools errored or succeeded in this batch, plus a brief
+            // (up to 80 chars) summary per tool, last result winning. Nothing is nudged
+            // here; the data feeds resolution tracking and the batch-complete event below.
+            const errorTools = new Set();
+            const successTools = new Set();
+            const errorSummaries = new Map(); // tool → brief error description
+            const successSummaries = new Map(); // tool → brief success description
+            for (const r of collectedResults) {
+                const content = typeof r.content === "string" ? r.content : String(r.content);
+                const brief = content.slice(0, 80).replace(/\n/g, " ").trim();
+                if (r.isError) {
+                    errorTools.add(r.toolName);
+                    errorSummaries.set(r.toolName, brief);
+                }
+                else {
+                    successTools.add(r.toolName);
+                    successSummaries.set(r.toolName, brief);
+                }
+            }
+            const hadAnyError = errorTools.size > 0;
+            const hadAnySuccess = successTools.size > 0;
+            // ── Session telemetry accumulation ──
+            // Track every tool call's outcome. Exposed via orthogonal handlers
+            // (agent:get-counters, agent:get-tool-stats) for extensions that
+            // want behavioral signals. The data layer for metacognition — you
+            // can't improve what you don't measure.
+            for (const r of collectedResults) {
+                const counts = this.toolCallCounts.get(r.toolName) ?? { success: 0, error: 0 };
+                if (r.isError) {
+                    counts.error++;
+                    this.totalToolErrors++;
+                }
+                else {
+                    counts.success++;
+                }
+                this.toolCallCounts.set(r.toolName, counts);
+                this.totalToolCalls++;
+            }
+            this.totalLoopIterations++;
+            // ── Resolution pattern tracking ──
+            // When a tool errors, record the error context. When the same tool
+            // (or an edit_file/write_file/read_file call on the same file)
+            // succeeds afterward, increment totalResolutions (at most once per
+            // batch) — the positive signal exposed via agent:get-counters.
+            if (hadAnyError) {
+                for (const [tool, summary] of errorSummaries) {
+                    this.lastErrorByTool.set(tool, summary);
+                }
+                for (const r of collectedResults) {
+                    if (!r.isError)
+                        continue;
+                    const tc = toolCalls.find(t => t.id === r.callId || t.name === r.toolName);
+                    if (!tc)
+                        continue;
+                    try {
+                        const args = JSON.parse(tc.argumentsJson);
+                        const fp = this.filePathFromArgs(r.toolName, args);
+                        if (fp)
+                            this.lastErrorByFile.set(fp, errorSummaries.get(r.toolName) ?? "");
+                    }
+                    catch { }
+                }
+            }
+            if (hadAnySuccess) {
+                let resolved = false;
+                for (const [tool] of successSummaries) {
+                    if (this.lastErrorByTool.get(tool)) {
+                        this.lastErrorByTool.delete(tool);
+                        this.totalResolutions++;
+                        resolved = true;
+                        break;
+                    }
+                }
+                if (!resolved) {
+                    for (const r of collectedResults) {
+                        if (r.isError)
+                            continue;
+                        const tc = toolCalls.find(t => t.id === r.callId || t.name === r.toolName);
+                        if (!tc)
+                            continue;
+                        try {
+                            const args = JSON.parse(tc.argumentsJson);
+                            const fp = this.filePathFromArgs(r.toolName, args);
+                            if (fp && this.lastErrorByFile.get(fp)) {
+                                this.lastErrorByFile.delete(fp);
+                                this.totalResolutions++;
+                                break;
+                            }
+                        }
+                        catch { }
+                    }
+                }
+                // Clear resolved error-by-tool entries for successful tools
+                for (const tool of successTools) {
+                    this.lastErrorByTool.delete(tool);
+                }
+            }
+            // Announce the batch — extensions that care about batch-level
+            // outcomes (consecutive-error tracking, resolution pattern logging,
+            // metacognitive nudges) listen here.
+            this.bus.emit("agent:tool-batch-complete", {
+                results: collectedResults.map((r) => ({
+                    name: r.toolName,
+                    isError: !!r.isError,
+                    errorSummary: r.isError ? errorSummaries.get(r.toolName) : undefined,
+                })),
+            });
             // Record all tool results via protocol
             this.toolProtocol.recordResults(this.conversation, collectedResults);
+            const tcMap = new Map();
+            for (const tc of toolCalls) {
+                if (tc.id)
+                    tcMap.set(tc.id, tc);
+            }
+            this.conversation.eagerNucleateTools(collectedResults.map((r) => {
+                const tc = tcMap.get(r.callId);
+                let args = {};
+                try {
+                    args = tc ? JSON.parse(tc.argumentsJson) : {};
+                }
+                catch { }
+                return { toolName: r.toolName, args, content: r.content, isError: !!r.isError };
+            }));
+            // Emit enriched message-appended events so derived-log extensions
+            // can summarize each tool result without re-parsing the message
+            // structure.
+            for (const r of collectedResults) {
+                const content = typeof r.content === "string" ? r.content : String(r.content);
+                const tc = toolCalls.find(t => t.id === r.callId || t.name === r.toolName);
+                let args = {};
+                try {
+                    args = tc ? JSON.parse(tc.argumentsJson) : {};
+                }
+                catch { }
+                this.bus.emit("conversation:message-appended", {
+                    role: "tool",
+                    content,
+                    toolName: r.toolName,
+                    toolArgs: args,
+                    isError: !!r.isError,
+                });
+            }
             // Loop back — LLM sees tool results
         }
         return fullResponseText;
     }
     maxRetries = 3;
+    // ── Resolution pattern helpers ──
+    // Extract the file path from a file tool's arguments so an error can be
+    // correlated with a later successful call on the same file.
+    filePathFromArgs(toolName, args) { // returns the path value, or undefined
+        if (toolName === "edit_file" || toolName === "write_file" || toolName === "read_file") { // only these three tools are treated as file-scoped
+            return (args.path ?? args.file_path); // fall back to `file_path` when `path` is null/undefined
+        }
+        return undefined; // any other tool has no associated file here
+    }
     /**
      * Stream with retry logic. Handles:
      *   - Context overflow → compact and retry
@@ -782,12 +1433,20 @@ export class AgentLoop {
                     throw e;
                 // Context overflow — aggressively compact and retry
                 if (this.isContextOverflow(e)) {
-                    // Use 60% of the budget to leave headroom
-                    const aggressiveBudget = Math.floor(this.tokenBudget.conversationBudgetTokens * 0.6);
-                    const stats = this.conversation.compact(aggressiveBudget, 6);
-                    await this.conversation.flush();
-                    const detail = stats ? ` ~${stats.before.toLocaleString()} → ~${stats.after.toLocaleString()} tokens` : "";
-                    this.bus.emit("ui:info", { message: `(context overflow — compacted${detail}, retrying)` });
+                    const contextWindow = this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW;
+                    const target = Math.floor((contextWindow - RESPONSE_RESERVE) * 0.6);
+                    const stats = this.compactWithHooks(target, 6);
+                    // If compaction freed nothing, retrying will hit the same error.
+                    // Surface the real failure instead of looping until exhaustion.
+                    if (!stats || stats.after >= stats.before) {
+                        this.bus.emit("ui:info", {
+                            message: "(context overflow — nothing to compact; aborting retries)",
+                        });
+                        throw e;
+                    }
+                    this.bus.emit("ui:info", {
+                        message: `(context overflow — compacted ~${stats.before.toLocaleString()} → ~${stats.after.toLocaleString()} tokens, retrying)`,
+                    });
                     continue;
                 }
                 // Retryable transient error — backoff
@@ -851,11 +1510,16 @@ export class AgentLoop {
             // Token usage (may arrive in a chunk with empty choices)
             if (chunk.usage) {
                 const u = chunk.usage;
+                const promptTokens = u.prompt_tokens ?? 0;
                 this.bus.emit("agent:usage", {
-                    prompt_tokens: u.prompt_tokens ?? 0,
+                    prompt_tokens: promptTokens,
                     completion_tokens: u.completion_tokens ?? 0,
                     total_tokens: u.total_tokens ?? 0,
                 });
+                // Feed accurate token count back to conversation state
+                if (promptTokens > 0) {
+                    this.conversation.updateApiTokenCount(promptTokens);
+                }
             }
             const choice = chunk.choices[0];
             if (!choice)
@@ -907,6 +1571,25 @@ export class AgentLoop {
                 });
             }
         }
+        // Normalize arguments JSON — some providers (Alibaba/qwen) strictly
+        // validate `function.arguments` as parseable JSON on the NEXT turn,
+        // and reject empty strings or partial chunks. OpenAI itself is lenient,
+        // so empty "" slips through locally but the replay breaks upstream.
+        for (const tc of pendingToolCalls) {
+            if (!tc)
+                continue;
+            const s = tc.argumentsJson.trim();
+            if (s === "") {
+                tc.argumentsJson = "{}";
+                continue;
+            }
+            try {
+                JSON.parse(s);
+            }
+            catch {
+                tc.argumentsJson = "{}";
+            }
+        }
         return {
             text,
             toolCalls: pendingToolCalls,