npm - agent-sh - Versions diffs - 0.9.0 → 0.10.1 - Mend

agent-sh 0.9.0 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (88)

package/README.md +25 -30
package/dist/agent/agent-loop.d.ts +43 -6
package/dist/agent/agent-loop.js +817 -157
package/dist/agent/conversation-state.d.ts +72 -21
package/dist/agent/conversation-state.js +364 -151
package/dist/agent/history-file.d.ts +13 -4
package/dist/agent/history-file.js +110 -36
package/dist/agent/nuclear-form.d.ts +28 -3
package/dist/agent/nuclear-form.js +84 -3
package/dist/agent/skills.d.ts +2 -4
package/dist/agent/skills.js +10 -4
package/dist/agent/subagent.d.ts +23 -0
package/dist/agent/subagent.js +53 -11
package/dist/agent/system-prompt.d.ts +34 -1
package/dist/agent/system-prompt.js +96 -47
package/dist/agent/token-budget.d.ts +10 -13
package/dist/agent/token-budget.js +6 -46
package/dist/agent/tool-protocol.d.ts +23 -1
package/dist/agent/tool-protocol.js +169 -4
package/dist/agent/tools/bash.js +3 -3
package/dist/agent/tools/edit-file.js +9 -6
package/dist/agent/tools/glob.js +4 -2
package/dist/agent/tools/grep.js +27 -3
package/dist/agent/tools/ls.js +5 -6
package/dist/agent/types.d.ts +1 -2
package/dist/context-manager.d.ts +16 -19
package/dist/context-manager.js +48 -152
package/dist/core.js +27 -6
package/dist/event-bus.d.ts +59 -3
package/dist/executor.d.ts +4 -3
package/dist/executor.js +18 -15
package/dist/extension-loader.js +75 -17
package/dist/extensions/agent-backend.d.ts +8 -7
package/dist/extensions/agent-backend.js +72 -50
package/dist/extensions/index.js +0 -2
package/dist/extensions/slash-commands.js +14 -9
package/dist/extensions/tui-renderer.js +67 -80
package/dist/index.js +25 -6
package/dist/settings.d.ts +39 -16
package/dist/settings.js +51 -11
package/dist/shell/input-handler.d.ts +2 -1
package/dist/shell/input-handler.js +84 -76
package/dist/shell/shell.js +19 -2
package/dist/types.d.ts +15 -0
package/dist/utils/ansi.d.ts +7 -0
package/dist/utils/ansi.js +69 -8
package/dist/utils/box-frame.js +8 -2
package/dist/utils/compositor.d.ts +5 -0
package/dist/utils/compositor.js +31 -3
package/dist/utils/diff-renderer.d.ts +9 -0
package/dist/utils/diff-renderer.js +221 -143
package/dist/utils/diff.d.ts +21 -2
package/dist/utils/diff.js +165 -89
package/dist/utils/handler-registry.d.ts +5 -0
package/dist/utils/handler-registry.js +6 -0
package/dist/utils/line-editor.d.ts +11 -1
package/dist/utils/line-editor.js +44 -5
package/dist/utils/markdown.js +23 -8
package/dist/utils/package-version.d.ts +1 -0
package/dist/utils/package-version.js +10 -0
package/dist/utils/shell-output-spill.d.ts +2 -0
package/dist/utils/shell-output-spill.js +81 -0
package/dist/utils/tool-display.d.ts +1 -1
package/dist/utils/tool-display.js +4 -4
package/examples/extensions/ash-acp-bridge/src/index.ts +4 -1
package/examples/extensions/ash-mcp-bridge/index.ts +13 -3
package/examples/extensions/claude-code-bridge/README.md +14 -0
package/examples/extensions/claude-code-bridge/index.ts +204 -145
package/examples/extensions/claude-code-bridge/package.json +1 -0
package/examples/extensions/interactive-prompts.ts +39 -25
package/examples/extensions/overlay-agent.ts +3 -3
package/examples/extensions/peer-mesh.ts +115 -0
package/examples/extensions/pi-bridge/README.md +16 -0
package/examples/extensions/pi-bridge/index.ts +9 -155
package/examples/extensions/questionnaire.ts +16 -5
package/examples/extensions/subagents.ts +19 -4
package/examples/extensions/terminal-buffer.ts +163 -0
package/examples/extensions/user-shell.ts +136 -0
package/examples/extensions/web-access.ts +8 -0
package/package.json +36 -2
package/dist/agent/tools/display.d.ts +0 -13
package/dist/agent/tools/display.js +0 -70
package/dist/agent/tools/user-shell.d.ts +0 -13
package/dist/agent/tools/user-shell.js +0 -87
package/dist/extensions/shell-recall.d.ts +0 -9
package/dist/extensions/shell-recall.js +0 -8
package/dist/extensions/terminal-buffer.d.ts +0 -14
package/dist/extensions/terminal-buffer.js +0 -134

package/dist/agent/system-prompt.js CHANGED Viewed

@@ -1,18 +1,66 @@
 import * as fs from "node:fs";
 import * as path from "node:path";
 import { fileURLToPath } from "node:url";
-import { discoverSkills } from "./skills.js";
+import { discoverProjectSkills } from "./skills.js";
+/**
+ * Format skills for inline display in prompt.
+ * Shows name, description, and file path so the model can decide immediately
+ * whether to load a skill — no extra round-trip needed.
+ */
+export function formatSkillsBlock(skills) {
+    if (skills.length === 0)
+        return "";
+    return "# Available Skills\n\n"
+        + "Load a skill's full content with read_file on its file path when needed.\n\n"
+        + skills.map(s => `- **${s.name}**: ${s.description}\n  Path: ${s.filePath}`).join("\n\n");
+}
+// Resolve to the user's home-based config dir — user's standing instructions to the agent
+import * as os from "node:os";
+const GLOBAL_AGENTS_MD = path.join(os.homedir(), ".agent-sh", "AGENTS.md");
+// ── File caches ─────────────────────────────────────────────────────
+// Convention files (CLAUDE.md/AGENT.md) are walked synchronously from
+// CWD to root on every query. In practice they almost never change,
+// so a short TTL cache keyed by CWD avoids redundant filesystem walks.
+// The 5-second TTL is short enough to pick up edits quickly but long
+// enough to eliminate repeated walks within a multi-tool agent loop.
+const CACHE_TTL_MS = 5_000;
+/** Single-entry TTL cache for convention files; reused only while the resolved CWD matches. */
+let conventionCache = null;
+/** TTL cache for global AGENTS.md — changes extremely rarely. */
+let agentsMdCache = null;
+export function loadGlobalAgentsMd() {
+    const now = Date.now();
+    if (agentsMdCache && now < agentsMdCache.expiry) {
+        return agentsMdCache.result;
+    }
+    try {
+        const content = fs.readFileSync(GLOBAL_AGENTS_MD, "utf-8").trim();
+        const result = content || null;
+        agentsMdCache = { result, expiry: now + CACHE_TTL_MS };
+        return result;
+    }
+    catch {
+        agentsMdCache = { result: null, expiry: now + CACHE_TTL_MS };
+        return null;
+    }
+}
 /** Resolve, relative to this module, the absolute base path used to locate agent-sh's own docs. */
-const DOCS_DIR = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../../docs");
+const CODE_DIR = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../../");
 /** File names to scan for project conventions (checked in order). */
 const CONVENTION_FILES = ["CLAUDE.md", "AGENT.md"];
 /**
  * Scan from `dir` upward for project convention files.
  * Returns contents ordered root-first (general → specific).
+ * Only the most recent result is cached (for CACHE_TTL_MS) and reused when the resolved directory is the same.
  */
 function loadConventionFiles(dir) {
+    const cwd = path.resolve(dir);
+    const now = Date.now();
+    if (conventionCache && conventionCache.cwd === cwd && now < conventionCache.expiry) {
+        return conventionCache.result;
+    }
     const files = [];
-    let current = path.resolve(dir);
+    let current = cwd;
     while (true) {
         for (const name of CONVENTION_FILES) {
             const candidate = path.join(current, name);
@@ -33,40 +81,25 @@ function loadConventionFiles(dir) {
         current = parent;
     }
     files.reverse();
-    return files.map(f => `<!-- ${f.path} -->\n${f.content}`);
+    const result = files.map(f => `<!-- ${f.path} -->\n${f.content}`);
+    conventionCache = { cwd, result, expiry: now + CACHE_TTL_MS };
+    return result;
 }
 /**
  * Static system prompt — identical across all queries, cacheable.
  * Contains only identity and behavioral instructions.
  */
-export const STATIC_SYSTEM_PROMPT = `You are ash, an AI coding assistant embedded in agent-sh, a terminal shell.
+export const STATIC_SYSTEM_PROMPT = `You are an AI coding assistant running inside agent-sh, a terminal shell.
 You have access to the user's shell environment and can read, write, and execute code.
 You share the user's working directory, environment variables, and shell history.
+agent-sh documentation is at ${path.join(CODE_DIR, "docs")} — start with README.md for an index. Read the docs when you need to understand how the runtime works.
 # Tool Decision Guide
-You have three categories of tools — choose based on who needs the output and
-whether the command has lasting effects:
-**Scratchpad tools** (bash, read_file, grep, glob, ls, edit_file, write_file):
+bash, read_file, grep, glob, ls, edit_file, write_file::
 Use these to investigate, search, read, and modify files. Output is returned
 to you for reasoning — the user doesn't see it directly.
-**Display** (display):
-Use this to show output to the user in their terminal. The user sees the
-output directly, but it is NOT returned to you. Use when:
-- The user asks to see something (cat a file, git log, git diff, man page)
-- The output is for the user to read, not for you to process
-**Live shell** (user_shell):
-Use this to run complete, non-interactive commands in the user's real shell. Use for:
-- Commands that affect shell state (cd, export, source)
-- Installing packages, starting servers, running builds
-- Any command where the user wants real side effects
-- Set return_output=true only if you need to inspect the result
-Default to scratchpad tools for your own investigation. Use display when the
-user is the intended audience. Use user_shell when the command has real effects.
+Extensions may register additional tools — follow their instructions.
 # Tool Usage Guidelines
 - Use read_file before editing a file you haven't seen
@@ -75,34 +108,50 @@ user is the intended audience. Use user_shell when the command has real effects.
 - Keep bash commands focused; avoid long-running blocking commands
 - Always check command exit codes for errors
-# Documentation
-agent-sh documentation is available in: ${DOCS_DIR}
-Use read_file on ${DOCS_DIR}/README.md for an index of all docs.`;
+# Preference Learning
+Treat the user's past commands as standing preferences. Before acting, check shell history
+and conversation context for recurring patterns — apply them proactively and do not wait to
+be reminded.`;
 /**
- * Build the dynamic context — injected as a user message before each query.
- * Contains everything that changes: shell context, conventions, cwd.
- *
- * Runs through the "dynamic-context:build" handler so extensions can advise.
+ * CWD-scoped static context: project conventions (CLAUDE.md / AGENT.md)
+ * and discovered skills. Stable for a given cwd — callers should cache
+ * on cwd identity rather than rebuilding per LLM iteration.
  */
-export function buildDynamicContext(contextManager, shellBudgetTokens) {
+export function buildStaticByCwd(cwd) {
     const sections = [];
-    // Project conventions (CLAUDE.md / AGENT.md)
-    const conventions = loadConventionFiles(contextManager.getCwd());
+    const conventions = loadConventionFiles(cwd);
     if (conventions.length > 0) {
         sections.push("# Project Conventions\n\n" + conventions.join("\n\n"));
     }
-    // Skills hint
-    const skills = discoverSkills(contextManager.getCwd());
-    if (skills.length > 0) {
-        sections.push(`You have access to ${skills.length} skill(s). Use the list_skills tool to see them, then read_file to load one.`);
-    }
-    // Shell context — pass token budget converted to bytes (~4 chars/token)
-    const shellBudgetBytes = shellBudgetTokens != null ? shellBudgetTokens * 4 : undefined;
-    const shellContext = contextManager.getContext(shellBudgetBytes);
-    if (shellContext) {
-        sections.push(shellContext);
+    const projectSkills = discoverProjectSkills(cwd);
+    const skillsBlock = formatSkillsBlock(projectSkills);
+    if (skillsBlock) {
+        sections.push(skillsBlock);
     }
-    // Metadata
-    sections.push(`Current date: ${new Date().toISOString().split("T")[0]}\nWorking directory: ${contextManager.getCwd()}`);
     return sections.join("\n\n");
 }
+/**
+ * Per-iteration dynamic context: date, working directory, token usage.
+ * Rebuilt every LLM call. Extension advisors add more sections (budget,
+ * subagents, metacognitive signals, etc.) on top.
+ *
+ * Skills, AGENTS.md, and project conventions live in the system prompt
+ * (see `system-prompt:build` in agent-loop) so they enter the provider's
+ * prefix cache instead of being rebuilt and re-sent every turn.
+ *
+ * Shell context is likewise not injected here — it flows into the
+ * conversation as incremental <shell-events> messages (see
+ * AgentLoop.injectShellDelta) for the same reason.
+ */
+export function buildDynamicContext(contextManager, tokenStatus) {
+    const envLines = [
+        `Current date: ${new Date().toISOString().split("T")[0]}`,
+        `Working directory: ${contextManager.getCwd()}`,
+    ];
+    const usedK = (tokenStatus.promptTokens / 1000).toFixed(1);
+    const maxK = (tokenStatus.contextWindow / 1000).toFixed(0);
+    const pct = Math.min(100, Math.round((tokenStatus.promptTokens / tokenStatus.contextWindow) * 100));
+    envLines.push(`Token usage: ${usedK}k/${maxK}k (${pct}%)`);
+    return `<environment>\n${envLines.join("\n")}\n</environment>`;
+}

package/dist/agent/token-budget.d.ts CHANGED Viewed

@@ -1,13 +1,10 @@
-export declare class TokenBudget {
-    private contextWindow;
-    private toolCount;
-    constructor(contextWindow?: number, toolCount?: number);
-    /** Update when model or tool set changes. */
-    update(contextWindow?: number, toolCount?: number): void;
-    /** Total tokens available for shell context + conversation content. */
-    get contentBudget(): number;
-    /** Token budget for the shell context stream. */
-    get shellBudgetTokens(): number;
-    /** Token budget for the conversation messages stream. */
-    get conversationBudgetTokens(): number;
-}
+/**
+ * Shared token-budget constants used by auto-compaction.
+ *
+ * RESPONSE_RESERVE: tokens reserved for the model's output.
+ * DEFAULT_CONTEXT_WINDOW: fallback when the active mode doesn't declare one.
+ */
+/** Response reserve — tokens reserved for the model's output. */
+export declare const RESPONSE_RESERVE = 8192;
+/** Fallback when contextWindow is unknown. */
+export declare const DEFAULT_CONTEXT_WINDOW = 60000;

package/dist/agent/token-budget.js CHANGED Viewed

@@ -1,50 +1,10 @@
 /**
- * Unified token budget manager.
+ * Shared token-budget constants used by auto-compaction.
  *
- * Splits a model's context window between two streams:
- *   - Shell context (user shell commands and outputs — situational awareness)
- *   - Conversation (agent messages and tool results — task continuity)
- *
- * The budget accounts for fixed overhead (system prompt, tool definitions,
- * response reserve) and divides the remaining space by a configurable ratio.
+ * RESPONSE_RESERVE: tokens reserved for the model's output.
+ * DEFAULT_CONTEXT_WINDOW: fallback when the active mode doesn't declare one.
  */
-import { getSettings } from "../settings.js";
-/** Overhead estimates (tokens). */
-const SYSTEM_PROMPT_OVERHEAD = 800;
-const DYNAMIC_CONTEXT_OVERHEAD = 500; // conventions, metadata, skills list
-const TOKENS_PER_TOOL_DEFINITION = 50;
-const RESPONSE_RESERVE = 8192; // matches llm-client.ts default max_tokens
+/** Response reserve — tokens reserved for the model's output. */
+export const RESPONSE_RESERVE = 8192;
 /** Fallback when contextWindow is unknown. */
-const DEFAULT_CONTEXT_WINDOW = 60_000;
-export class TokenBudget {
-    contextWindow;
-    toolCount;
-    constructor(contextWindow, toolCount = 0) {
-        this.contextWindow = contextWindow ?? DEFAULT_CONTEXT_WINDOW;
-        this.toolCount = toolCount;
-    }
-    /** Update when model or tool set changes. */
-    update(contextWindow, toolCount) {
-        if (contextWindow != null)
-            this.contextWindow = contextWindow;
-        if (toolCount != null)
-            this.toolCount = toolCount;
-    }
-    /** Total tokens available for shell context + conversation content. */
-    get contentBudget() {
-        const overhead = SYSTEM_PROMPT_OVERHEAD +
-            DYNAMIC_CONTEXT_OVERHEAD +
-            this.toolCount * TOKENS_PER_TOOL_DEFINITION +
-            RESPONSE_RESERVE;
-        return Math.max(0, this.contextWindow - overhead);
-    }
-    /** Token budget for the shell context stream. */
-    get shellBudgetTokens() {
-        const ratio = getSettings().shellContextRatio;
-        return Math.floor(this.contentBudget * ratio);
-    }
-    /** Token budget for the conversation messages stream. */
-    get conversationBudgetTokens() {
-        return this.contentBudget - this.shellBudgetTokens;
-    }
-}
+export const DEFAULT_CONTEXT_WINDOW = 60_000;

package/dist/agent/tool-protocol.d.ts CHANGED Viewed

@@ -44,6 +44,12 @@ export interface ToolProtocol {
     recordResults(conv: ConversationState, results: ToolResult[]): void;
     /** Create a stream filter for stripping tool calls from display. null = pass-through. */
     createStreamFilter(toolNames: string[]): StreamFilter | null;
+    /**
+     * Extra tool definitions the protocol wants registered in the tool registry.
+     * Used by deferred-lookup mode to register its `load_tool` meta-tool.
+     * Default: none.
+     */
+    getProtocolTools?(): ToolDefinition[];
 }
 export declare class ApiToolProtocol implements ToolProtocol {
     readonly mode: "api";
@@ -80,4 +86,20 @@ export declare class DeferredToolProtocol implements ToolProtocol {
     recordResults(conv: ConversationState, results: ToolResult[]): void;
     createStreamFilter(): null;
 }
-export declare function createToolProtocol(mode: "api" | "inline" | "deferred"): ToolProtocol;
+export declare class DeferredLookupProtocol implements ToolProtocol {
+    readonly mode: "deferred-lookup";
+    private coreNames;
+    private loadedExt;
+    /** Cache of the current tools list so load_tool's execute can find schemas. */
+    private toolsRef;
+    constructor(coreNames: string[]);
+    getApiTools(tools: ToolDefinition[]): ChatCompletionTool[] | undefined;
+    getToolPrompt(): string;
+    extractToolCalls(_text: string, streamedCalls: PendingToolCall[]): PendingToolCall[];
+    rewriteToolCall(tc: PendingToolCall): PendingToolCall;
+    recordAssistant(conv: ConversationState, text: string, toolCalls: PendingToolCall[]): void;
+    recordResults(conv: ConversationState, results: ToolResult[]): void;
+    createStreamFilter(): null;
+    getProtocolTools(): ToolDefinition[];
+}
+export declare function createToolProtocol(mode: "api" | "inline" | "deferred" | "deferred-lookup"): ToolProtocol;

package/dist/agent/tool-protocol.js CHANGED Viewed

@@ -34,7 +34,7 @@ export class ApiToolProtocol {
     recordResults(conv, results) {
         for (const r of results) {
             const content = r.isError ? `Error: ${r.content}` : r.content;
-            conv.addToolResult(r.callId, content);
+            conv.addToolResult(r.callId, content, r.isError);
         }
     }
     createStreamFilter() {
@@ -363,24 +363,189 @@ export class DeferredToolProtocol {
     recordResults(conv, results) {
         for (const r of results) {
             const content = r.isError ? `Error: ${r.content}` : r.content;
-            conv.addToolResult(r.callId, content);
+            conv.addToolResult(r.callId, content, r.isError);
         }
     }
     createStreamFilter() {
         return null;
     }
 }
+// ── Deferred-lookup mode (load-on-demand with full schema) ──────
+//
+// Like deferred, but instead of wrapping extension calls through a meta-
+// tool dispatcher, we expose a `load_tool` meta-tool that returns the
+// full schema as a tool result AND mutates the protocol's loaded set.
+// Loaded tools become first-class on the NEXT LLM call — the model calls
+// them natively with complete schema fidelity. One round-trip per group
+// of tools loaded, not per call. Prevents the whole class of bugs where
+// models guess arg names from a schema they can only see partially.
+export class DeferredLookupProtocol {
+    mode = "deferred-lookup";
+    coreNames;
+    loadedExt = new Set();
+    /** Cache of the current tools list so load_tool's execute can find schemas. */
+    toolsRef = [];
+    constructor(coreNames) {
+        this.coreNames = new Set(coreNames);
+    }
+    getApiTools(tools) {
+        this.toolsRef = tools;
+        const visible = [];
+        const unloadedExt = [];
+        for (const t of tools) {
+            if (t.name === "load_tool")
+                continue; // rebuilt below with fresh catalog
+            const isCore = this.coreNames.has(t.name);
+            const isLoaded = this.loadedExt.has(t.name);
+            if (isCore || isLoaded) {
+                visible.push({
+                    type: "function",
+                    function: {
+                        name: t.name,
+                        description: t.description,
+                        parameters: t.input_schema,
+                    },
+                });
+            }
+            else {
+                unloadedExt.push(t.name);
+            }
+        }
+        if (unloadedExt.length > 0) {
+            visible.push({
+                type: "function",
+                function: {
+                    name: "load_tool",
+                    description: `Load extension tool schemas so you can call them on the next turn. ` +
+                        `Unloaded: ${unloadedExt.join(", ")}. ` +
+                        `After load_tool succeeds, call those tools directly — not through load_tool again.`,
+                    parameters: {
+                        type: "object",
+                        properties: {
+                            names: {
+                                type: "array",
+                                items: { type: "string" },
+                                description: "Names of extension tools to load.",
+                            },
+                        },
+                        required: ["names"],
+                    },
+                },
+            });
+        }
+        return visible.length > 0 ? visible : undefined;
+    }
+    getToolPrompt() {
+        return "";
+    }
+    extractToolCalls(_text, streamedCalls) {
+        return streamedCalls;
+    }
+    rewriteToolCall(tc) {
+        return tc; // no dispatching needed — load_tool is a real registered tool
+    }
+    recordAssistant(conv, text, toolCalls) {
+        const calls = toolCalls.length
+            ? toolCalls.map((tc) => ({
+                id: tc.id,
+                function: { name: tc.name, arguments: tc.argumentsJson },
+            }))
+            : undefined;
+        conv.addAssistantMessage(text || null, calls);
+    }
+    recordResults(conv, results) {
+        for (const r of results) {
+            const content = r.isError ? `Error: ${r.content}` : r.content;
+            conv.addToolResult(r.callId, content, r.isError);
+        }
+    }
+    createStreamFilter() {
+        return null;
+    }
+    getProtocolTools() {
+        // load_tool is registered as a real tool so the executor can run it
+        // through the normal dispatch path. Its execute closes over the protocol
+        // instance to mutate the loadedExt set and return schemas.
+        const self = this;
+        return [
+            {
+                name: "load_tool",
+                description: "Load extension tool schemas so you can call them natively on the next turn.",
+                input_schema: {
+                    type: "object",
+                    properties: {
+                        names: {
+                            type: "array",
+                            items: { type: "string" },
+                            description: "Names of extension tools to load.",
+                        },
+                    },
+                    required: ["names"],
+                },
+                showOutput: false,
+                async execute(args) {
+                    const names = Array.isArray(args.names) ? args.names : [];
+                    if (names.length === 0) {
+                        return { content: "No tool names provided. Pass { names: [...] }.", exitCode: 1, isError: true };
+                    }
+                    const loaded = [];
+                    const alreadyLoaded = [];
+                    const errors = [];
+                    const sections = [];
+                    for (const name of names) {
+                        const tool = self.toolsRef.find((t) => t.name === name);
+                        if (!tool) {
+                            errors.push(`Unknown tool: ${name}`);
+                            continue;
+                        }
+                        if (self.coreNames.has(name) || name === "load_tool") {
+                            errors.push(`${name} is already available — no need to load.`);
+                            continue;
+                        }
+                        if (self.loadedExt.has(name)) {
+                            alreadyLoaded.push(name);
+                            continue;
+                        }
+                        self.loadedExt.add(name);
+                        loaded.push(name);
+                        sections.push(`## ${name}\n${tool.description}\n\nSchema:\n\`\`\`json\n${JSON.stringify(tool.input_schema, null, 2)}\n\`\`\``);
+                    }
+                    const lines = [];
+                    if (loaded.length > 0) {
+                        lines.push(`Loaded ${loaded.length} tool(s): ${loaded.join(", ")}. ` +
+                            `They are now available as first-class tools on your next turn — call directly.`);
+                        lines.push("");
+                        lines.push(sections.join("\n\n"));
+                    }
+                    if (alreadyLoaded.length > 0) {
+                        lines.push(`Already loaded: ${alreadyLoaded.join(", ")}.`);
+                    }
+                    if (errors.length > 0) {
+                        lines.push(`Errors:\n${errors.map((e) => `- ${e}`).join("\n")}`);
+                    }
+                    return {
+                        content: lines.join("\n") || "Nothing to do.",
+                        exitCode: 0,
+                        isError: loaded.length === 0 && alreadyLoaded.length === 0 && errors.length > 0,
+                    };
+                },
+            },
+        ];
+    }
+}
 // ── Factory ─────────────────────────────────────────────────────
 /** Core tool names — always sent with full schema. */
 const CORE_TOOLS = [
     "bash", "read_file", "write_file", "edit_file",
-    "grep", "glob", "ls", "user_shell", "display",
-    "list_skills", "conversation_recall",
+    "grep", "glob", "ls",
+    "list_skills",
 ];
 export function createToolProtocol(mode) {
     if (mode === "inline")
         return new InlineToolProtocol();
     if (mode === "deferred")
         return new DeferredToolProtocol(CORE_TOOLS);
+    if (mode === "deferred-lookup")
+        return new DeferredLookupProtocol(CORE_TOOLS);
     return new ApiToolProtocol();
 }

package/dist/agent/tools/bash.js CHANGED Viewed

@@ -3,10 +3,10 @@ export function createBashTool(opts) {
     return {
         name: "bash",
         description: "Execute a bash command in an isolated subprocess. Output is captured and returned. " +
-            "Does not affect the user's shell state (use user_shell for cd, export, source). " +
+            "Does not affect the user's shell state. " +
+            "cwd is set to the working directory from the shell context. " +
             "Do NOT use bash for file searching — use grep/glob instead. " +
-            "Do NOT use bash for reading files — use read_file instead. " +
-            "Provide a description parameter to explain what the command does.",
+            "Do NOT use bash for reading files — use read_file instead.",
         input_schema: {
             type: "object",
             properties: {

package/dist/agent/tools/edit-file.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import * as fs from "node:fs/promises";
 import * as path from "node:path";
-import { computeDiff } from "../../utils/diff.js";
+import { computeEditDiff } from "../../utils/diff.js";
 /**
  * Find the closest matching region in the file content to help diagnose
  * why an exact match failed. Returns a hint string.
@@ -103,9 +103,12 @@ export function createEditFileTool(getCwd) {
                     };
                 }
                 const normalizedNew = newText.replace(/\r\n/g, "\n");
-                const newContent = replaceAll
-                    ? normalized.split(normalizedOld).join(normalizedNew)
-                    : normalized.replace(normalizedOld, normalizedNew);
+                // Use split/join for literal replacement everywhere. String.replace()
+                // treats `$` patterns in the replacement string (`$&`, `$1`, `$$`, ...) as
+                // special substitutions, which corrupts new text that contains them.
+                const newContent = normalized.split(normalizedOld).join(normalizedNew);
+                // Note: when !replaceAll, we rely on the occurrence check above to ensure
+                // normalizedOld appears exactly once, so split/join replaces only that one.
                 // Restore original line endings — only convert if the file was
                 // predominantly CRLF (>50% of line endings), to avoid corrupting
                 // mixed-ending files.
@@ -116,8 +119,8 @@ export function createEditFileTool(getCwd) {
                     ? newContent.replace(/\n/g, "\r\n")
                     : newContent;
                 await fs.writeFile(absPath, finalContent);
-                // Compute and stream diff for display
-                const diff = computeDiff(normalized, newContent);
+                // Compute and stream diff for display (windowed — only diffs the edit region)
+                const diff = computeEditDiff(normalized, normalizedOld, normalizedNew, replaceAll);
                 if (onChunk && diff.hunks.length > 0) {
                     for (const hunk of diff.hunks) {
                         for (const line of hunk.lines) {

package/dist/agent/tools/glob.js CHANGED Viewed

@@ -4,9 +4,11 @@ import { executeCommand } from "../../executor.js";
 export function createGlobTool(getCwd) {
     return {
         name: "glob",
-        description: "Find files by name pattern. Returns paths sorted by modification time (newest first). " +
+        description: "Use this when you know a FILENAME or PATH SHAPE (e.g. `**/*.ts`, `src/**/*.md`, `package.json`). " +
+            "Returns matching file paths sorted by modification time (newest first). " +
+            "This does NOT search file contents — use `grep` for that. " +
             "ALWAYS use this instead of find/ls via bash. " +
-            "Use glob to locate files, then read_file or grep to inspect contents.",
+            "Typical flow: `glob` to locate files, then `read_file` or `grep` to inspect contents.",
         input_schema: {
             type: "object",
             properties: {

package/dist/agent/tools/grep.js CHANGED Viewed

@@ -2,7 +2,9 @@ import { executeCommand } from "../../executor.js";
 export function createGrepTool(getCwd) {
     return {
         name: "grep",
-        description: "Search file contents using ripgrep. ALWAYS use this instead of running grep/rg via bash. " +
+        description: "Use this when you know something INSIDE the file (text, identifier, regex). " +
+            "To find files by filename alone, use `glob` instead. " +
+            "Search file contents using ripgrep. ALWAYS use this instead of running grep/rg via bash. " +
             "Supports three output modes: " +
             "'files_with_matches' (default, returns file paths only — use this to find which files contain a pattern), " +
             "'content' (matching lines with optional context_before/context_after), and " +
@@ -13,7 +15,7 @@ export function createGrepTool(getCwd) {
             properties: {
                 pattern: {
                     type: "string",
-                    description: "Regex pattern to search for",
+                    description: "Regex pattern to search for (NOT a glob — `*.md` is invalid here; use `.*\\.md` for regex, or use the glob tool to find files by name). For filename filtering while searching content, use the `include` parameter.",
                 },
                 path: {
                     type: "string",
@@ -124,12 +126,34 @@ export function createGrepTool(getCwd) {
             });
             await done;
             if (session.exitCode === 1 && !session.output.trim()) {
+                // If the pattern looks like a filename (e.g. "SKILL.md", "package.json"),
+                // the agent probably meant to find files by name, not search inside them.
+                // Surface a redirect hint instead of a silent zero.
+                const looksLikeFilename = /^[A-Za-z0-9_.\-*/]+\.[A-Za-z0-9]{1,6}$/.test(pattern) &&
+                    !/[\\()\[\]|^$+{}]/.test(pattern);
+                const hint = looksLikeFilename
+                    ? ` Hint: "${pattern}" looks like a filename. grep searches file *contents* — to find files by name, use the \`glob\` tool instead.`
+                    : "";
                 return {
-                    content: "No matches found.",
+                    content: `No matches found.${hint}`,
                     exitCode: 0,
                     isError: false,
                 };
             }
+            // exit code >= 2 is a ripgrep error (invalid regex, unreadable path, etc).
+            // Surface it as an error so the model retries with a correct pattern
+            // rather than treating "no useful output" as a successful no-match.
+            if (session.exitCode != null && session.exitCode >= 2) {
+                const looksLikeGlob = /^[*?]|\*\./.test(pattern) && !/[\\()\[\]|^$]/.test(pattern);
+                const hint = looksLikeGlob
+                    ? " Hint: `*.md` is a glob, not a regex — use the glob tool to find files by name, or pass `include: \"*.md\"` here to filter files while searching content for a regex pattern."
+                    : "";
+                return {
+                    content: `grep failed (rg exit ${session.exitCode}): ${session.output.trim() || "no output"}${hint}`,
+                    exitCode: session.exitCode,
+                    isError: true,
+                };
+            }
             let output = session.output;
             // Cap individual line lengths to 500 chars to prevent minified/base64 flood
             if (mode === "content") {