agent-sh 0.9.0 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. package/README.md +25 -30
  2. package/dist/agent/agent-loop.d.ts +43 -6
  3. package/dist/agent/agent-loop.js +817 -157
  4. package/dist/agent/conversation-state.d.ts +72 -21
  5. package/dist/agent/conversation-state.js +364 -151
  6. package/dist/agent/history-file.d.ts +13 -4
  7. package/dist/agent/history-file.js +110 -36
  8. package/dist/agent/nuclear-form.d.ts +28 -3
  9. package/dist/agent/nuclear-form.js +84 -3
  10. package/dist/agent/skills.d.ts +2 -4
  11. package/dist/agent/skills.js +10 -4
  12. package/dist/agent/subagent.d.ts +23 -0
  13. package/dist/agent/subagent.js +53 -11
  14. package/dist/agent/system-prompt.d.ts +34 -1
  15. package/dist/agent/system-prompt.js +96 -47
  16. package/dist/agent/token-budget.d.ts +10 -13
  17. package/dist/agent/token-budget.js +6 -46
  18. package/dist/agent/tool-protocol.d.ts +23 -1
  19. package/dist/agent/tool-protocol.js +169 -4
  20. package/dist/agent/tools/bash.js +3 -3
  21. package/dist/agent/tools/edit-file.js +9 -6
  22. package/dist/agent/tools/glob.js +4 -2
  23. package/dist/agent/tools/grep.js +27 -3
  24. package/dist/agent/tools/ls.js +5 -6
  25. package/dist/agent/types.d.ts +1 -2
  26. package/dist/context-manager.d.ts +16 -19
  27. package/dist/context-manager.js +48 -152
  28. package/dist/core.js +27 -6
  29. package/dist/event-bus.d.ts +59 -3
  30. package/dist/executor.d.ts +4 -3
  31. package/dist/executor.js +18 -15
  32. package/dist/extension-loader.js +75 -17
  33. package/dist/extensions/agent-backend.d.ts +8 -7
  34. package/dist/extensions/agent-backend.js +72 -50
  35. package/dist/extensions/index.js +0 -2
  36. package/dist/extensions/slash-commands.js +14 -9
  37. package/dist/extensions/tui-renderer.js +67 -80
  38. package/dist/index.js +25 -6
  39. package/dist/settings.d.ts +39 -16
  40. package/dist/settings.js +51 -11
  41. package/dist/shell/input-handler.d.ts +2 -1
  42. package/dist/shell/input-handler.js +84 -76
  43. package/dist/shell/shell.js +19 -2
  44. package/dist/types.d.ts +15 -0
  45. package/dist/utils/ansi.d.ts +7 -0
  46. package/dist/utils/ansi.js +69 -8
  47. package/dist/utils/box-frame.js +8 -2
  48. package/dist/utils/compositor.d.ts +5 -0
  49. package/dist/utils/compositor.js +31 -3
  50. package/dist/utils/diff-renderer.d.ts +9 -0
  51. package/dist/utils/diff-renderer.js +221 -143
  52. package/dist/utils/diff.d.ts +21 -2
  53. package/dist/utils/diff.js +165 -89
  54. package/dist/utils/handler-registry.d.ts +5 -0
  55. package/dist/utils/handler-registry.js +6 -0
  56. package/dist/utils/line-editor.d.ts +11 -1
  57. package/dist/utils/line-editor.js +44 -5
  58. package/dist/utils/markdown.js +23 -8
  59. package/dist/utils/package-version.d.ts +1 -0
  60. package/dist/utils/package-version.js +10 -0
  61. package/dist/utils/shell-output-spill.d.ts +2 -0
  62. package/dist/utils/shell-output-spill.js +81 -0
  63. package/dist/utils/tool-display.d.ts +1 -1
  64. package/dist/utils/tool-display.js +4 -4
  65. package/examples/extensions/ash-acp-bridge/src/index.ts +4 -1
  66. package/examples/extensions/ash-mcp-bridge/index.ts +13 -3
  67. package/examples/extensions/claude-code-bridge/README.md +14 -0
  68. package/examples/extensions/claude-code-bridge/index.ts +204 -145
  69. package/examples/extensions/claude-code-bridge/package.json +1 -0
  70. package/examples/extensions/interactive-prompts.ts +39 -25
  71. package/examples/extensions/overlay-agent.ts +3 -3
  72. package/examples/extensions/peer-mesh.ts +115 -0
  73. package/examples/extensions/pi-bridge/README.md +16 -0
  74. package/examples/extensions/pi-bridge/index.ts +9 -155
  75. package/examples/extensions/questionnaire.ts +16 -5
  76. package/examples/extensions/subagents.ts +19 -4
  77. package/examples/extensions/terminal-buffer.ts +163 -0
  78. package/examples/extensions/user-shell.ts +136 -0
  79. package/examples/extensions/web-access.ts +8 -0
  80. package/package.json +36 -2
  81. package/dist/agent/tools/display.d.ts +0 -13
  82. package/dist/agent/tools/display.js +0 -70
  83. package/dist/agent/tools/user-shell.d.ts +0 -13
  84. package/dist/agent/tools/user-shell.js +0 -87
  85. package/dist/extensions/shell-recall.d.ts +0 -9
  86. package/dist/extensions/shell-recall.js +0 -8
  87. package/dist/extensions/terminal-buffer.d.ts +0 -14
  88. package/dist/extensions/terminal-buffer.js +0 -134
@@ -1,18 +1,66 @@
1
1
  import * as fs from "node:fs";
2
2
  import * as path from "node:path";
3
3
  import { fileURLToPath } from "node:url";
4
- import { discoverSkills } from "./skills.js";
4
+ import { discoverProjectSkills } from "./skills.js";
5
+ /**
6
+ * Format skills for inline display in prompt.
7
+ * Shows name, description, and file path so the model can decide immediately
8
+ * whether to load a skill — no extra round-trip needed.
9
+ */
10
+ export function formatSkillsBlock(skills) {
11
+ if (skills.length === 0)
12
+ return "";
13
+ return "# Available Skills\n\n"
14
+ + "Load a skill's full content with read_file on its file path when needed.\n\n"
15
+ + skills.map(s => `- **${s.name}**: ${s.description}\n Path: ${s.filePath}`).join("\n\n");
16
+ }
17
+ // Resolve to the user's home-based config dir — user's standing instructions to the agent
18
+ import * as os from "node:os";
19
+ const GLOBAL_AGENTS_MD = path.join(os.homedir(), ".agent-sh", "AGENTS.md");
20
+ // ── File caches ─────────────────────────────────────────────────────
21
+ // Convention files (CLAUDE.md/AGENT.md) are walked synchronously from
22
+ // CWD to root on every query. In practice they almost never change,
23
+ // so a short TTL cache keyed by CWD avoids redundant filesystem walks.
24
+ // The 5-second TTL is short enough to pick up edits quickly but long
25
+ // enough to eliminate repeated walks within a multi-tool agent loop.
26
+ const CACHE_TTL_MS = 5_000;
27
+ /** TTL cache for convention files, keyed by resolved CWD. */
28
+ let conventionCache = null;
29
+ /** TTL cache for global AGENTS.md — changes extremely rarely. */
30
+ let agentsMdCache = null;
31
+ export function loadGlobalAgentsMd() {
32
+ const now = Date.now();
33
+ if (agentsMdCache && now < agentsMdCache.expiry) {
34
+ return agentsMdCache.result;
35
+ }
36
+ try {
37
+ const content = fs.readFileSync(GLOBAL_AGENTS_MD, "utf-8").trim();
38
+ const result = content || null;
39
+ agentsMdCache = { result, expiry: now + CACHE_TTL_MS };
40
+ return result;
41
+ }
42
+ catch {
43
+ agentsMdCache = { result: null, expiry: now + CACHE_TTL_MS };
44
+ return null;
45
+ }
46
+ }
5
47
  /** Resolve the absolute path to agent-sh's package root; the docs directory is its `docs/` subdirectory. */
6
- const DOCS_DIR = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../../docs");
48
+ const CODE_DIR = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../../");
7
49
  /** File names to scan for project conventions (checked in order). */
8
50
  const CONVENTION_FILES = ["CLAUDE.md", "AGENT.md"];
9
51
  /**
10
52
  * Scan from `dir` upward for project convention files.
11
53
  * Returns contents ordered root-first (general → specific).
54
+ * Results are cached for CACHE_TTL_MS, keyed by resolved directory.
12
55
  */
13
56
  function loadConventionFiles(dir) {
57
+ const cwd = path.resolve(dir);
58
+ const now = Date.now();
59
+ if (conventionCache && conventionCache.cwd === cwd && now < conventionCache.expiry) {
60
+ return conventionCache.result;
61
+ }
14
62
  const files = [];
15
- let current = path.resolve(dir);
63
+ let current = cwd;
16
64
  while (true) {
17
65
  for (const name of CONVENTION_FILES) {
18
66
  const candidate = path.join(current, name);
@@ -33,40 +81,25 @@ function loadConventionFiles(dir) {
33
81
  current = parent;
34
82
  }
35
83
  files.reverse();
36
- return files.map(f => `<!-- ${f.path} -->\n${f.content}`);
84
+ const result = files.map(f => `<!-- ${f.path} -->\n${f.content}`);
85
+ conventionCache = { cwd, result, expiry: now + CACHE_TTL_MS };
86
+ return result;
37
87
  }
38
88
  /**
39
89
  * Static system prompt — identical across all queries, cacheable.
40
90
  * Contains only identity and behavioral instructions.
41
91
  */
42
- export const STATIC_SYSTEM_PROMPT = `You are ash, an AI coding assistant embedded in agent-sh, a terminal shell.
92
+ export const STATIC_SYSTEM_PROMPT = `You are an AI coding assistant running inside agent-sh, a terminal shell.
43
93
  You have access to the user's shell environment and can read, write, and execute code.
44
94
  You share the user's working directory, environment variables, and shell history.
95
+ agent-sh documentation is at ${path.join(CODE_DIR, "docs")} — start with README.md for an index. Read the docs when you need to understand how the runtime works.
45
96
 
46
97
  # Tool Decision Guide
47
-
48
- You have three categories of tools — choose based on who needs the output and
49
- whether the command has lasting effects:
50
-
51
- **Scratchpad tools** (bash, read_file, grep, glob, ls, edit_file, write_file):
98
+ bash, read_file, grep, glob, ls, edit_file, write_file::
52
99
  Use these to investigate, search, read, and modify files. Output is returned
53
100
  to you for reasoning — the user doesn't see it directly.
54
101
 
55
- **Display** (display):
56
- Use this to show output to the user in their terminal. The user sees the
57
- output directly, but it is NOT returned to you. Use when:
58
- - The user asks to see something (cat a file, git log, git diff, man page)
59
- - The output is for the user to read, not for you to process
60
-
61
- **Live shell** (user_shell):
62
- Use this to run complete, non-interactive commands in the user's real shell. Use for:
63
- - Commands that affect shell state (cd, export, source)
64
- - Installing packages, starting servers, running builds
65
- - Any command where the user wants real side effects
66
- - Set return_output=true only if you need to inspect the result
67
-
68
- Default to scratchpad tools for your own investigation. Use display when the
69
- user is the intended audience. Use user_shell when the command has real effects.
102
+ Extensions may register additional tools — follow their instructions.
70
103
 
71
104
  # Tool Usage Guidelines
72
105
  - Use read_file before editing a file you haven't seen
@@ -75,34 +108,50 @@ user is the intended audience. Use user_shell when the command has real effects.
75
108
  - Keep bash commands focused; avoid long-running blocking commands
76
109
  - Always check command exit codes for errors
77
110
 
78
- # Documentation
79
- agent-sh documentation is available in: ${DOCS_DIR}
80
- Use read_file on ${DOCS_DIR}/README.md for an index of all docs.`;
111
+ # Preference Learning
112
+
113
+ Treat the user's past commands as standing preferences. Before acting, check shell history
114
+ and conversation context for recurring patterns — apply them proactively and do not wait to
115
+ be reminded.`;
81
116
  /**
82
- * Build the dynamic context injected as a user message before each query.
83
- * Contains everything that changes: shell context, conventions, cwd.
84
- *
85
- * Runs through the "dynamic-context:build" handler so extensions can advise.
117
+ * CWD-scoped static context: project conventions (CLAUDE.md / AGENT.md)
118
+ * and discovered skills. Stable for a given cwd — callers should cache
119
+ * on cwd identity rather than rebuilding per LLM iteration.
86
120
  */
87
- export function buildDynamicContext(contextManager, shellBudgetTokens) {
121
+ export function buildStaticByCwd(cwd) {
88
122
  const sections = [];
89
- // Project conventions (CLAUDE.md / AGENT.md)
90
- const conventions = loadConventionFiles(contextManager.getCwd());
123
+ const conventions = loadConventionFiles(cwd);
91
124
  if (conventions.length > 0) {
92
125
  sections.push("# Project Conventions\n\n" + conventions.join("\n\n"));
93
126
  }
94
- // Skills hint
95
- const skills = discoverSkills(contextManager.getCwd());
96
- if (skills.length > 0) {
97
- sections.push(`You have access to ${skills.length} skill(s). Use the list_skills tool to see them, then read_file to load one.`);
98
- }
99
- // Shell context — pass token budget converted to bytes (~4 chars/token)
100
- const shellBudgetBytes = shellBudgetTokens != null ? shellBudgetTokens * 4 : undefined;
101
- const shellContext = contextManager.getContext(shellBudgetBytes);
102
- if (shellContext) {
103
- sections.push(shellContext);
127
+ const projectSkills = discoverProjectSkills(cwd);
128
+ const skillsBlock = formatSkillsBlock(projectSkills);
129
+ if (skillsBlock) {
130
+ sections.push(skillsBlock);
104
131
  }
105
- // Metadata
106
- sections.push(`Current date: ${new Date().toISOString().split("T")[0]}\nWorking directory: ${contextManager.getCwd()}`);
107
132
  return sections.join("\n\n");
108
133
  }
134
+ /**
135
+ * Per-iteration dynamic context: date, working directory, token usage.
136
+ * Rebuilt every LLM call. Extension advisors add more sections (budget,
137
+ * subagents, metacognitive signals, etc.) on top.
138
+ *
139
+ * Skills, AGENTS.md, and project conventions live in the system prompt
140
+ * (see `system-prompt:build` in agent-loop) so they enter the provider's
141
+ * prefix cache instead of being rebuilt and re-sent every turn.
142
+ *
143
+ * Shell context is likewise not injected here — it flows into the
144
+ * conversation as incremental <shell-events> messages (see
145
+ * AgentLoop.injectShellDelta) for the same reason.
146
+ */
147
+ export function buildDynamicContext(contextManager, tokenStatus) {
148
+ const envLines = [
149
+ `Current date: ${new Date().toISOString().split("T")[0]}`,
150
+ `Working directory: ${contextManager.getCwd()}`,
151
+ ];
152
+ const usedK = (tokenStatus.promptTokens / 1000).toFixed(1);
153
+ const maxK = (tokenStatus.contextWindow / 1000).toFixed(0);
154
+ const pct = Math.min(100, Math.round((tokenStatus.promptTokens / tokenStatus.contextWindow) * 100));
155
+ envLines.push(`Token usage: ${usedK}k/${maxK}k (${pct}%)`);
156
+ return `<environment>\n${envLines.join("\n")}\n</environment>`;
157
+ }
@@ -1,13 +1,10 @@
1
- export declare class TokenBudget {
2
- private contextWindow;
3
- private toolCount;
4
- constructor(contextWindow?: number, toolCount?: number);
5
- /** Update when model or tool set changes. */
6
- update(contextWindow?: number, toolCount?: number): void;
7
- /** Total tokens available for shell context + conversation content. */
8
- get contentBudget(): number;
9
- /** Token budget for the shell context stream. */
10
- get shellBudgetTokens(): number;
11
- /** Token budget for the conversation messages stream. */
12
- get conversationBudgetTokens(): number;
13
- }
1
+ /**
2
+ * Shared token-budget constants used by auto-compaction.
3
+ *
4
+ * RESPONSE_RESERVE: tokens reserved for the model's output.
5
+ * DEFAULT_CONTEXT_WINDOW: fallback when the active mode doesn't declare one.
6
+ */
7
+ /** Response reserve — tokens reserved for the model's output. */
8
+ export declare const RESPONSE_RESERVE = 8192;
9
+ /** Fallback when contextWindow is unknown. */
10
+ export declare const DEFAULT_CONTEXT_WINDOW = 60000;
@@ -1,50 +1,10 @@
1
1
  /**
2
- * Unified token budget manager.
2
+ * Shared token-budget constants used by auto-compaction.
3
3
  *
4
- * Splits a model's context window between two streams:
5
- * - Shell context (user shell commands and outputs — situational awareness)
6
- * - Conversation (agent messages and tool results — task continuity)
7
- *
8
- * The budget accounts for fixed overhead (system prompt, tool definitions,
9
- * response reserve) and divides the remaining space by a configurable ratio.
4
+ * RESPONSE_RESERVE: tokens reserved for the model's output.
5
+ * DEFAULT_CONTEXT_WINDOW: fallback when the active mode doesn't declare one.
10
6
  */
11
- import { getSettings } from "../settings.js";
12
- /** Overhead estimates (tokens). */
13
- const SYSTEM_PROMPT_OVERHEAD = 800;
14
- const DYNAMIC_CONTEXT_OVERHEAD = 500; // conventions, metadata, skills list
15
- const TOKENS_PER_TOOL_DEFINITION = 50;
16
- const RESPONSE_RESERVE = 8192; // matches llm-client.ts default max_tokens
7
+ /** Response reserve — tokens reserved for the model's output. */
8
+ export const RESPONSE_RESERVE = 8192;
17
9
  /** Fallback when contextWindow is unknown. */
18
- const DEFAULT_CONTEXT_WINDOW = 60_000;
19
- export class TokenBudget {
20
- contextWindow;
21
- toolCount;
22
- constructor(contextWindow, toolCount = 0) {
23
- this.contextWindow = contextWindow ?? DEFAULT_CONTEXT_WINDOW;
24
- this.toolCount = toolCount;
25
- }
26
- /** Update when model or tool set changes. */
27
- update(contextWindow, toolCount) {
28
- if (contextWindow != null)
29
- this.contextWindow = contextWindow;
30
- if (toolCount != null)
31
- this.toolCount = toolCount;
32
- }
33
- /** Total tokens available for shell context + conversation content. */
34
- get contentBudget() {
35
- const overhead = SYSTEM_PROMPT_OVERHEAD +
36
- DYNAMIC_CONTEXT_OVERHEAD +
37
- this.toolCount * TOKENS_PER_TOOL_DEFINITION +
38
- RESPONSE_RESERVE;
39
- return Math.max(0, this.contextWindow - overhead);
40
- }
41
- /** Token budget for the shell context stream. */
42
- get shellBudgetTokens() {
43
- const ratio = getSettings().shellContextRatio;
44
- return Math.floor(this.contentBudget * ratio);
45
- }
46
- /** Token budget for the conversation messages stream. */
47
- get conversationBudgetTokens() {
48
- return this.contentBudget - this.shellBudgetTokens;
49
- }
50
- }
10
+ export const DEFAULT_CONTEXT_WINDOW = 60_000;
@@ -44,6 +44,12 @@ export interface ToolProtocol {
44
44
  recordResults(conv: ConversationState, results: ToolResult[]): void;
45
45
  /** Create a stream filter for stripping tool calls from display. null = pass-through. */
46
46
  createStreamFilter(toolNames: string[]): StreamFilter | null;
47
+ /**
48
+ * Extra tool definitions the protocol wants registered in the tool registry.
49
+ * Used by deferred-lookup mode to register its `load_tool` meta-tool.
50
+ * Default: none.
51
+ */
52
+ getProtocolTools?(): ToolDefinition[];
47
53
  }
48
54
  export declare class ApiToolProtocol implements ToolProtocol {
49
55
  readonly mode: "api";
@@ -80,4 +86,20 @@ export declare class DeferredToolProtocol implements ToolProtocol {
80
86
  recordResults(conv: ConversationState, results: ToolResult[]): void;
81
87
  createStreamFilter(): null;
82
88
  }
83
- export declare function createToolProtocol(mode: "api" | "inline" | "deferred"): ToolProtocol;
89
+ export declare class DeferredLookupProtocol implements ToolProtocol {
90
+ readonly mode: "deferred-lookup";
91
+ private coreNames;
92
+ private loadedExt;
93
+ /** Cache of the current tools list so load_tool's execute can find schemas. */
94
+ private toolsRef;
95
+ constructor(coreNames: string[]);
96
+ getApiTools(tools: ToolDefinition[]): ChatCompletionTool[] | undefined;
97
+ getToolPrompt(): string;
98
+ extractToolCalls(_text: string, streamedCalls: PendingToolCall[]): PendingToolCall[];
99
+ rewriteToolCall(tc: PendingToolCall): PendingToolCall;
100
+ recordAssistant(conv: ConversationState, text: string, toolCalls: PendingToolCall[]): void;
101
+ recordResults(conv: ConversationState, results: ToolResult[]): void;
102
+ createStreamFilter(): null;
103
+ getProtocolTools(): ToolDefinition[];
104
+ }
105
+ export declare function createToolProtocol(mode: "api" | "inline" | "deferred" | "deferred-lookup"): ToolProtocol;
@@ -34,7 +34,7 @@ export class ApiToolProtocol {
34
34
  recordResults(conv, results) {
35
35
  for (const r of results) {
36
36
  const content = r.isError ? `Error: ${r.content}` : r.content;
37
- conv.addToolResult(r.callId, content);
37
+ conv.addToolResult(r.callId, content, r.isError);
38
38
  }
39
39
  }
40
40
  createStreamFilter() {
@@ -363,24 +363,189 @@ export class DeferredToolProtocol {
363
363
  recordResults(conv, results) {
364
364
  for (const r of results) {
365
365
  const content = r.isError ? `Error: ${r.content}` : r.content;
366
- conv.addToolResult(r.callId, content);
366
+ conv.addToolResult(r.callId, content, r.isError);
367
367
  }
368
368
  }
369
369
  createStreamFilter() {
370
370
  return null;
371
371
  }
372
372
  }
373
+ // ── Deferred-lookup mode (load-on-demand with full schema) ──────
374
+ //
375
+ // Like deferred, but instead of wrapping extension calls through a meta-
376
+ // tool dispatcher, we expose a `load_tool` meta-tool that returns the
377
+ // full schema as a tool result AND mutates the protocol's loaded set.
378
+ // Loaded tools become first-class on the NEXT LLM call — the model calls
379
+ // them natively with complete schema fidelity. One round-trip per group
380
+ // of tools loaded, not per call. Prevents the whole class of bugs where
381
+ // models guess arg names from a schema they can only see partially.
382
+ export class DeferredLookupProtocol {
383
+ mode = "deferred-lookup";
384
+ coreNames;
385
+ loadedExt = new Set();
386
+ /** Cache of the current tools list so load_tool's execute can find schemas. */
387
+ toolsRef = [];
388
+ constructor(coreNames) {
389
+ this.coreNames = new Set(coreNames);
390
+ }
391
+ getApiTools(tools) {
392
+ this.toolsRef = tools;
393
+ const visible = [];
394
+ const unloadedExt = [];
395
+ for (const t of tools) {
396
+ if (t.name === "load_tool")
397
+ continue; // rebuilt below with fresh catalog
398
+ const isCore = this.coreNames.has(t.name);
399
+ const isLoaded = this.loadedExt.has(t.name);
400
+ if (isCore || isLoaded) {
401
+ visible.push({
402
+ type: "function",
403
+ function: {
404
+ name: t.name,
405
+ description: t.description,
406
+ parameters: t.input_schema,
407
+ },
408
+ });
409
+ }
410
+ else {
411
+ unloadedExt.push(t.name);
412
+ }
413
+ }
414
+ if (unloadedExt.length > 0) {
415
+ visible.push({
416
+ type: "function",
417
+ function: {
418
+ name: "load_tool",
419
+ description: `Load extension tool schemas so you can call them on the next turn. ` +
420
+ `Unloaded: ${unloadedExt.join(", ")}. ` +
421
+ `After load_tool succeeds, call those tools directly — not through load_tool again.`,
422
+ parameters: {
423
+ type: "object",
424
+ properties: {
425
+ names: {
426
+ type: "array",
427
+ items: { type: "string" },
428
+ description: "Names of extension tools to load.",
429
+ },
430
+ },
431
+ required: ["names"],
432
+ },
433
+ },
434
+ });
435
+ }
436
+ return visible.length > 0 ? visible : undefined;
437
+ }
438
+ getToolPrompt() {
439
+ return "";
440
+ }
441
+ extractToolCalls(_text, streamedCalls) {
442
+ return streamedCalls;
443
+ }
444
+ rewriteToolCall(tc) {
445
+ return tc; // no dispatching needed — load_tool is a real registered tool
446
+ }
447
+ recordAssistant(conv, text, toolCalls) {
448
+ const calls = toolCalls.length
449
+ ? toolCalls.map((tc) => ({
450
+ id: tc.id,
451
+ function: { name: tc.name, arguments: tc.argumentsJson },
452
+ }))
453
+ : undefined;
454
+ conv.addAssistantMessage(text || null, calls);
455
+ }
456
+ recordResults(conv, results) {
457
+ for (const r of results) {
458
+ const content = r.isError ? `Error: ${r.content}` : r.content;
459
+ conv.addToolResult(r.callId, content, r.isError);
460
+ }
461
+ }
462
+ createStreamFilter() {
463
+ return null;
464
+ }
465
+ getProtocolTools() {
466
+ // load_tool is registered as a real tool so the executor can run it
467
+ // through the normal dispatch path. Its execute closes over the protocol
468
+ // instance to mutate the loadedExt set and return schemas.
469
+ const self = this;
470
+ return [
471
+ {
472
+ name: "load_tool",
473
+ description: "Load extension tool schemas so you can call them natively on the next turn.",
474
+ input_schema: {
475
+ type: "object",
476
+ properties: {
477
+ names: {
478
+ type: "array",
479
+ items: { type: "string" },
480
+ description: "Names of extension tools to load.",
481
+ },
482
+ },
483
+ required: ["names"],
484
+ },
485
+ showOutput: false,
486
+ async execute(args) {
487
+ const names = Array.isArray(args.names) ? args.names : [];
488
+ if (names.length === 0) {
489
+ return { content: "No tool names provided. Pass { names: [...] }.", exitCode: 1, isError: true };
490
+ }
491
+ const loaded = [];
492
+ const alreadyLoaded = [];
493
+ const errors = [];
494
+ const sections = [];
495
+ for (const name of names) {
496
+ const tool = self.toolsRef.find((t) => t.name === name);
497
+ if (!tool) {
498
+ errors.push(`Unknown tool: ${name}`);
499
+ continue;
500
+ }
501
+ if (self.coreNames.has(name) || name === "load_tool") {
502
+ errors.push(`${name} is already available — no need to load.`);
503
+ continue;
504
+ }
505
+ if (self.loadedExt.has(name)) {
506
+ alreadyLoaded.push(name);
507
+ continue;
508
+ }
509
+ self.loadedExt.add(name);
510
+ loaded.push(name);
511
+ sections.push(`## ${name}\n${tool.description}\n\nSchema:\n\`\`\`json\n${JSON.stringify(tool.input_schema, null, 2)}\n\`\`\``);
512
+ }
513
+ const lines = [];
514
+ if (loaded.length > 0) {
515
+ lines.push(`Loaded ${loaded.length} tool(s): ${loaded.join(", ")}. ` +
516
+ `They are now available as first-class tools on your next turn — call directly.`);
517
+ lines.push("");
518
+ lines.push(sections.join("\n\n"));
519
+ }
520
+ if (alreadyLoaded.length > 0) {
521
+ lines.push(`Already loaded: ${alreadyLoaded.join(", ")}.`);
522
+ }
523
+ if (errors.length > 0) {
524
+ lines.push(`Errors:\n${errors.map((e) => `- ${e}`).join("\n")}`);
525
+ }
526
+ return {
527
+ content: lines.join("\n") || "Nothing to do.",
528
+ exitCode: 0,
529
+ isError: loaded.length === 0 && alreadyLoaded.length === 0 && errors.length > 0,
530
+ };
531
+ },
532
+ },
533
+ ];
534
+ }
535
+ }
373
536
  // ── Factory ─────────────────────────────────────────────────────
374
537
  /** Core tool names — always sent with full schema. */
375
538
  const CORE_TOOLS = [
376
539
  "bash", "read_file", "write_file", "edit_file",
377
- "grep", "glob", "ls", "user_shell", "display",
378
- "list_skills", "conversation_recall",
540
+ "grep", "glob", "ls",
541
+ "list_skills",
379
542
  ];
380
543
  export function createToolProtocol(mode) {
381
544
  if (mode === "inline")
382
545
  return new InlineToolProtocol();
383
546
  if (mode === "deferred")
384
547
  return new DeferredToolProtocol(CORE_TOOLS);
548
+ if (mode === "deferred-lookup")
549
+ return new DeferredLookupProtocol(CORE_TOOLS);
385
550
  return new ApiToolProtocol();
386
551
  }
@@ -3,10 +3,10 @@ export function createBashTool(opts) {
3
3
  return {
4
4
  name: "bash",
5
5
  description: "Execute a bash command in an isolated subprocess. Output is captured and returned. " +
6
- "Does not affect the user's shell state (use user_shell for cd, export, source). " +
6
+ "Does not affect the user's shell state. " +
7
+ "cwd is set to the working directory from the shell context. " +
7
8
  "Do NOT use bash for file searching — use grep/glob instead. " +
8
- "Do NOT use bash for reading files — use read_file instead. " +
9
- "Provide a description parameter to explain what the command does.",
9
+ "Do NOT use bash for reading files — use read_file instead.",
10
10
  input_schema: {
11
11
  type: "object",
12
12
  properties: {
@@ -1,6 +1,6 @@
1
1
  import * as fs from "node:fs/promises";
2
2
  import * as path from "node:path";
3
- import { computeDiff } from "../../utils/diff.js";
3
+ import { computeEditDiff } from "../../utils/diff.js";
4
4
  /**
5
5
  * Find the closest matching region in the file content to help diagnose
6
6
  * why an exact match failed. Returns a hint string.
@@ -103,9 +103,12 @@ export function createEditFileTool(getCwd) {
103
103
  };
104
104
  }
105
105
  const normalizedNew = newText.replace(/\r\n/g, "\n");
106
- const newContent = replaceAll
107
- ? normalized.split(normalizedOld).join(normalizedNew)
108
- : normalized.replace(normalizedOld, normalizedNew);
106
+ // Use split/join for literal replacement everywhere. String.replace()
107
+ // treats dollar-sign patterns in the replacement string (e.g. `$&`, `$$`) as
108
+ // substitution patterns, which corrupts replacement text that contains them.
109
+ const newContent = normalized.split(normalizedOld).join(normalizedNew);
110
+ // Note: when !replaceAll, we rely on the occurrence check above to ensure
111
+ // normalizedOld appears exactly once, so split/join replaces only that one.
109
112
  // Restore original line endings — only convert if the file was
110
113
  // predominantly CRLF (>50% of line endings), to avoid corrupting
111
114
  // mixed-ending files.
@@ -116,8 +119,8 @@ export function createEditFileTool(getCwd) {
116
119
  ? newContent.replace(/\n/g, "\r\n")
117
120
  : newContent;
118
121
  await fs.writeFile(absPath, finalContent);
119
- // Compute and stream diff for display
120
- const diff = computeDiff(normalized, newContent);
122
+ // Compute and stream diff for display (windowed — only diffs the edit region)
123
+ const diff = computeEditDiff(normalized, normalizedOld, normalizedNew, replaceAll);
121
124
  if (onChunk && diff.hunks.length > 0) {
122
125
  for (const hunk of diff.hunks) {
123
126
  for (const line of hunk.lines) {
@@ -4,9 +4,11 @@ import { executeCommand } from "../../executor.js";
4
4
  export function createGlobTool(getCwd) {
5
5
  return {
6
6
  name: "glob",
7
- description: "Find files by name pattern. Returns paths sorted by modification time (newest first). " +
7
+ description: "Use this when you know a FILENAME or PATH SHAPE (e.g. `**/*.ts`, `src/**/*.md`, `package.json`). " +
8
+ "Returns matching file paths sorted by modification time (newest first). " +
9
+ "This does NOT search file contents — use `grep` for that. " +
8
10
  "ALWAYS use this instead of find/ls via bash. " +
9
- "Use glob to locate files, then read_file or grep to inspect contents.",
11
+ "Typical flow: `glob` to locate files, then `read_file` or `grep` to inspect contents.",
10
12
  input_schema: {
11
13
  type: "object",
12
14
  properties: {
@@ -2,7 +2,9 @@ import { executeCommand } from "../../executor.js";
2
2
  export function createGrepTool(getCwd) {
3
3
  return {
4
4
  name: "grep",
5
- description: "Search file contents using ripgrep. ALWAYS use this instead of running grep/rg via bash. " +
5
+ description: "Use this when you know something INSIDE the file (text, identifier, regex). " +
6
+ "To find files by filename alone, use `glob` instead. " +
7
+ "Search file contents using ripgrep. ALWAYS use this instead of running grep/rg via bash. " +
6
8
  "Supports three output modes: " +
7
9
  "'files_with_matches' (default, returns file paths only — use this to find which files contain a pattern), " +
8
10
  "'content' (matching lines with optional context_before/context_after), and " +
@@ -13,7 +15,7 @@ export function createGrepTool(getCwd) {
13
15
  properties: {
14
16
  pattern: {
15
17
  type: "string",
16
- description: "Regex pattern to search for",
18
+ description: "Regex pattern to search for (NOT a glob — `*.md` is invalid here; use `.*\\.md` for regex, or use the glob tool to find files by name). For filename filtering while searching content, use the `include` parameter.",
17
19
  },
18
20
  path: {
19
21
  type: "string",
@@ -124,12 +126,34 @@ export function createGrepTool(getCwd) {
124
126
  });
125
127
  await done;
126
128
  if (session.exitCode === 1 && !session.output.trim()) {
129
+ // If the pattern looks like a filename (e.g. "SKILL.md", "package.json"),
130
+ // the agent probably meant to find files by name, not search inside them.
131
+ // Surface a redirect hint instead of a silent zero.
132
+ const looksLikeFilename = /^[A-Za-z0-9_.\-*/]+\.[A-Za-z0-9]{1,6}$/.test(pattern) &&
133
+ !/[\\()\[\]|^$+{}]/.test(pattern);
134
+ const hint = looksLikeFilename
135
+ ? ` Hint: "${pattern}" looks like a filename. grep searches file *contents* — to find files by name, use the \`glob\` tool instead.`
136
+ : "";
127
137
  return {
128
- content: "No matches found.",
138
+ content: `No matches found.${hint}`,
129
139
  exitCode: 0,
130
140
  isError: false,
131
141
  };
132
142
  }
143
+ // exit code >= 2 is a ripgrep error (invalid regex, unreadable path, etc).
144
+ // Surface it as an error so the model retries with a correct pattern
145
+ // rather than treating "no useful output" as a successful no-match.
146
+ if (session.exitCode != null && session.exitCode >= 2) {
147
+ const looksLikeGlob = /^[*?]|\*\./.test(pattern) && !/[\\()\[\]|^$]/.test(pattern);
148
+ const hint = looksLikeGlob
149
+ ? " Hint: `*.md` is a glob, not a regex — use the glob tool to find files by name, or pass `include: \"*.md\"` here to filter files while searching content for a regex pattern."
150
+ : "";
151
+ return {
152
+ content: `grep failed (rg exit ${session.exitCode}): ${session.output.trim() || "no output"}${hint}`,
153
+ exitCode: session.exitCode,
154
+ isError: true,
155
+ };
156
+ }
133
157
  let output = session.output;
134
158
  // Cap individual line lengths to 500 chars to prevent minified/base64 flood
135
159
  if (mode === "content") {