agent-sh 0.9.0 → 0.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -30
- package/dist/agent/agent-loop.d.ts +43 -6
- package/dist/agent/agent-loop.js +817 -157
- package/dist/agent/conversation-state.d.ts +72 -21
- package/dist/agent/conversation-state.js +364 -151
- package/dist/agent/history-file.d.ts +13 -4
- package/dist/agent/history-file.js +110 -36
- package/dist/agent/nuclear-form.d.ts +28 -3
- package/dist/agent/nuclear-form.js +84 -3
- package/dist/agent/skills.d.ts +2 -4
- package/dist/agent/skills.js +10 -4
- package/dist/agent/subagent.d.ts +23 -0
- package/dist/agent/subagent.js +53 -11
- package/dist/agent/system-prompt.d.ts +34 -1
- package/dist/agent/system-prompt.js +96 -47
- package/dist/agent/token-budget.d.ts +10 -13
- package/dist/agent/token-budget.js +6 -46
- package/dist/agent/tool-protocol.d.ts +23 -1
- package/dist/agent/tool-protocol.js +169 -4
- package/dist/agent/tools/bash.js +3 -3
- package/dist/agent/tools/edit-file.js +9 -6
- package/dist/agent/tools/glob.js +4 -2
- package/dist/agent/tools/grep.js +27 -3
- package/dist/agent/tools/ls.js +5 -6
- package/dist/agent/types.d.ts +1 -2
- package/dist/context-manager.d.ts +16 -19
- package/dist/context-manager.js +48 -152
- package/dist/core.js +27 -6
- package/dist/event-bus.d.ts +59 -3
- package/dist/executor.d.ts +4 -3
- package/dist/executor.js +18 -15
- package/dist/extension-loader.js +75 -17
- package/dist/extensions/agent-backend.d.ts +8 -7
- package/dist/extensions/agent-backend.js +72 -50
- package/dist/extensions/index.js +0 -2
- package/dist/extensions/slash-commands.js +14 -9
- package/dist/extensions/tui-renderer.js +67 -80
- package/dist/index.js +25 -6
- package/dist/settings.d.ts +39 -16
- package/dist/settings.js +51 -11
- package/dist/shell/input-handler.d.ts +2 -1
- package/dist/shell/input-handler.js +84 -76
- package/dist/shell/shell.js +19 -2
- package/dist/types.d.ts +15 -0
- package/dist/utils/ansi.d.ts +7 -0
- package/dist/utils/ansi.js +69 -8
- package/dist/utils/box-frame.js +8 -2
- package/dist/utils/compositor.d.ts +5 -0
- package/dist/utils/compositor.js +31 -3
- package/dist/utils/diff-renderer.d.ts +9 -0
- package/dist/utils/diff-renderer.js +221 -143
- package/dist/utils/diff.d.ts +21 -2
- package/dist/utils/diff.js +165 -89
- package/dist/utils/handler-registry.d.ts +5 -0
- package/dist/utils/handler-registry.js +6 -0
- package/dist/utils/line-editor.d.ts +11 -1
- package/dist/utils/line-editor.js +44 -5
- package/dist/utils/markdown.js +23 -8
- package/dist/utils/package-version.d.ts +1 -0
- package/dist/utils/package-version.js +10 -0
- package/dist/utils/shell-output-spill.d.ts +2 -0
- package/dist/utils/shell-output-spill.js +81 -0
- package/dist/utils/tool-display.d.ts +1 -1
- package/dist/utils/tool-display.js +4 -4
- package/examples/extensions/ash-acp-bridge/src/index.ts +4 -1
- package/examples/extensions/ash-mcp-bridge/index.ts +13 -3
- package/examples/extensions/claude-code-bridge/README.md +14 -0
- package/examples/extensions/claude-code-bridge/index.ts +204 -145
- package/examples/extensions/claude-code-bridge/package.json +1 -0
- package/examples/extensions/interactive-prompts.ts +39 -25
- package/examples/extensions/overlay-agent.ts +3 -3
- package/examples/extensions/peer-mesh.ts +115 -0
- package/examples/extensions/pi-bridge/README.md +16 -0
- package/examples/extensions/pi-bridge/index.ts +9 -155
- package/examples/extensions/questionnaire.ts +16 -5
- package/examples/extensions/subagents.ts +19 -4
- package/examples/extensions/terminal-buffer.ts +163 -0
- package/examples/extensions/user-shell.ts +136 -0
- package/examples/extensions/web-access.ts +8 -0
- package/package.json +36 -2
- package/dist/agent/tools/display.d.ts +0 -13
- package/dist/agent/tools/display.js +0 -70
- package/dist/agent/tools/user-shell.d.ts +0 -13
- package/dist/agent/tools/user-shell.js +0 -87
- package/dist/extensions/shell-recall.d.ts +0 -9
- package/dist/extensions/shell-recall.js +0 -8
- package/dist/extensions/terminal-buffer.d.ts +0 -14
- package/dist/extensions/terminal-buffer.js +0 -134
|
@@ -1,18 +1,66 @@
|
|
|
1
1
|
import * as fs from "node:fs";
|
|
2
2
|
import * as path from "node:path";
|
|
3
3
|
import { fileURLToPath } from "node:url";
|
|
4
|
-
import {
|
|
4
|
+
import { discoverProjectSkills } from "./skills.js";
|
|
5
|
+
/**
|
|
6
|
+
* Format skills for inline display in prompt.
|
|
7
|
+
* Shows name, description, and file path so the model can decide immediately
|
|
8
|
+
* whether to load a skill — no extra round-trip needed.
|
|
9
|
+
*/
|
|
10
|
+
export function formatSkillsBlock(skills) {
|
|
11
|
+
if (skills.length === 0)
|
|
12
|
+
return "";
|
|
13
|
+
return "# Available Skills\n\n"
|
|
14
|
+
+ "Load a skill's full content with read_file on its file path when needed.\n\n"
|
|
15
|
+
+ skills.map(s => `- **${s.name}**: ${s.description}\n Path: ${s.filePath}`).join("\n\n");
|
|
16
|
+
}
|
|
17
|
+
// Global AGENTS.md lives in the user's home-based config dir (~/.agent-sh) and holds the user's standing instructions to the agent
|
|
18
|
+
import * as os from "node:os";
|
|
19
|
+
const GLOBAL_AGENTS_MD = path.join(os.homedir(), ".agent-sh", "AGENTS.md");
|
|
20
|
+
// ── File caches ─────────────────────────────────────────────────────
|
|
21
|
+
// Convention files (CLAUDE.md/AGENT.md) are walked synchronously from
|
|
22
|
+
// CWD to root on every query. In practice they almost never change,
|
|
23
|
+
// so a short TTL cache keyed by CWD avoids redundant filesystem walks.
|
|
24
|
+
// The 5-second TTL is short enough to pick up edits quickly but long
|
|
25
|
+
// enough to eliminate repeated walks within a multi-tool agent loop.
|
|
26
|
+
const CACHE_TTL_MS = 5_000;
|
|
27
|
+
/** TTL cache for convention files, keyed by resolved CWD. */
|
|
28
|
+
let conventionCache = null;
|
|
29
|
+
/** TTL cache for global AGENTS.md — changes extremely rarely. */
|
|
30
|
+
let agentsMdCache = null;
|
|
31
|
+
export function loadGlobalAgentsMd() {
|
|
32
|
+
const now = Date.now();
|
|
33
|
+
if (agentsMdCache && now < agentsMdCache.expiry) {
|
|
34
|
+
return agentsMdCache.result;
|
|
35
|
+
}
|
|
36
|
+
try {
|
|
37
|
+
const content = fs.readFileSync(GLOBAL_AGENTS_MD, "utf-8").trim();
|
|
38
|
+
const result = content || null;
|
|
39
|
+
agentsMdCache = { result, expiry: now + CACHE_TTL_MS };
|
|
40
|
+
return result;
|
|
41
|
+
}
|
|
42
|
+
catch {
|
|
43
|
+
agentsMdCache = { result: null, expiry: now + CACHE_TTL_MS };
|
|
44
|
+
return null;
|
|
45
|
+
}
|
|
46
|
+
}
|
|
5
47
|
/** Absolute path two directory levels above this module; agent-sh's own docs are read from the `docs` directory beneath it. */
|
|
6
|
-
const
|
|
48
|
+
const CODE_DIR = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../../");
|
|
7
49
|
/** File names to scan for project conventions (checked in order). */
|
|
8
50
|
const CONVENTION_FILES = ["CLAUDE.md", "AGENT.md"];
|
|
9
51
|
/**
|
|
10
52
|
* Scan from `dir` upward for project convention files.
|
|
11
53
|
* Returns contents ordered root-first (general → specific).
|
|
54
|
+
* Results are cached for CACHE_TTL_MS, keyed by resolved directory.
|
|
12
55
|
*/
|
|
13
56
|
function loadConventionFiles(dir) {
|
|
57
|
+
const cwd = path.resolve(dir);
|
|
58
|
+
const now = Date.now();
|
|
59
|
+
if (conventionCache && conventionCache.cwd === cwd && now < conventionCache.expiry) {
|
|
60
|
+
return conventionCache.result;
|
|
61
|
+
}
|
|
14
62
|
const files = [];
|
|
15
|
-
let current =
|
|
63
|
+
let current = cwd;
|
|
16
64
|
while (true) {
|
|
17
65
|
for (const name of CONVENTION_FILES) {
|
|
18
66
|
const candidate = path.join(current, name);
|
|
@@ -33,40 +81,25 @@ function loadConventionFiles(dir) {
|
|
|
33
81
|
current = parent;
|
|
34
82
|
}
|
|
35
83
|
files.reverse();
|
|
36
|
-
|
|
84
|
+
const result = files.map(f => `<!-- ${f.path} -->\n${f.content}`);
|
|
85
|
+
conventionCache = { cwd, result, expiry: now + CACHE_TTL_MS };
|
|
86
|
+
return result;
|
|
37
87
|
}
|
|
38
88
|
/**
|
|
39
89
|
* Static system prompt — identical across all queries, cacheable.
|
|
40
90
|
* Contains only identity and behavioral instructions.
|
|
41
91
|
*/
|
|
42
|
-
export const STATIC_SYSTEM_PROMPT = `You are
|
|
92
|
+
export const STATIC_SYSTEM_PROMPT = `You are an AI coding assistant running inside agent-sh, a terminal shell.
|
|
43
93
|
You have access to the user's shell environment and can read, write, and execute code.
|
|
44
94
|
You share the user's working directory, environment variables, and shell history.
|
|
95
|
+
agent-sh documentation is at ${path.join(CODE_DIR, "docs")} — start with README.md for an index. Read the docs when you need to understand how the runtime works.
|
|
45
96
|
|
|
46
97
|
# Tool Decision Guide
|
|
47
|
-
|
|
48
|
-
You have three categories of tools — choose based on who needs the output and
|
|
49
|
-
whether the command has lasting effects:
|
|
50
|
-
|
|
51
|
-
**Scratchpad tools** (bash, read_file, grep, glob, ls, edit_file, write_file):
|
|
98
|
+
bash, read_file, grep, glob, ls, edit_file, write_file::
|
|
52
99
|
Use these to investigate, search, read, and modify files. Output is returned
|
|
53
100
|
to you for reasoning — the user doesn't see it directly.
|
|
54
101
|
|
|
55
|
-
|
|
56
|
-
Use this to show output to the user in their terminal. The user sees the
|
|
57
|
-
output directly, but it is NOT returned to you. Use when:
|
|
58
|
-
- The user asks to see something (cat a file, git log, git diff, man page)
|
|
59
|
-
- The output is for the user to read, not for you to process
|
|
60
|
-
|
|
61
|
-
**Live shell** (user_shell):
|
|
62
|
-
Use this to run complete, non-interactive commands in the user's real shell. Use for:
|
|
63
|
-
- Commands that affect shell state (cd, export, source)
|
|
64
|
-
- Installing packages, starting servers, running builds
|
|
65
|
-
- Any command where the user wants real side effects
|
|
66
|
-
- Set return_output=true only if you need to inspect the result
|
|
67
|
-
|
|
68
|
-
Default to scratchpad tools for your own investigation. Use display when the
|
|
69
|
-
user is the intended audience. Use user_shell when the command has real effects.
|
|
102
|
+
Extensions may register additional tools — follow their instructions.
|
|
70
103
|
|
|
71
104
|
# Tool Usage Guidelines
|
|
72
105
|
- Use read_file before editing a file you haven't seen
|
|
@@ -75,34 +108,50 @@ user is the intended audience. Use user_shell when the command has real effects.
|
|
|
75
108
|
- Keep bash commands focused; avoid long-running blocking commands
|
|
76
109
|
- Always check command exit codes for errors
|
|
77
110
|
|
|
78
|
-
#
|
|
79
|
-
|
|
80
|
-
|
|
111
|
+
# Preference Learning
|
|
112
|
+
|
|
113
|
+
Treat the user's past commands as standing preferences. Before acting, check shell history
|
|
114
|
+
and conversation context for recurring patterns — apply them proactively and do not wait to
|
|
115
|
+
be reminded.`;
|
|
81
116
|
/**
|
|
82
|
-
*
|
|
83
|
-
*
|
|
84
|
-
*
|
|
85
|
-
* Runs through the "dynamic-context:build" handler so extensions can advise.
|
|
117
|
+
* CWD-scoped static context: project conventions (CLAUDE.md / AGENT.md)
|
|
118
|
+
* and discovered skills. Stable for a given cwd — callers should cache
|
|
119
|
+
* on cwd identity rather than rebuilding per LLM iteration.
|
|
86
120
|
*/
|
|
87
|
-
export function
|
|
121
|
+
export function buildStaticByCwd(cwd) {
|
|
88
122
|
const sections = [];
|
|
89
|
-
|
|
90
|
-
const conventions = loadConventionFiles(contextManager.getCwd());
|
|
123
|
+
const conventions = loadConventionFiles(cwd);
|
|
91
124
|
if (conventions.length > 0) {
|
|
92
125
|
sections.push("# Project Conventions\n\n" + conventions.join("\n\n"));
|
|
93
126
|
}
|
|
94
|
-
|
|
95
|
-
const
|
|
96
|
-
if (
|
|
97
|
-
sections.push(
|
|
98
|
-
}
|
|
99
|
-
// Shell context — pass token budget converted to bytes (~4 chars/token)
|
|
100
|
-
const shellBudgetBytes = shellBudgetTokens != null ? shellBudgetTokens * 4 : undefined;
|
|
101
|
-
const shellContext = contextManager.getContext(shellBudgetBytes);
|
|
102
|
-
if (shellContext) {
|
|
103
|
-
sections.push(shellContext);
|
|
127
|
+
const projectSkills = discoverProjectSkills(cwd);
|
|
128
|
+
const skillsBlock = formatSkillsBlock(projectSkills);
|
|
129
|
+
if (skillsBlock) {
|
|
130
|
+
sections.push(skillsBlock);
|
|
104
131
|
}
|
|
105
|
-
// Metadata
|
|
106
|
-
sections.push(`Current date: ${new Date().toISOString().split("T")[0]}\nWorking directory: ${contextManager.getCwd()}`);
|
|
107
132
|
return sections.join("\n\n");
|
|
108
133
|
}
|
|
134
|
+
/**
|
|
135
|
+
* Per-iteration dynamic context: date, working directory, token usage.
|
|
136
|
+
* Rebuilt every LLM call. Extension advisors add more sections (budget,
|
|
137
|
+
* subagents, metacognitive signals, etc.) on top.
|
|
138
|
+
*
|
|
139
|
+
* Skills, AGENTS.md, and project conventions live in the system prompt
|
|
140
|
+
* (see `system-prompt:build` in agent-loop) so they enter the provider's
|
|
141
|
+
* prefix cache instead of being rebuilt and re-sent every turn.
|
|
142
|
+
*
|
|
143
|
+
* Shell context is likewise not injected here — it flows into the
|
|
144
|
+
* conversation as incremental <shell-events> messages (see
|
|
145
|
+
* AgentLoop.injectShellDelta) for the same reason.
|
|
146
|
+
*/
|
|
147
|
+
export function buildDynamicContext(contextManager, tokenStatus) {
|
|
148
|
+
const envLines = [
|
|
149
|
+
`Current date: ${new Date().toISOString().split("T")[0]}`,
|
|
150
|
+
`Working directory: ${contextManager.getCwd()}`,
|
|
151
|
+
];
|
|
152
|
+
const usedK = (tokenStatus.promptTokens / 1000).toFixed(1);
|
|
153
|
+
const maxK = (tokenStatus.contextWindow / 1000).toFixed(0);
|
|
154
|
+
const pct = Math.min(100, Math.round((tokenStatus.promptTokens / tokenStatus.contextWindow) * 100));
|
|
155
|
+
envLines.push(`Token usage: ${usedK}k/${maxK}k (${pct}%)`);
|
|
156
|
+
return `<environment>\n${envLines.join("\n")}\n</environment>`;
|
|
157
|
+
}
|
|
@@ -1,13 +1,10 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
/** Token budget for the conversation messages stream. */
|
|
12
|
-
get conversationBudgetTokens(): number;
|
|
13
|
-
}
|
|
1
|
+
/**
|
|
2
|
+
* Shared token-budget constants used by auto-compaction.
|
|
3
|
+
*
|
|
4
|
+
* RESPONSE_RESERVE: tokens reserved for the model's output.
|
|
5
|
+
* DEFAULT_CONTEXT_WINDOW: fallback when the active mode doesn't declare one.
|
|
6
|
+
*/
|
|
7
|
+
/** Response reserve — tokens reserved for the model's output. */
|
|
8
|
+
export declare const RESPONSE_RESERVE = 8192;
|
|
9
|
+
/** Fallback when contextWindow is unknown. */
|
|
10
|
+
export declare const DEFAULT_CONTEXT_WINDOW = 60000;
|
|
@@ -1,50 +1,10 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* Shared token-budget constants used by auto-compaction.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
* - Conversation (agent messages and tool results — task continuity)
|
|
7
|
-
*
|
|
8
|
-
* The budget accounts for fixed overhead (system prompt, tool definitions,
|
|
9
|
-
* response reserve) and divides the remaining space by a configurable ratio.
|
|
4
|
+
* RESPONSE_RESERVE: tokens reserved for the model's output.
|
|
5
|
+
* DEFAULT_CONTEXT_WINDOW: fallback when the active mode doesn't declare one.
|
|
10
6
|
*/
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
const SYSTEM_PROMPT_OVERHEAD = 800;
|
|
14
|
-
const DYNAMIC_CONTEXT_OVERHEAD = 500; // conventions, metadata, skills list
|
|
15
|
-
const TOKENS_PER_TOOL_DEFINITION = 50;
|
|
16
|
-
const RESPONSE_RESERVE = 8192; // matches llm-client.ts default max_tokens
|
|
7
|
+
/** Response reserve — tokens reserved for the model's output. */
|
|
8
|
+
export const RESPONSE_RESERVE = 8192;
|
|
17
9
|
/** Fallback when contextWindow is unknown. */
|
|
18
|
-
const DEFAULT_CONTEXT_WINDOW = 60_000;
|
|
19
|
-
export class TokenBudget {
|
|
20
|
-
contextWindow;
|
|
21
|
-
toolCount;
|
|
22
|
-
constructor(contextWindow, toolCount = 0) {
|
|
23
|
-
this.contextWindow = contextWindow ?? DEFAULT_CONTEXT_WINDOW;
|
|
24
|
-
this.toolCount = toolCount;
|
|
25
|
-
}
|
|
26
|
-
/** Update when model or tool set changes. */
|
|
27
|
-
update(contextWindow, toolCount) {
|
|
28
|
-
if (contextWindow != null)
|
|
29
|
-
this.contextWindow = contextWindow;
|
|
30
|
-
if (toolCount != null)
|
|
31
|
-
this.toolCount = toolCount;
|
|
32
|
-
}
|
|
33
|
-
/** Total tokens available for shell context + conversation content. */
|
|
34
|
-
get contentBudget() {
|
|
35
|
-
const overhead = SYSTEM_PROMPT_OVERHEAD +
|
|
36
|
-
DYNAMIC_CONTEXT_OVERHEAD +
|
|
37
|
-
this.toolCount * TOKENS_PER_TOOL_DEFINITION +
|
|
38
|
-
RESPONSE_RESERVE;
|
|
39
|
-
return Math.max(0, this.contextWindow - overhead);
|
|
40
|
-
}
|
|
41
|
-
/** Token budget for the shell context stream. */
|
|
42
|
-
get shellBudgetTokens() {
|
|
43
|
-
const ratio = getSettings().shellContextRatio;
|
|
44
|
-
return Math.floor(this.contentBudget * ratio);
|
|
45
|
-
}
|
|
46
|
-
/** Token budget for the conversation messages stream. */
|
|
47
|
-
get conversationBudgetTokens() {
|
|
48
|
-
return this.contentBudget - this.shellBudgetTokens;
|
|
49
|
-
}
|
|
50
|
-
}
|
|
10
|
+
export const DEFAULT_CONTEXT_WINDOW = 60_000;
|
|
@@ -44,6 +44,12 @@ export interface ToolProtocol {
|
|
|
44
44
|
recordResults(conv: ConversationState, results: ToolResult[]): void;
|
|
45
45
|
/** Create a stream filter for stripping tool calls from display. null = pass-through. */
|
|
46
46
|
createStreamFilter(toolNames: string[]): StreamFilter | null;
|
|
47
|
+
/**
|
|
48
|
+
* Extra tool definitions the protocol wants registered in the tool registry.
|
|
49
|
+
* Used by deferred-lookup mode to register its `load_tool` meta-tool.
|
|
50
|
+
* Default: none.
|
|
51
|
+
*/
|
|
52
|
+
getProtocolTools?(): ToolDefinition[];
|
|
47
53
|
}
|
|
48
54
|
export declare class ApiToolProtocol implements ToolProtocol {
|
|
49
55
|
readonly mode: "api";
|
|
@@ -80,4 +86,20 @@ export declare class DeferredToolProtocol implements ToolProtocol {
|
|
|
80
86
|
recordResults(conv: ConversationState, results: ToolResult[]): void;
|
|
81
87
|
createStreamFilter(): null;
|
|
82
88
|
}
|
|
83
|
-
export declare
|
|
89
|
+
export declare class DeferredLookupProtocol implements ToolProtocol {
|
|
90
|
+
readonly mode: "deferred-lookup";
|
|
91
|
+
private coreNames;
|
|
92
|
+
private loadedExt;
|
|
93
|
+
/** Cache of the current tools list so load_tool's execute can find schemas. */
|
|
94
|
+
private toolsRef;
|
|
95
|
+
constructor(coreNames: string[]);
|
|
96
|
+
getApiTools(tools: ToolDefinition[]): ChatCompletionTool[] | undefined;
|
|
97
|
+
getToolPrompt(): string;
|
|
98
|
+
extractToolCalls(_text: string, streamedCalls: PendingToolCall[]): PendingToolCall[];
|
|
99
|
+
rewriteToolCall(tc: PendingToolCall): PendingToolCall;
|
|
100
|
+
recordAssistant(conv: ConversationState, text: string, toolCalls: PendingToolCall[]): void;
|
|
101
|
+
recordResults(conv: ConversationState, results: ToolResult[]): void;
|
|
102
|
+
createStreamFilter(): null;
|
|
103
|
+
getProtocolTools(): ToolDefinition[];
|
|
104
|
+
}
|
|
105
|
+
export declare function createToolProtocol(mode: "api" | "inline" | "deferred" | "deferred-lookup"): ToolProtocol;
|
|
@@ -34,7 +34,7 @@ export class ApiToolProtocol {
|
|
|
34
34
|
recordResults(conv, results) {
|
|
35
35
|
for (const r of results) {
|
|
36
36
|
const content = r.isError ? `Error: ${r.content}` : r.content;
|
|
37
|
-
conv.addToolResult(r.callId, content);
|
|
37
|
+
conv.addToolResult(r.callId, content, r.isError);
|
|
38
38
|
}
|
|
39
39
|
}
|
|
40
40
|
createStreamFilter() {
|
|
@@ -363,24 +363,189 @@ export class DeferredToolProtocol {
|
|
|
363
363
|
recordResults(conv, results) {
|
|
364
364
|
for (const r of results) {
|
|
365
365
|
const content = r.isError ? `Error: ${r.content}` : r.content;
|
|
366
|
-
conv.addToolResult(r.callId, content);
|
|
366
|
+
conv.addToolResult(r.callId, content, r.isError);
|
|
367
367
|
}
|
|
368
368
|
}
|
|
369
369
|
createStreamFilter() {
|
|
370
370
|
return null;
|
|
371
371
|
}
|
|
372
372
|
}
|
|
373
|
+
// ── Deferred-lookup mode (load-on-demand with full schema) ──────
|
|
374
|
+
//
|
|
375
|
+
// Like deferred, but instead of wrapping extension calls through a meta-
|
|
376
|
+
// tool dispatcher, we expose a `load_tool` meta-tool that returns the
|
|
377
|
+
// full schema as a tool result AND mutates the protocol's loaded set.
|
|
378
|
+
// Loaded tools become first-class on the NEXT LLM call — the model calls
|
|
379
|
+
// them natively with complete schema fidelity. One round-trip per group
|
|
380
|
+
// of tools loaded, not per call. Prevents the whole class of bugs where
|
|
381
|
+
// models guess arg names from a schema they can only see partially.
|
|
382
|
+
export class DeferredLookupProtocol {
|
|
383
|
+
mode = "deferred-lookup";
|
|
384
|
+
coreNames;
|
|
385
|
+
loadedExt = new Set();
|
|
386
|
+
/** Cache of the current tools list so load_tool's execute can find schemas. */
|
|
387
|
+
toolsRef = [];
|
|
388
|
+
constructor(coreNames) {
|
|
389
|
+
this.coreNames = new Set(coreNames);
|
|
390
|
+
}
|
|
391
|
+
getApiTools(tools) {
|
|
392
|
+
this.toolsRef = tools;
|
|
393
|
+
const visible = [];
|
|
394
|
+
const unloadedExt = [];
|
|
395
|
+
for (const t of tools) {
|
|
396
|
+
if (t.name === "load_tool")
|
|
397
|
+
continue; // rebuilt below with fresh catalog
|
|
398
|
+
const isCore = this.coreNames.has(t.name);
|
|
399
|
+
const isLoaded = this.loadedExt.has(t.name);
|
|
400
|
+
if (isCore || isLoaded) {
|
|
401
|
+
visible.push({
|
|
402
|
+
type: "function",
|
|
403
|
+
function: {
|
|
404
|
+
name: t.name,
|
|
405
|
+
description: t.description,
|
|
406
|
+
parameters: t.input_schema,
|
|
407
|
+
},
|
|
408
|
+
});
|
|
409
|
+
}
|
|
410
|
+
else {
|
|
411
|
+
unloadedExt.push(t.name);
|
|
412
|
+
}
|
|
413
|
+
}
|
|
414
|
+
if (unloadedExt.length > 0) {
|
|
415
|
+
visible.push({
|
|
416
|
+
type: "function",
|
|
417
|
+
function: {
|
|
418
|
+
name: "load_tool",
|
|
419
|
+
description: `Load extension tool schemas so you can call them on the next turn. ` +
|
|
420
|
+
`Unloaded: ${unloadedExt.join(", ")}. ` +
|
|
421
|
+
`After load_tool succeeds, call those tools directly — not through load_tool again.`,
|
|
422
|
+
parameters: {
|
|
423
|
+
type: "object",
|
|
424
|
+
properties: {
|
|
425
|
+
names: {
|
|
426
|
+
type: "array",
|
|
427
|
+
items: { type: "string" },
|
|
428
|
+
description: "Names of extension tools to load.",
|
|
429
|
+
},
|
|
430
|
+
},
|
|
431
|
+
required: ["names"],
|
|
432
|
+
},
|
|
433
|
+
},
|
|
434
|
+
});
|
|
435
|
+
}
|
|
436
|
+
return visible.length > 0 ? visible : undefined;
|
|
437
|
+
}
|
|
438
|
+
getToolPrompt() {
|
|
439
|
+
return "";
|
|
440
|
+
}
|
|
441
|
+
extractToolCalls(_text, streamedCalls) {
|
|
442
|
+
return streamedCalls;
|
|
443
|
+
}
|
|
444
|
+
rewriteToolCall(tc) {
|
|
445
|
+
return tc; // no dispatching needed — load_tool is a real registered tool
|
|
446
|
+
}
|
|
447
|
+
recordAssistant(conv, text, toolCalls) {
|
|
448
|
+
const calls = toolCalls.length
|
|
449
|
+
? toolCalls.map((tc) => ({
|
|
450
|
+
id: tc.id,
|
|
451
|
+
function: { name: tc.name, arguments: tc.argumentsJson },
|
|
452
|
+
}))
|
|
453
|
+
: undefined;
|
|
454
|
+
conv.addAssistantMessage(text || null, calls);
|
|
455
|
+
}
|
|
456
|
+
recordResults(conv, results) {
|
|
457
|
+
for (const r of results) {
|
|
458
|
+
const content = r.isError ? `Error: ${r.content}` : r.content;
|
|
459
|
+
conv.addToolResult(r.callId, content, r.isError);
|
|
460
|
+
}
|
|
461
|
+
}
|
|
462
|
+
createStreamFilter() {
|
|
463
|
+
return null;
|
|
464
|
+
}
|
|
465
|
+
getProtocolTools() {
|
|
466
|
+
// load_tool is registered as a real tool so the executor can run it
|
|
467
|
+
// through the normal dispatch path. Its execute closes over the protocol
|
|
468
|
+
// instance to mutate the loadedExt set and return schemas.
|
|
469
|
+
const self = this;
|
|
470
|
+
return [
|
|
471
|
+
{
|
|
472
|
+
name: "load_tool",
|
|
473
|
+
description: "Load extension tool schemas so you can call them natively on the next turn.",
|
|
474
|
+
input_schema: {
|
|
475
|
+
type: "object",
|
|
476
|
+
properties: {
|
|
477
|
+
names: {
|
|
478
|
+
type: "array",
|
|
479
|
+
items: { type: "string" },
|
|
480
|
+
description: "Names of extension tools to load.",
|
|
481
|
+
},
|
|
482
|
+
},
|
|
483
|
+
required: ["names"],
|
|
484
|
+
},
|
|
485
|
+
showOutput: false,
|
|
486
|
+
async execute(args) {
|
|
487
|
+
const names = Array.isArray(args.names) ? args.names : [];
|
|
488
|
+
if (names.length === 0) {
|
|
489
|
+
return { content: "No tool names provided. Pass { names: [...] }.", exitCode: 1, isError: true };
|
|
490
|
+
}
|
|
491
|
+
const loaded = [];
|
|
492
|
+
const alreadyLoaded = [];
|
|
493
|
+
const errors = [];
|
|
494
|
+
const sections = [];
|
|
495
|
+
for (const name of names) {
|
|
496
|
+
const tool = self.toolsRef.find((t) => t.name === name);
|
|
497
|
+
if (!tool) {
|
|
498
|
+
errors.push(`Unknown tool: ${name}`);
|
|
499
|
+
continue;
|
|
500
|
+
}
|
|
501
|
+
if (self.coreNames.has(name) || name === "load_tool") {
|
|
502
|
+
errors.push(`${name} is already available — no need to load.`);
|
|
503
|
+
continue;
|
|
504
|
+
}
|
|
505
|
+
if (self.loadedExt.has(name)) {
|
|
506
|
+
alreadyLoaded.push(name);
|
|
507
|
+
continue;
|
|
508
|
+
}
|
|
509
|
+
self.loadedExt.add(name);
|
|
510
|
+
loaded.push(name);
|
|
511
|
+
sections.push(`## ${name}\n${tool.description}\n\nSchema:\n\`\`\`json\n${JSON.stringify(tool.input_schema, null, 2)}\n\`\`\``);
|
|
512
|
+
}
|
|
513
|
+
const lines = [];
|
|
514
|
+
if (loaded.length > 0) {
|
|
515
|
+
lines.push(`Loaded ${loaded.length} tool(s): ${loaded.join(", ")}. ` +
|
|
516
|
+
`They are now available as first-class tools on your next turn — call directly.`);
|
|
517
|
+
lines.push("");
|
|
518
|
+
lines.push(sections.join("\n\n"));
|
|
519
|
+
}
|
|
520
|
+
if (alreadyLoaded.length > 0) {
|
|
521
|
+
lines.push(`Already loaded: ${alreadyLoaded.join(", ")}.`);
|
|
522
|
+
}
|
|
523
|
+
if (errors.length > 0) {
|
|
524
|
+
lines.push(`Errors:\n${errors.map((e) => `- ${e}`).join("\n")}`);
|
|
525
|
+
}
|
|
526
|
+
return {
|
|
527
|
+
content: lines.join("\n") || "Nothing to do.",
|
|
528
|
+
exitCode: 0,
|
|
529
|
+
isError: loaded.length === 0 && alreadyLoaded.length === 0 && errors.length > 0,
|
|
530
|
+
};
|
|
531
|
+
},
|
|
532
|
+
},
|
|
533
|
+
];
|
|
534
|
+
}
|
|
535
|
+
}
|
|
373
536
|
// ── Factory ─────────────────────────────────────────────────────
|
|
374
537
|
/** Core tool names — always sent with full schema. */
|
|
375
538
|
const CORE_TOOLS = [
|
|
376
539
|
"bash", "read_file", "write_file", "edit_file",
|
|
377
|
-
"grep", "glob", "ls",
|
|
378
|
-
"list_skills",
|
|
540
|
+
"grep", "glob", "ls",
|
|
541
|
+
"list_skills",
|
|
379
542
|
];
|
|
380
543
|
export function createToolProtocol(mode) {
|
|
381
544
|
if (mode === "inline")
|
|
382
545
|
return new InlineToolProtocol();
|
|
383
546
|
if (mode === "deferred")
|
|
384
547
|
return new DeferredToolProtocol(CORE_TOOLS);
|
|
548
|
+
if (mode === "deferred-lookup")
|
|
549
|
+
return new DeferredLookupProtocol(CORE_TOOLS);
|
|
385
550
|
return new ApiToolProtocol();
|
|
386
551
|
}
|
package/dist/agent/tools/bash.js
CHANGED
|
@@ -3,10 +3,10 @@ export function createBashTool(opts) {
|
|
|
3
3
|
return {
|
|
4
4
|
name: "bash",
|
|
5
5
|
description: "Execute a bash command in an isolated subprocess. Output is captured and returned. " +
|
|
6
|
-
"Does not affect the user's shell state
|
|
6
|
+
"Does not affect the user's shell state. " +
|
|
7
|
+
"cwd is set to the working directory from the shell context. " +
|
|
7
8
|
"Do NOT use bash for file searching — use grep/glob instead. " +
|
|
8
|
-
"Do NOT use bash for reading files — use read_file instead.
|
|
9
|
-
"Provide a description parameter to explain what the command does.",
|
|
9
|
+
"Do NOT use bash for reading files — use read_file instead.",
|
|
10
10
|
input_schema: {
|
|
11
11
|
type: "object",
|
|
12
12
|
properties: {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import * as fs from "node:fs/promises";
|
|
2
2
|
import * as path from "node:path";
|
|
3
|
-
import {
|
|
3
|
+
import { computeEditDiff } from "../../utils/diff.js";
|
|
4
4
|
/**
|
|
5
5
|
* Find the closest matching region in the file content to help diagnose
|
|
6
6
|
* why an exact match failed. Returns a hint string.
|
|
@@ -103,9 +103,12 @@ export function createEditFileTool(getCwd) {
|
|
|
103
103
|
};
|
|
104
104
|
}
|
|
105
105
|
const normalizedNew = newText.replace(/\r\n/g, "\n");
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
106
|
+
// Use split/join for literal replacement everywhere. String.replace()
|
|
107
|
+
// treats dollar-sign patterns in the replacement as special substitution
|
|
108
|
+
// variables, which corrupts file content containing regex escape sequences.
|
|
109
|
+
const newContent = normalized.split(normalizedOld).join(normalizedNew);
|
|
110
|
+
// Note: when !replaceAll, we rely on the occurrence check above to ensure
|
|
111
|
+
// normalizedOld appears exactly once, so split/join replaces only that one.
|
|
109
112
|
// Restore original line endings — only convert if the file was
|
|
110
113
|
// predominantly CRLF (>50% of line endings), to avoid corrupting
|
|
111
114
|
// mixed-ending files.
|
|
@@ -116,8 +119,8 @@ export function createEditFileTool(getCwd) {
|
|
|
116
119
|
? newContent.replace(/\n/g, "\r\n")
|
|
117
120
|
: newContent;
|
|
118
121
|
await fs.writeFile(absPath, finalContent);
|
|
119
|
-
// Compute and stream diff for display
|
|
120
|
-
const diff =
|
|
122
|
+
// Compute and stream diff for display (windowed — only diffs the edit region)
|
|
123
|
+
const diff = computeEditDiff(normalized, normalizedOld, normalizedNew, replaceAll);
|
|
121
124
|
if (onChunk && diff.hunks.length > 0) {
|
|
122
125
|
for (const hunk of diff.hunks) {
|
|
123
126
|
for (const line of hunk.lines) {
|
package/dist/agent/tools/glob.js
CHANGED
|
@@ -4,9 +4,11 @@ import { executeCommand } from "../../executor.js";
|
|
|
4
4
|
export function createGlobTool(getCwd) {
|
|
5
5
|
return {
|
|
6
6
|
name: "glob",
|
|
7
|
-
description: "
|
|
7
|
+
description: "Use this when you know a FILENAME or PATH SHAPE (e.g. `**/*.ts`, `src/**/*.md`, `package.json`). " +
|
|
8
|
+
"Returns matching file paths sorted by modification time (newest first). " +
|
|
9
|
+
"This does NOT search file contents — use `grep` for that. " +
|
|
8
10
|
"ALWAYS use this instead of find/ls via bash. " +
|
|
9
|
-
"
|
|
11
|
+
"Typical flow: `glob` to locate files, then `read_file` or `grep` to inspect contents.",
|
|
10
12
|
input_schema: {
|
|
11
13
|
type: "object",
|
|
12
14
|
properties: {
|
package/dist/agent/tools/grep.js
CHANGED
|
@@ -2,7 +2,9 @@ import { executeCommand } from "../../executor.js";
|
|
|
2
2
|
export function createGrepTool(getCwd) {
|
|
3
3
|
return {
|
|
4
4
|
name: "grep",
|
|
5
|
-
description: "
|
|
5
|
+
description: "Use this when you know something INSIDE the file (text, identifier, regex). " +
|
|
6
|
+
"To find files by filename alone, use `glob` instead. " +
|
|
7
|
+
"Search file contents using ripgrep. ALWAYS use this instead of running grep/rg via bash. " +
|
|
6
8
|
"Supports three output modes: " +
|
|
7
9
|
"'files_with_matches' (default, returns file paths only — use this to find which files contain a pattern), " +
|
|
8
10
|
"'content' (matching lines with optional context_before/context_after), and " +
|
|
@@ -13,7 +15,7 @@ export function createGrepTool(getCwd) {
|
|
|
13
15
|
properties: {
|
|
14
16
|
pattern: {
|
|
15
17
|
type: "string",
|
|
16
|
-
description: "Regex pattern to search for",
|
|
18
|
+
description: "Regex pattern to search for (NOT a glob — `*.md` is invalid here; use `.*\\.md` for regex, or use the glob tool to find files by name). For filename filtering while searching content, use the `include` parameter.",
|
|
17
19
|
},
|
|
18
20
|
path: {
|
|
19
21
|
type: "string",
|
|
@@ -124,12 +126,34 @@ export function createGrepTool(getCwd) {
|
|
|
124
126
|
});
|
|
125
127
|
await done;
|
|
126
128
|
if (session.exitCode === 1 && !session.output.trim()) {
|
|
129
|
+
// If the pattern looks like a filename (e.g. "SKILL.md", "package.json"),
|
|
130
|
+
// the agent probably meant to find files by name, not search inside them.
|
|
131
|
+
// Surface a redirect hint instead of a silent zero.
|
|
132
|
+
const looksLikeFilename = /^[A-Za-z0-9_.\-*/]+\.[A-Za-z0-9]{1,6}$/.test(pattern) &&
|
|
133
|
+
!/[\\()\[\]|^$+{}]/.test(pattern);
|
|
134
|
+
const hint = looksLikeFilename
|
|
135
|
+
? ` Hint: "${pattern}" looks like a filename. grep searches file *contents* — to find files by name, use the \`glob\` tool instead.`
|
|
136
|
+
: "";
|
|
127
137
|
return {
|
|
128
|
-
content:
|
|
138
|
+
content: `No matches found.${hint}`,
|
|
129
139
|
exitCode: 0,
|
|
130
140
|
isError: false,
|
|
131
141
|
};
|
|
132
142
|
}
|
|
143
|
+
// exit code >= 2 is a ripgrep error (invalid regex, unreadable path, etc).
|
|
144
|
+
// Surface it as an error so the model retries with a correct pattern
|
|
145
|
+
// rather than treating "no useful output" as a successful no-match.
|
|
146
|
+
if (session.exitCode != null && session.exitCode >= 2) {
|
|
147
|
+
const looksLikeGlob = /^[*?]|\*\./.test(pattern) && !/[\\()\[\]|^$]/.test(pattern);
|
|
148
|
+
const hint = looksLikeGlob
|
|
149
|
+
? " Hint: `*.md` is a glob, not a regex — use the glob tool to find files by name, or pass `include: \"*.md\"` here to filter files while searching content for a regex pattern."
|
|
150
|
+
: "";
|
|
151
|
+
return {
|
|
152
|
+
content: `grep failed (rg exit ${session.exitCode}): ${session.output.trim() || "no output"}${hint}`,
|
|
153
|
+
exitCode: session.exitCode,
|
|
154
|
+
isError: true,
|
|
155
|
+
};
|
|
156
|
+
}
|
|
133
157
|
let output = session.output;
|
|
134
158
|
// Cap individual line lengths to 500 chars to prevent minified/base64 flood
|
|
135
159
|
if (mode === "content") {
|