deepagentsdk 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +159 -0
  3. package/package.json +95 -0
  4. package/src/agent.ts +1230 -0
  5. package/src/backends/composite.ts +273 -0
  6. package/src/backends/filesystem.ts +692 -0
  7. package/src/backends/index.ts +22 -0
  8. package/src/backends/local-sandbox.ts +175 -0
  9. package/src/backends/persistent.ts +593 -0
  10. package/src/backends/sandbox.ts +510 -0
  11. package/src/backends/state.ts +244 -0
  12. package/src/backends/utils.ts +287 -0
  13. package/src/checkpointer/file-saver.ts +98 -0
  14. package/src/checkpointer/index.ts +5 -0
  15. package/src/checkpointer/kv-saver.ts +82 -0
  16. package/src/checkpointer/memory-saver.ts +82 -0
  17. package/src/checkpointer/types.ts +125 -0
  18. package/src/cli/components/ApiKeyInput.tsx +300 -0
  19. package/src/cli/components/FilePreview.tsx +237 -0
  20. package/src/cli/components/Input.tsx +277 -0
  21. package/src/cli/components/Message.tsx +93 -0
  22. package/src/cli/components/ModelSelection.tsx +338 -0
  23. package/src/cli/components/SlashMenu.tsx +101 -0
  24. package/src/cli/components/StatusBar.tsx +89 -0
  25. package/src/cli/components/Subagent.tsx +91 -0
  26. package/src/cli/components/TodoList.tsx +133 -0
  27. package/src/cli/components/ToolApproval.tsx +70 -0
  28. package/src/cli/components/ToolCall.tsx +144 -0
  29. package/src/cli/components/ToolCallSummary.tsx +175 -0
  30. package/src/cli/components/Welcome.tsx +75 -0
  31. package/src/cli/components/index.ts +24 -0
  32. package/src/cli/hooks/index.ts +12 -0
  33. package/src/cli/hooks/useAgent.ts +933 -0
  34. package/src/cli/index.tsx +1066 -0
  35. package/src/cli/theme.ts +205 -0
  36. package/src/cli/utils/model-list.ts +365 -0
  37. package/src/constants/errors.ts +29 -0
  38. package/src/constants/limits.ts +195 -0
  39. package/src/index.ts +176 -0
  40. package/src/middleware/agent-memory.ts +330 -0
  41. package/src/prompts.ts +196 -0
  42. package/src/skills/index.ts +2 -0
  43. package/src/skills/load.ts +191 -0
  44. package/src/skills/types.ts +53 -0
  45. package/src/tools/execute.ts +167 -0
  46. package/src/tools/filesystem.ts +418 -0
  47. package/src/tools/index.ts +39 -0
  48. package/src/tools/subagent.ts +443 -0
  49. package/src/tools/todos.ts +101 -0
  50. package/src/tools/web.ts +567 -0
  51. package/src/types/backend.ts +177 -0
  52. package/src/types/core.ts +220 -0
  53. package/src/types/events.ts +429 -0
  54. package/src/types/index.ts +94 -0
  55. package/src/types/structured-output.ts +43 -0
  56. package/src/types/subagent.ts +96 -0
  57. package/src/types.ts +22 -0
  58. package/src/utils/approval.ts +213 -0
  59. package/src/utils/events.ts +416 -0
  60. package/src/utils/eviction.ts +181 -0
  61. package/src/utils/index.ts +34 -0
  62. package/src/utils/model-parser.ts +38 -0
  63. package/src/utils/patch-tool-calls.ts +233 -0
  64. package/src/utils/project-detection.ts +32 -0
  65. package/src/utils/summarization.ts +254 -0
package/src/prompts.ts ADDED
@@ -0,0 +1,196 @@
1
+ /**
2
+ * System prompts for Deep Agent.
3
+ */
4
+
5
+ export const BASE_PROMPT = `In order to complete the objective that the user asks of you, you have access to a number of standard tools.`;
6
+
7
+ export const TODO_SYSTEM_PROMPT = `## \`write_todos\` (task planning)
8
+
9
+ You have access to a \`write_todos\` tool to help you manage and plan tasks. Use this tool whenever you are working on a complex task.
10
+
11
+ ### When to Use This Tool
12
+
13
+ Use proactively for:
14
+ 1. Complex multi-step tasks (3+ distinct steps)
15
+ 2. Non-trivial tasks requiring careful planning
16
+ 3. After receiving new instructions - capture requirements as todos
17
+ 4. After completing tasks - mark complete and add follow-ups
18
+ 5. When starting new tasks - mark as in_progress (ideally only one at a time)
19
+
20
+ ### When NOT to Use
21
+
22
+ Skip for:
23
+ 1. Single, straightforward tasks
24
+ 2. Trivial tasks with no organizational benefit
25
+ 3. Tasks completable in < 3 trivial steps
26
+ 4. Purely conversational/informational requests
27
+
28
+ ### Task States and Management
29
+
30
+ 1. **Task States:**
31
+ - pending: Not yet started
32
+ - in_progress: Currently working on
33
+ - completed: Finished successfully
34
+ - cancelled: No longer needed
35
+
36
+ 2. **Task Management:**
37
+ - Update status in real-time
38
+ - Mark complete IMMEDIATELY after finishing
39
+ - Only ONE task in_progress at a time
40
+ - Complete current tasks before starting new ones`;
41
+
42
+ export const FILESYSTEM_SYSTEM_PROMPT = `## Virtual Filesystem
43
+
44
+ You have access to a virtual filesystem. All file paths must start with a /.
45
+
46
+ - ls: list files in a directory (requires absolute path)
47
+ - read_file: read a file from the filesystem
48
+ - write_file: write to a file in the filesystem
49
+ - edit_file: edit a file in the filesystem
50
+ - glob: find files matching a pattern (e.g., "**/*.py")
51
+ - grep: search for text within files`;
52
+
53
+ export const TASK_SYSTEM_PROMPT = `## \`task\` (subagent spawner)
54
+
55
+ You have access to a \`task\` tool to launch short-lived subagents that handle isolated tasks. These agents are ephemeral — they live only for the duration of the task and return a single result.
56
+
57
+ When to use the task tool:
58
+ - When a task is complex and multi-step, and can be fully delegated in isolation
59
+ - When a task is independent of other tasks and can run in parallel
60
+ - When a task requires focused reasoning or heavy token/context usage that would bloat the orchestrator thread
61
+ - When sandboxing improves reliability (e.g. code execution, structured searches, data formatting)
62
+ - When you only care about the output of the subagent, and not the intermediate steps
63
+
64
+ Subagent lifecycle:
65
+ 1. **Spawn** → Provide clear role, instructions, and expected output
66
+ 2. **Run** → The subagent completes the task autonomously
67
+ 3. **Return** → The subagent provides a single structured result
68
+ 4. **Reconcile** → Incorporate or synthesize the result into the main thread
69
+
70
+ When NOT to use the task tool:
71
+ - If you need to see the intermediate reasoning or steps after the subagent has completed (the task tool hides them)
72
+ - If the task is trivial (a few tool calls or simple lookup)
73
+ - If delegating does not reduce token usage, complexity, or context switching
74
+ - If splitting would add latency without benefit
75
+
76
+ ## Important Task Tool Usage Notes
77
+ - Whenever possible, parallelize the work that you do. Whenever you have independent steps to complete - kick off tasks (subagents) in parallel to accomplish them faster.
78
+ - Remember to use the \`task\` tool to silo independent tasks within a multi-part objective.
79
+ - You should use the \`task\` tool whenever you have a complex task that will take multiple steps, and is independent from other tasks that the agent needs to complete.`;
80
+
81
+ /**
82
+ * Build the description text for the `task` tool, embedding the available subagent types.
+ *
+ * @param subagentDescriptions - One entry per subagent; the entries are joined with newlines and placed under "Available agent types".
+ * @returns The full description with leading and trailing whitespace trimmed.
83
+ */
84
+ export function getTaskToolDescription(subagentDescriptions: string[]): string {
85
+ return `
86
+ Launch an ephemeral subagent to handle complex, multi-step independent tasks with isolated context windows.
87
+
88
+ Available agent types and the tools they have access to:
89
+ ${subagentDescriptions.join("\n")}
90
+
91
+ When using the Task tool, you must specify a subagent_type parameter to select which agent type to use.
92
+
93
+ ## Usage notes:
94
+ 1. Launch multiple agents concurrently whenever possible, to maximize performance; to do that, use a single message with multiple tool uses
95
+ 2. When the agent is done, it will return a single message back to you. The result returned by the agent is not visible to the user. To show the user the result, you should send a text message back to the user with a concise summary of the result.
96
+ 3. Each agent invocation is stateless. You will not be able to send additional messages to the agent, nor will the agent be able to communicate with you outside of its final report. Therefore, your prompt should contain a highly detailed task description for the agent to perform autonomously and you should specify exactly what information the agent should return back to you in its final and only message to you.
97
+ 4. The agent's outputs should generally be trusted
98
+ 5. Clearly tell the agent whether you expect it to create content, perform analysis, or just do research (search, file reads, web fetches, etc.), since it is not aware of the user's intent
99
+ 6. If the agent description mentions that it should be used proactively, then you should try your best to use it without the user having to ask for it first. Use your judgement.
100
+ 7. When only the general-purpose agent is provided, you should use it for all tasks. It is great for isolating context and token usage, and completing specific, complex tasks, as it has all the same capabilities as the main agent.
101
+
102
+ ### Example usage of the general-purpose agent:
103
+
104
+ <example_agent_descriptions>
105
+ "general-purpose": use this agent for general purpose tasks, it has access to all tools as the main agent.
106
+ </example_agent_descriptions>
107
+
108
+ <example>
109
+ User: "I want to conduct research on the accomplishments of Lebron James, Michael Jordan, and Kobe Bryant, and then compare them."
110
+ Assistant: *Uses the task tool in parallel to conduct isolated research on each of the three players*
111
+ Assistant: *Synthesizes the results of the three isolated research tasks and responds to the User*
112
+ <commentary>
113
+ Research is a complex, multi-step task in it of itself.
114
+ The research of each individual player is not dependent on the research of the other players.
115
+ The assistant uses the task tool to break down the complex objective into three isolated tasks.
116
+ Each research task only needs to worry about context and tokens about one player, then returns synthesized information about each player as the Tool Result.
117
+ This means each research task can dive deep and spend tokens and context deeply researching each player, but the final result is synthesized information, and saves us tokens in the long run when comparing the players to each other.
118
+ </commentary>
119
+ </example>
120
+
121
+ <example>
122
+ User: "Analyze a single large code repository for security vulnerabilities and generate a report."
123
+ Assistant: *Launches a single \`task\` subagent for the repository analysis*
124
+ Assistant: *Receives report and integrates results into final summary*
125
+ <commentary>
126
+ Subagent is used to isolate a large, context-heavy task, even though there is only one. This prevents the main thread from being overloaded with details.
127
+ If the user then asks followup questions, we have a concise report to reference instead of the entire history of analysis and tool calls, which is good and saves us time and money.
128
+ </commentary>
129
+ </example>
130
+ `.trim();
131
+ }
132
+
133
+ export const DEFAULT_GENERAL_PURPOSE_DESCRIPTION =
134
+ "General-purpose agent for researching complex questions, searching for files and content, and executing multi-step tasks. When you are searching for a keyword or file and are not confident that you will find the right match in the first few tries use this agent to perform the search for you. This agent has access to all tools as the main agent.";
135
+
136
+ export const DEFAULT_SUBAGENT_PROMPT =
137
+ "In order to complete the objective that the user asks of you, you have access to a number of standard tools.";
138
+
139
+ export const EXECUTE_SYSTEM_PROMPT = `## \`execute\` (shell command execution)
140
+
141
+ You have access to an \`execute\` tool to run shell commands in the sandbox environment.
142
+
143
+ ### When to Use This Tool
144
+
145
+ Use for:
146
+ - Running build commands (npm install, npm run build, bun install)
147
+ - Running tests (npm test, bun test, pytest)
148
+ - Executing scripts (node script.js, python script.py)
149
+ - Installing dependencies
150
+ - Checking system state (ls, cat, pwd, which)
151
+ - Any shell command that helps accomplish the task
152
+
153
+ ### Important Notes
154
+
155
+ 1. **Exit Codes**: Always check the exit code to determine success
156
+ - 0 = success
157
+ - non-zero = failure
158
+ - null = possibly timed out
159
+
160
+ 2. **Command Chaining**:
161
+ - Use \`&&\` to chain commands that depend on each other
162
+ - Use \`;\` to run commands sequentially regardless of success
163
+
164
+ 3. **Timeouts**: Long-running commands may timeout
165
+
166
+ 4. **Working Directory**: Commands run in the sandbox's working directory`;
167
+
168
/**
 * Render the "Skills System" section of the system prompt.
 *
 * Only each skill's name, description and SKILL.md path are listed
 * (progressive disclosure); the agent is told to read the file itself
 * when a skill applies.
 *
 * @param skills - Skills to advertise in the prompt.
 * @returns The prompt section, or an empty string when `skills` is empty.
 */
export function buildSkillsPrompt(skills: Array<{ name: string; description: string; path: string }>): string {
  if (skills.length === 0) return '';

  // One bullet per skill, followed by a pointer to its instructions file.
  const bullets: string[] = [];
  for (const { name, description, path } of skills) {
    bullets.push(`- **${name}**: ${description}\n → Read \`${path}\` for full instructions`);
  }
  const skillsList = bullets.join('\n');

  return `## Skills System

You have access to a skills library providing specialized domain knowledge and workflows.

**Available Skills:**

${skillsList}

**How to Use Skills (Progressive Disclosure):**

1. **Recognize when a skill applies**: Check if the user's task matches any skill's domain
2. **Read the skill's full instructions**: Use read_file to load the SKILL.md content
3. **Follow the skill's workflow**: Skills contain step-by-step instructions and examples
4. **Access supporting files**: Skills may include helper scripts or configuration files in their directory

Skills provide expert knowledge for specialized tasks. Always read the full skill before using it.`;
}
@@ -0,0 +1,2 @@
1
+ export { listSkills, parseSkillMetadata } from "./load";
2
+ export type { SkillMetadata, SkillLoadOptions } from "./types";
@@ -0,0 +1,191 @@
1
+ import * as fs from "node:fs/promises";
2
+ import * as path from "node:path";
3
+ import os from "node:os";
4
+ import type { SkillMetadata, SkillLoadOptions } from "./types";
5
+ import { findGitRoot } from "../utils/project-detection.js";
6
+
7
/**
 * Parse the YAML frontmatter of a SKILL.md file into skill metadata.
 *
 * Expected format:
 * ---
 * name: skill-name
 * description: What this skill does
 * ---
 *
 * # Skill Content
 * ...
 *
 * Only flat `key: value` lines are understood (this is not a full YAML
 * parser). Windows (CRLF) line endings, a leading byte-order mark, a closing
 * `---` at end of file without a trailing newline, and values wrapped in a
 * matching pair of single or double quotes are all accepted.
 *
 * @param skillMdPath - Path of the SKILL.md file to read; returned unchanged as `path`.
 * @param source - Whether the skill comes from the user or the project directory.
 * @returns The parsed metadata, or `null` (after logging a warning) when the
 *   file cannot be read, has no or empty frontmatter, or lacks `name` or
 *   `description`.
 */
export async function parseSkillMetadata(
  skillMdPath: string,
  source: 'user' | 'project'
): Promise<SkillMetadata | null> {
  try {
    const raw = await fs.readFile(skillMdPath, 'utf-8');

    // Drop a BOM and normalise CRLF/CR to LF so the line-based matching below
    // behaves the same regardless of how the file was saved.
    const content = raw.replace(/^\uFEFF/, '').replace(/\r\n?/g, '\n');

    // Frontmatter sits between two `---` delimiter lines at the very top of
    // the file; the closing delimiter may be the last line of the file.
    const frontmatterPattern = /^---\s*\n(.*?)\n---\s*(?:\n|$)/s;
    const match = content.match(frontmatterPattern);

    if (!match) {
      console.warn(`[Skills] No frontmatter found in ${skillMdPath}`);
      return null;
    }

    const frontmatter = match[1];
    if (!frontmatter) {
      console.warn(`[Skills] Empty frontmatter in ${skillMdPath}`);
      return null;
    }

    // Parse key-value pairs from YAML (simple parsing, no full YAML parser needed)
    const metadata: Record<string, string> = {};
    for (const line of frontmatter.split('\n')) {
      const kvMatch = line.match(/^([\w-]+):\s*(.+)$/);
      if (!kvMatch) {
        continue;
      }
      const [, key, value] = kvMatch;
      if (!key || !value) {
        continue;
      }
      const trimmed = value.trim();
      // YAML quotes are syntax, not part of the scalar: `name: "x"` means x.
      const quoted = /^(["'])(.*)\1$/.exec(trimmed);
      metadata[key] = quoted ? quoted[2] ?? trimmed : trimmed;
    }

    // Validate required fields
    if (!metadata.name || !metadata.description) {
      console.warn(
        `[Skills] Missing required fields (name, description) in ${skillMdPath}`
      );
      return null;
    }

    return {
      name: metadata.name,
      description: metadata.description,
      path: skillMdPath,
      source,
    };
  } catch (error) {
    // Best effort: an unreadable skill file must not break skill discovery.
    console.warn(`[Skills] Failed to parse ${skillMdPath}:`, error);
    return null;
  }
}
72
+
73
/**
 * List all skills found directly under a skills directory.
 *
 * Each immediate, non-hidden subdirectory that contains a `SKILL.md` with
 * valid frontmatter yields one entry. Symlinked entries are skipped with a
 * warning.
 *
 * @param skillsDir - Directory to scan; resolved to an absolute path first.
 * @param source - Tag recorded on every skill found in this directory.
 * @returns The skills found; an empty array when the directory is missing,
 *   is not a directory, or cannot be listed.
 */
async function listSkillsInDirectory(
  skillsDir: string,
  source: 'user' | 'project'
): Promise<SkillMetadata[]> {
  try {
    // Security: Resolve to prevent path traversal
    const resolvedDir = path.resolve(skillsDir);

    // Check if directory exists
    try {
      const stat = await fs.stat(resolvedDir);
      if (!stat.isDirectory()) {
        return [];
      }
    } catch {
      return []; // Directory doesn't exist
    }

    const entries = await fs.readdir(resolvedDir, { withFileTypes: true });
    const skills: SkillMetadata[] = [];

    for (const entry of entries) {
      // Skip hidden entries
      if (entry.name.startsWith('.')) {
        continue;
      }

      // Security: Skip symlinks to prevent traversal attacks.
      // This must run before the isDirectory() check: a Dirent describing a
      // symlink reports isDirectory() === false, so testing the directory
      // first would drop symlinks silently and never reach this warning.
      if (entry.isSymbolicLink()) {
        console.warn(
          `[Skills] Skipping symlink: ${path.join(resolvedDir, entry.name)}`
        );
        continue;
      }

      // Skip anything that is not a directory
      if (!entry.isDirectory()) {
        continue;
      }

      // Look for SKILL.md in subdirectory
      const skillMdPath = path.join(resolvedDir, entry.name, 'SKILL.md');

      try {
        // Probe first so a directory without SKILL.md is skipped quietly
        // instead of producing a parse warning.
        await fs.access(skillMdPath);
        const metadata = await parseSkillMetadata(skillMdPath, source);
        if (metadata) {
          skills.push(metadata);
        }
      } catch {
        // SKILL.md doesn't exist in this directory, skip
        continue;
      }
    }

    return skills;
  } catch (error) {
    console.warn(`[Skills] Failed to list skills in ${skillsDir}:`, error);
    return [];
  }
}
133
+
134
/**
 * Collect skills from the user-level and project-level directories.
 * When both define a skill with the same name, the project skill wins.
 *
 * Two modes are supported:
 * 1. Legacy mode (deprecated): `userSkillsDir` / `projectSkillsDir` are used as given.
 * 2. Agent mode: with `agentId`, user skills come from
 *    `~/.deepagents/{agentId}/skills/` and project skills from
 *    `.deepagents/skills/` under the detected git root. If no git root is
 *    found, the `projectSkillsDir` value passed in (if any) is still used.
 *
 * @param options - Directories and/or agent id controlling where to look.
 * @returns The de-duplicated skills; empty when no location is configured.
 */
export async function listSkills(
  options: SkillLoadOptions
): Promise<SkillMetadata[]> {
  const { userSkillsDir, projectSkillsDir, agentId, workingDirectory } = options;

  // Start from the legacy values; agent mode may replace them below.
  let userDir = userSkillsDir;
  let projectDir = projectSkillsDir;

  if (agentId) {
    userDir = path.join(os.homedir(), '.deepagents', agentId, 'skills');

    // Project skills are shared across agents and live under the git root.
    const gitRoot = await findGitRoot(workingDirectory || process.cwd());
    if (gitRoot) {
      projectDir = path.join(gitRoot, '.deepagents', 'skills');
    }

    const legacyParamsGiven = Boolean(userSkillsDir || projectSkillsDir);
    if (legacyParamsGiven) {
      console.warn(
        '[Skills] agentId parameter takes precedence over userSkillsDir/projectSkillsDir. ' +
          'The latter parameters are deprecated and will be ignored.'
      );
    }
  } else if (!userSkillsDir && !projectSkillsDir) {
    // Nothing to scan.
    return [];
  }

  // User skills go in first so that project skills, inserted afterwards,
  // replace any entry with the same name.
  const locations: Array<[string | undefined, 'user' | 'project']> = [
    [userDir, 'user'],
    [projectDir, 'project'],
  ];

  const byName = new Map<string, SkillMetadata>();
  for (const [dir, origin] of locations) {
    if (!dir) {
      continue;
    }
    const found = await listSkillsInDirectory(dir, origin);
    for (const skill of found) {
      byName.set(skill.name, skill);
    }
  }

  return Array.from(byName.values());
}
@@ -0,0 +1,53 @@
1
/**
 * Description of a single skill, as read from the frontmatter of its SKILL.md.
 */
export interface SkillMetadata {
  /** Unique skill name (kebab-case, e.g. 'web-research'). */
  name: string;

  /** Short description of what the skill does. */
  description: string;

  /** Absolute path to the SKILL.md file. */
  path: string;

  /**
   * Where the skill was found. A project skill overrides a user skill
   * that has the same name.
   */
  source: "user" | "project";
}
26
+
27
/**
 * Options that control where skills are loaded from.
 */
export interface SkillLoadOptions {
  /** User-level skills directory (e.g., ~/.deepagents/skills/). */
  userSkillsDir?: string;

  /** Project-level skills directory (e.g., ./.deepagents/skills/). */
  projectSkillsDir?: string;

  /**
   * Optional agent ID for loading agent-specific skills.
   * When set, skills are looked up in ~/.deepagents/{agentId}/skills/
   * and in .deepagents/skills/ (project-level, shared across agents).
   */
  agentId?: string;

  /**
   * Optional working directory from which the project root is detected.
   * Only used when agentId is provided.
   */
  workingDirectory?: string;
}
@@ -0,0 +1,167 @@
1
+ /**
2
+ * Execute tool for running shell commands in sandbox backends.
3
+ *
4
+ * This tool is only available when the backend implements SandboxBackendProtocol.
5
+ */
6
+
7
+ import { tool } from "ai";
8
+ import { z } from "zod";
9
+ import type { SandboxBackendProtocol, EventCallback } from "../types";
10
+
11
+ /**
12
+ * Tool description for the execute tool.
13
+ */
14
+ const EXECUTE_TOOL_DESCRIPTION = `Execute a shell command in the sandbox environment.
15
+
16
+ Use this tool to:
17
+ - Run build commands (npm install, npm run build, bun install)
18
+ - Run tests (npm test, bun test, pytest)
19
+ - Execute scripts (node script.js, python script.py)
20
+ - Check system state (ls, cat, pwd, which)
21
+ - Install dependencies
22
+ - Run any shell command
23
+
24
+ The command runs in the sandbox's working directory. Commands have a timeout limit.
25
+
26
+ IMPORTANT:
27
+ - Always check the exit code to determine success (0 = success)
28
+ - Long-running commands may timeout
29
+ - Use && to chain commands that depend on each other
30
+ - Use ; to run commands sequentially regardless of success`;
31
+
32
/**
 * Configuration accepted by `createExecuteTool`.
 */
export interface CreateExecuteToolOptions {
  /**
   * The sandbox backend to execute commands in.
   */
  backend: SandboxBackendProtocol;

  /**
   * Optional callback for emitting events.
   */
  onEvent?: EventCallback;

  /**
   * Optional custom description for the tool.
   */
  description?: string;
}
43
+
44
/**
 * Create an execute tool for running shell commands.
 *
 * The tool forwards the command to `backend.execute`, emits `execute-start`
 * and `execute-finish` events through `onEvent` when it is provided, and
 * returns the command output followed by an exit-code line and, if the
 * output was truncated, a truncation note on its own line.
 *
 * @param options - Options including the sandbox backend and optional event callback
 * @returns An AI SDK tool that executes shell commands
 *
 * @example Basic usage
 * ```typescript
 * import { LocalSandbox, createExecuteTool } from 'deepagentsdk';
 *
 * const sandbox = new LocalSandbox({ cwd: './workspace' });
 * const executeTool = createExecuteTool({ backend: sandbox });
 *
 * // Use with agent
 * const agent = createDeepAgent({
 *   model: anthropic('claude-sonnet-4-20250514'),
 *   backend: sandbox,
 *   tools: { execute: executeTool },
 * });
 * ```
 *
 * @example With event streaming
 * ```typescript
 * const executeTool = createExecuteTool({
 *   backend: sandbox,
 *   onEvent: (event) => {
 *     if (event.type === 'execute-start') {
 *       console.log(`Running: ${event.command}`);
 *     } else if (event.type === 'execute-finish') {
 *       console.log(`Exit code: ${event.exitCode}`);
 *     }
 *   },
 * });
 * ```
 */
export function createExecuteTool(options: CreateExecuteToolOptions) {
  const { backend, onEvent, description } = options;

  return tool({
    description: description || EXECUTE_TOOL_DESCRIPTION,
    inputSchema: z.object({
      command: z
        .string()
        .describe("The shell command to execute (e.g., 'npm install', 'ls -la', 'cat file.txt')"),
    }),
    execute: async ({ command }) => {
      // Emit execute-start event
      if (onEvent) {
        onEvent({
          type: "execute-start",
          command,
          sandboxId: backend.id,
        });
      }

      // Execute the command
      const result = await backend.execute(command);

      // Emit execute-finish event
      if (onEvent) {
        onEvent({
          type: "execute-finish",
          command,
          exitCode: result.exitCode,
          truncated: result.truncated,
          sandboxId: backend.id,
        });
      }

      // Format the response. Each trailer carries its own leading newline
      // because the parts are joined without a separator.
      const parts: string[] = [];

      if (result.output) {
        parts.push(result.output);
      }

      // Add exit code information
      if (result.exitCode === 0) {
        parts.push(`\n[Exit code: 0 (success)]`);
      } else if (result.exitCode !== null) {
        parts.push(`\n[Exit code: ${result.exitCode} (failure)]`);
      } else {
        parts.push(`\n[Exit code: unknown (possibly timed out)]`);
      }

      // Note if output was truncated. The leading newline keeps the note on
      // its own line instead of fusing it to the exit-code bracket.
      if (result.truncated) {
        parts.push(`\n[Output truncated due to size limit]`);
      }

      return parts.join("");
    },
  });
}
138
+
139
/**
 * Shorthand for building the execute tool when only a backend is needed,
 * i.e. no event callback and the default tool description.
 *
 * @param backend - The sandbox backend
 * @returns An AI SDK tool that executes shell commands
 *
 * @example
 * ```typescript
 * const sandbox = new LocalSandbox({ cwd: './workspace' });
 * const tools = {
 *   execute: createExecuteToolFromBackend(sandbox),
 * };
 * ```
 */
export function createExecuteToolFromBackend(backend: SandboxBackendProtocol) {
  const options: CreateExecuteToolOptions = { backend };
  return createExecuteTool(options);
}
157
+
158
+ // ============================================================================
159
+ // Individual Tool Reference
160
+ // ============================================================================
161
+
162
+ /**
163
+ * Alias of `createExecuteTool`, exported as an individual builtin tool reference for selective subagent configuration.
164
+ * This is the creator function itself, not a tool instance.
165
+ */
166
+ export const execute = createExecuteTool;
167
+