@samrahimi/smol-js 0.7.1 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +377 -49
- package/dist/cli.js +384 -29
- package/dist/cli.js.map +1 -1
- package/dist/index.d.mts +74 -2
- package/dist/index.d.ts +74 -2
- package/dist/index.js +386 -29
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +383 -27
- package/dist/index.mjs.map +1 -1
- package/package.json +6 -1
package/dist/index.d.mts
CHANGED
|
@@ -165,7 +165,7 @@ interface GenerateOptions {
|
|
|
165
165
|
}
|
|
166
166
|
interface YAMLAgentDefinition {
|
|
167
167
|
name: string;
|
|
168
|
-
type: 'ToolUseAgent' | 'CodeAgent';
|
|
168
|
+
type: 'ToolUseAgent' | 'CodeAgent' | 'TerminalAgent';
|
|
169
169
|
description?: string;
|
|
170
170
|
model?: YAMLModelDefinition;
|
|
171
171
|
tools?: string[];
|
|
@@ -926,6 +926,78 @@ declare class ToolUseAgent extends Agent {
|
|
|
926
926
|
addTool(tool: Tool): void;
|
|
927
927
|
}
|
|
928
928
|
|
|
929
|
+
/**
|
|
930
|
+
* TerminalAgent - Accomplishes tasks by reasoning about and executing shell commands.
|
|
931
|
+
*
|
|
932
|
+
* Unlike CodeAgent (sandboxed JS VM) or ToolUseAgent (structured function calls),
|
|
933
|
+
* TerminalAgent operates by having the LLM emit shell commands in fenced ```sh
|
|
934
|
+
* blocks. Each block is executed on the user's real macOS terminal with full
|
|
935
|
+
* stdout/stderr streaming.
|
|
936
|
+
*
|
|
937
|
+
* Key behaviours:
|
|
938
|
+
* - Verbose logging is always on. The user sees reasoning, pending commands,
|
|
939
|
+
* and live output.
|
|
940
|
+
* - A 5-second delay fires before every command batch so the user can read
|
|
941
|
+
* what is about to run and abort with Ctrl+C.
|
|
942
|
+
* - User-assigned tools are stripped. The agent always has final_answer and
|
|
943
|
+
* any AgentTools (sub-agent delegation) that were wired in via YAML.
|
|
944
|
+
* - Commands run via child_process with stdio piped so output is captured
|
|
945
|
+
* and streamed back to the agent as observations.
|
|
946
|
+
*/
|
|
947
|
+
|
|
948
|
+
/**
 * Options accepted by TerminalAgent in addition to the base AgentConfig.
 */
interface TerminalAgentConfig extends AgentConfig {
    /**
     * Pause, in seconds, inserted before each batch of shell commands is run
     * so the user can review the pending commands and abort with Ctrl+C.
     *
     * @defaultValue 5
     */
    commandDelay?: number;
    /**
     * Upper bound, in characters, on a captured output stream returned to the
     * LLM. The limit is applied to each command's stdout and stderr
     * separately; longer output keeps its head and tail and replaces the
     * middle with an "omitted" marker.
     *
     * @defaultValue 8000
     */
    maxOutputLength?: number;
}
|
|
962
|
+
declare class TerminalAgent extends Agent {
    private readonly commandDelay;
    private readonly maxOutputLength;
    constructor(config: TerminalAgentConfig);
    /**
     * Assemble the system prompt, adding a delegation section when sub-agent
     * tools are registered.
     */
    protected initializeSystemPrompt(): string;
    /**
     * Run one iteration of the ReAct loop:
     * 1. Query the LLM, offering the sub-agent (AgentTool) definitions as tools.
     * 2. Log any "Thought:" / "Reasoning:" / "Think:" section of the reply.
     * 3. If the reply has a line starting with FINAL_ANSWER:, return it as the final answer.
     * 4. Otherwise, if the reply carries tool calls, run them and record the results.
     * 5. Otherwise wait for the command delay, run each line of each fenced
     *    shell block, and record stdout / stderr / exit code as the observation.
     */
    protected executeStep(memoryStep: ActionStep): Promise<ActionOutput>;
    /**
     * Execute one shell command and report its stdout, stderr and exit code.
     */
    private runCommand;
    /**
     * Shorten over-long output by keeping its beginning and end.
     */
    private truncateOutput;
    /**
     * Obtain the model reply, streaming it when streaming is enabled and supported.
     */
    private generateResponse;
    /**
     * Invoke the tools named in the model's tool calls and collect their results.
     */
    private processToolCalls;
    /**
     * Override: ask the model for a best-effort answer through the
     * final_answer tool once the step limit has been reached.
     */
    protected provideFinalAnswer(task: string): Promise<unknown>;
}
|
|
1000
|
+
|
|
929
1001
|
/**
|
|
930
1002
|
* OpenAI-compatible Model implementation
|
|
931
1003
|
*
|
|
@@ -1514,4 +1586,4 @@ declare class Orchestrator {
|
|
|
1514
1586
|
getRunId(): string | undefined;
|
|
1515
1587
|
}
|
|
1516
1588
|
|
|
1517
|
-
export { type ActionOutput, type ActionStep, Agent, type AgentConfig, type AgentConfig$1 as AgentConfigType, AgentLogger, AgentMemory, AgentTool, type AgentToolConfig, type ChatMessage, CodeAgent, type CodeAgentConfig, type CodeExecutionOutput, CurlTool, type CustomToolMetadata, type DiscoveredTool, ExaGetContentsTool, ExaResearchTool, ExaSearchTool, type ExecutorConfig, FINAL_ANSWER_PROMPT, type FinalAnswerStep, FinalAnswerTool, type GenerateOptions, type JSONAgentEndEvent, type JSONAgentObservationEvent, type JSONAgentStartEvent, type JSONAgentStepEvent, type JSONAgentThinkingEvent, type JSONAgentToolCallEvent, type JSONAgentToolResultEvent, type JSONErrorEvent, type JSONEvent, type JSONEventBase, type JSONEventType, type JSONLogEvent, type JSONOutputConfig, JSONOutputHandler, type JSONRunEndEvent, type JSONRunStartEvent, type JSONWorkflowLoadedEvent, type LoadedWorkflow, LocalExecutor, LogLevel, type MemoryStep, type MemoryStrategy, type MessageRole, Model, type ModelConfig, OpenAIModel, type OpenAIModelConfig, type OpenAIToolDefinition, Orchestrator, type OrchestratorConfig, type OrchestratorEvent, type OutputFormat, type PromptVariables, ProxyTool, type ProxyToolConfig, ReadFileTool, type RunResult, type StreamEvent, type SystemPromptStep, type TaskStep, type Timing, type TokenUsage, Tool, type ToolCall, type ToolCallResult, type ToolInput, type ToolInputType, type ToolInputs, ToolUseAgent, type ToolUseAgentConfig, type ToolUsePromptVariables, UserInputTool, WriteFileTool, type YAMLAgentDefinition, YAMLLoader, type YAMLModelDefinition, type YAMLToolDefinition, type YAMLWorkflowDefinition, agentAsTool, createTool, finalAnswerTool, formatToolDescriptions, generateSystemPrompt, generateToolUseSystemPrompt, getErrorRecoveryPrompt, loadCustomTools, scanCustomTools };
|
|
1589
|
+
export { type ActionOutput, type ActionStep, Agent, type AgentConfig, type AgentConfig$1 as AgentConfigType, AgentLogger, AgentMemory, AgentTool, type AgentToolConfig, type ChatMessage, CodeAgent, type CodeAgentConfig, type CodeExecutionOutput, CurlTool, type CustomToolMetadata, type DiscoveredTool, ExaGetContentsTool, ExaResearchTool, ExaSearchTool, type ExecutorConfig, FINAL_ANSWER_PROMPT, type FinalAnswerStep, FinalAnswerTool, type GenerateOptions, type JSONAgentEndEvent, type JSONAgentObservationEvent, type JSONAgentStartEvent, type JSONAgentStepEvent, type JSONAgentThinkingEvent, type JSONAgentToolCallEvent, type JSONAgentToolResultEvent, type JSONErrorEvent, type JSONEvent, type JSONEventBase, type JSONEventType, type JSONLogEvent, type JSONOutputConfig, JSONOutputHandler, type JSONRunEndEvent, type JSONRunStartEvent, type JSONWorkflowLoadedEvent, type LoadedWorkflow, LocalExecutor, LogLevel, type MemoryStep, type MemoryStrategy, type MessageRole, Model, type ModelConfig, OpenAIModel, type OpenAIModelConfig, type OpenAIToolDefinition, Orchestrator, type OrchestratorConfig, type OrchestratorEvent, type OutputFormat, type PromptVariables, ProxyTool, type ProxyToolConfig, ReadFileTool, type RunResult, type StreamEvent, type SystemPromptStep, type TaskStep, TerminalAgent, type TerminalAgentConfig, type Timing, type TokenUsage, Tool, type ToolCall, type ToolCallResult, type ToolInput, type ToolInputType, type ToolInputs, ToolUseAgent, type ToolUseAgentConfig, type ToolUsePromptVariables, UserInputTool, WriteFileTool, type YAMLAgentDefinition, YAMLLoader, type YAMLModelDefinition, type YAMLToolDefinition, type YAMLWorkflowDefinition, agentAsTool, createTool, finalAnswerTool, formatToolDescriptions, generateSystemPrompt, generateToolUseSystemPrompt, getErrorRecoveryPrompt, loadCustomTools, scanCustomTools };
|
package/dist/index.d.ts
CHANGED
|
@@ -165,7 +165,7 @@ interface GenerateOptions {
|
|
|
165
165
|
}
|
|
166
166
|
interface YAMLAgentDefinition {
|
|
167
167
|
name: string;
|
|
168
|
-
type: 'ToolUseAgent' | 'CodeAgent';
|
|
168
|
+
type: 'ToolUseAgent' | 'CodeAgent' | 'TerminalAgent';
|
|
169
169
|
description?: string;
|
|
170
170
|
model?: YAMLModelDefinition;
|
|
171
171
|
tools?: string[];
|
|
@@ -926,6 +926,78 @@ declare class ToolUseAgent extends Agent {
|
|
|
926
926
|
addTool(tool: Tool): void;
|
|
927
927
|
}
|
|
928
928
|
|
|
929
|
+
/**
|
|
930
|
+
* TerminalAgent - Accomplishes tasks by reasoning about and executing shell commands.
|
|
931
|
+
*
|
|
932
|
+
* Unlike CodeAgent (sandboxed JS VM) or ToolUseAgent (structured function calls),
|
|
933
|
+
* TerminalAgent operates by having the LLM emit shell commands in fenced ```sh
|
|
934
|
+
* blocks. Each block is executed on the user's real macOS terminal with full
|
|
935
|
+
* stdout/stderr streaming.
|
|
936
|
+
*
|
|
937
|
+
* Key behaviours:
|
|
938
|
+
* - Verbose logging is always on. The user sees reasoning, pending commands,
|
|
939
|
+
* and live output.
|
|
940
|
+
* - A 5-second delay fires before every command batch so the user can read
|
|
941
|
+
* what is about to run and abort with Ctrl+C.
|
|
942
|
+
* - User-assigned tools are stripped. The agent always has final_answer and
|
|
943
|
+
* any AgentTools (sub-agent delegation) that were wired in via YAML.
|
|
944
|
+
* - Commands run via child_process with stdio piped so output is captured
|
|
945
|
+
* and streamed back to the agent as observations.
|
|
946
|
+
*/
|
|
947
|
+
|
|
948
|
+
/**
 * Options accepted by TerminalAgent in addition to the base AgentConfig.
 */
interface TerminalAgentConfig extends AgentConfig {
    /**
     * Pause, in seconds, inserted before each batch of shell commands is run
     * so the user can review the pending commands and abort with Ctrl+C.
     *
     * @defaultValue 5
     */
    commandDelay?: number;
    /**
     * Upper bound, in characters, on a captured output stream returned to the
     * LLM. The limit is applied to each command's stdout and stderr
     * separately; longer output keeps its head and tail and replaces the
     * middle with an "omitted" marker.
     *
     * @defaultValue 8000
     */
    maxOutputLength?: number;
}
|
|
962
|
+
declare class TerminalAgent extends Agent {
    private readonly commandDelay;
    private readonly maxOutputLength;
    constructor(config: TerminalAgentConfig);
    /**
     * Assemble the system prompt, adding a delegation section when sub-agent
     * tools are registered.
     */
    protected initializeSystemPrompt(): string;
    /**
     * Run one iteration of the ReAct loop:
     * 1. Query the LLM, offering the sub-agent (AgentTool) definitions as tools.
     * 2. Log any "Thought:" / "Reasoning:" / "Think:" section of the reply.
     * 3. If the reply has a line starting with FINAL_ANSWER:, return it as the final answer.
     * 4. Otherwise, if the reply carries tool calls, run them and record the results.
     * 5. Otherwise wait for the command delay, run each line of each fenced
     *    shell block, and record stdout / stderr / exit code as the observation.
     */
    protected executeStep(memoryStep: ActionStep): Promise<ActionOutput>;
    /**
     * Execute one shell command and report its stdout, stderr and exit code.
     */
    private runCommand;
    /**
     * Shorten over-long output by keeping its beginning and end.
     */
    private truncateOutput;
    /**
     * Obtain the model reply, streaming it when streaming is enabled and supported.
     */
    private generateResponse;
    /**
     * Invoke the tools named in the model's tool calls and collect their results.
     */
    private processToolCalls;
    /**
     * Override: ask the model for a best-effort answer through the
     * final_answer tool once the step limit has been reached.
     */
    protected provideFinalAnswer(task: string): Promise<unknown>;
}
|
|
1000
|
+
|
|
929
1001
|
/**
|
|
930
1002
|
* OpenAI-compatible Model implementation
|
|
931
1003
|
*
|
|
@@ -1514,4 +1586,4 @@ declare class Orchestrator {
|
|
|
1514
1586
|
getRunId(): string | undefined;
|
|
1515
1587
|
}
|
|
1516
1588
|
|
|
1517
|
-
export { type ActionOutput, type ActionStep, Agent, type AgentConfig, type AgentConfig$1 as AgentConfigType, AgentLogger, AgentMemory, AgentTool, type AgentToolConfig, type ChatMessage, CodeAgent, type CodeAgentConfig, type CodeExecutionOutput, CurlTool, type CustomToolMetadata, type DiscoveredTool, ExaGetContentsTool, ExaResearchTool, ExaSearchTool, type ExecutorConfig, FINAL_ANSWER_PROMPT, type FinalAnswerStep, FinalAnswerTool, type GenerateOptions, type JSONAgentEndEvent, type JSONAgentObservationEvent, type JSONAgentStartEvent, type JSONAgentStepEvent, type JSONAgentThinkingEvent, type JSONAgentToolCallEvent, type JSONAgentToolResultEvent, type JSONErrorEvent, type JSONEvent, type JSONEventBase, type JSONEventType, type JSONLogEvent, type JSONOutputConfig, JSONOutputHandler, type JSONRunEndEvent, type JSONRunStartEvent, type JSONWorkflowLoadedEvent, type LoadedWorkflow, LocalExecutor, LogLevel, type MemoryStep, type MemoryStrategy, type MessageRole, Model, type ModelConfig, OpenAIModel, type OpenAIModelConfig, type OpenAIToolDefinition, Orchestrator, type OrchestratorConfig, type OrchestratorEvent, type OutputFormat, type PromptVariables, ProxyTool, type ProxyToolConfig, ReadFileTool, type RunResult, type StreamEvent, type SystemPromptStep, type TaskStep, type Timing, type TokenUsage, Tool, type ToolCall, type ToolCallResult, type ToolInput, type ToolInputType, type ToolInputs, ToolUseAgent, type ToolUseAgentConfig, type ToolUsePromptVariables, UserInputTool, WriteFileTool, type YAMLAgentDefinition, YAMLLoader, type YAMLModelDefinition, type YAMLToolDefinition, type YAMLWorkflowDefinition, agentAsTool, createTool, finalAnswerTool, formatToolDescriptions, generateSystemPrompt, generateToolUseSystemPrompt, getErrorRecoveryPrompt, loadCustomTools, scanCustomTools };
|
|
1589
|
+
export { type ActionOutput, type ActionStep, Agent, type AgentConfig, type AgentConfig$1 as AgentConfigType, AgentLogger, AgentMemory, AgentTool, type AgentToolConfig, type ChatMessage, CodeAgent, type CodeAgentConfig, type CodeExecutionOutput, CurlTool, type CustomToolMetadata, type DiscoveredTool, ExaGetContentsTool, ExaResearchTool, ExaSearchTool, type ExecutorConfig, FINAL_ANSWER_PROMPT, type FinalAnswerStep, FinalAnswerTool, type GenerateOptions, type JSONAgentEndEvent, type JSONAgentObservationEvent, type JSONAgentStartEvent, type JSONAgentStepEvent, type JSONAgentThinkingEvent, type JSONAgentToolCallEvent, type JSONAgentToolResultEvent, type JSONErrorEvent, type JSONEvent, type JSONEventBase, type JSONEventType, type JSONLogEvent, type JSONOutputConfig, JSONOutputHandler, type JSONRunEndEvent, type JSONRunStartEvent, type JSONWorkflowLoadedEvent, type LoadedWorkflow, LocalExecutor, LogLevel, type MemoryStep, type MemoryStrategy, type MessageRole, Model, type ModelConfig, OpenAIModel, type OpenAIModelConfig, type OpenAIToolDefinition, Orchestrator, type OrchestratorConfig, type OrchestratorEvent, type OutputFormat, type PromptVariables, ProxyTool, type ProxyToolConfig, ReadFileTool, type RunResult, type StreamEvent, type SystemPromptStep, type TaskStep, TerminalAgent, type TerminalAgentConfig, type Timing, type TokenUsage, Tool, type ToolCall, type ToolCallResult, type ToolInput, type ToolInputType, type ToolInputs, ToolUseAgent, type ToolUseAgentConfig, type ToolUsePromptVariables, UserInputTool, WriteFileTool, type YAMLAgentDefinition, YAMLLoader, type YAMLModelDefinition, type YAMLToolDefinition, type YAMLWorkflowDefinition, agentAsTool, createTool, finalAnswerTool, formatToolDescriptions, generateSystemPrompt, generateToolUseSystemPrompt, getErrorRecoveryPrompt, loadCustomTools, scanCustomTools };
|
package/dist/index.js
CHANGED
|
@@ -49,6 +49,7 @@ __export(index_exports, {
|
|
|
49
49
|
Orchestrator: () => Orchestrator,
|
|
50
50
|
ProxyTool: () => ProxyTool,
|
|
51
51
|
ReadFileTool: () => ReadFileTool,
|
|
52
|
+
TerminalAgent: () => TerminalAgent,
|
|
52
53
|
Tool: () => Tool,
|
|
53
54
|
ToolUseAgent: () => ToolUseAgent,
|
|
54
55
|
UserInputTool: () => UserInputTool,
|
|
@@ -2108,6 +2109,369 @@ Please try a different approach.`
|
|
|
2108
2109
|
}
|
|
2109
2110
|
};
|
|
2110
2111
|
|
|
2112
|
+
// src/agents/TerminalAgent.ts
|
|
2113
|
+
var import_child_process = require("child_process");
|
|
2114
|
+
|
|
2115
|
+
// src/prompts/terminalAgent.ts
|
|
2116
|
+
/**
 * Build the system prompt for a TerminalAgent.
 *
 * @param variables
 * @param variables.customInstructions Optional extra instructions; when truthy
 *   they are appended under an "## Additional Instructions" heading.
 * @param variables.hasSubAgents When truthy, a "## Delegation to Sub-Agents"
 *   section is included.
 * @param variables.subAgentDescriptions Pre-formatted list of sub-agents that
 *   is inserted verbatim into the delegation section.
 * @param variables.commandDelay Optional pre-execution delay in seconds that
 *   the prompt mentions to the model. Defaults to 5, which reproduces the
 *   previously hard-coded wording; non-finite values also fall back to 5.
 * @returns The complete prompt text.
 */
function generateTerminalAgentSystemPrompt(variables) {
  const { customInstructions, hasSubAgents, subAgentDescriptions, commandDelay } = variables;
  // The prompt text must agree with the delay the agent actually applies, so
  // it is a parameter rather than a literal "5".
  const delaySeconds = Number.isFinite(commandDelay) ? commandDelay : 5;
  let delegationSection = "";
  if (hasSubAgents) {
    delegationSection = `
## Delegation to Sub-Agents

You can delegate tasks to specialized sub-agents when terminal commands alone
are not sufficient. Available sub-agents:

${subAgentDescriptions}

To delegate, call the sub-agent tool with a clear task description. Wait for
its result before continuing. Sub-agents handle their own tool calls internally.
`;
  }
  const additionalSection = customInstructions ? `## Additional Instructions

${customInstructions}` : "";
  return `You are a terminal operations agent running on macOS. You accomplish tasks
by reasoning about what shell commands to run and executing them one or more at
a time.

## How You Work

You follow a ReAct (Reasoning + Acting) loop:
1. **Think**: Analyze the task and decide what commands to run next.
2. **Act**: Emit one or more shell commands inside a fenced code block.
3. **Observe**: Review the command output (stdout, stderr, exit code) that the
framework feeds back to you.
4. Repeat until the task is complete, then signal your final answer (see below).

## Emitting Commands

Place your shell commands inside a fenced sh code block. You may include
multiple commands separated by newlines. Each command is executed
sequentially in its own shell invocation on the user's macOS terminal:

\`\`\`sh
echo "hello"
\`\`\`

\`\`\`sh
ls -la ~/Documents
pwd
\`\`\`

**Important**: Each code block is treated as a batch. Commands within a single
block run sequentially. The output of the entire block is returned as one
observation. If you need to inspect intermediate output before proceeding,
use separate blocks across steps.

## Rules

1. **Safety first**: Before running anything destructive (rm, mv on important
files, format commands, etc.), explain what you are about to do in your
reasoning so the user can read it during the ${delaySeconds}-second delay and abort with
Ctrl+C if needed.

2. **Signalling completion**: When the task is done, do NOT emit any more
\`\`\`sh blocks. Instead, write your summary on a line that starts with
the exact marker:

FINAL_ANSWER: <your summary here>

Everything after "FINAL_ANSWER: " (including multiple lines) is captured
as your final answer. This is how you tell the framework you are finished.

3. **Handle errors**: If a command fails (non-zero exit code or stderr output),
analyze what went wrong and try a corrective approach. Do not repeat the
exact same command.

4. **Be explicit about paths**: Use absolute paths or \`cd\` explicitly. Do not
assume the working directory persists between steps (it does not).

5. **macOS conventions**: Use macOS / BSD variants of commands (e.g. \`gstat\`
may not exist; use \`stat\` with the right flags). Prefer Homebrew paths
(\`/opt/homebrew/bin\`) for installed tools. Use \`sw_vers\` for OS info.

6. **Prefer streaming-friendly commands**: Avoid commands that buffer all output
until completion. Prefer tools that print as they go.

7. **No interactive prompts**: Do not run commands that wait for user input
(e.g. \`ssh\` without key auth, interactive installers). If a command would
prompt, pass flags to make it non-interactive or use \`yes |\` piping.
${delegationSection}
${additionalSection}

Begin. Think about the task, then emit your first shell command(s).`;
}
|
|
2205
|
+
|
|
2206
|
+
// src/agents/TerminalAgent.ts
|
|
2207
|
+
// Matches one fenced shell block (```sh, ```bash, ```shell or ```zsh) and
// captures its body. Spaces or tabs after the language tag are tolerated so a
// fence written as "```sh " is still recognised.
var SH_BLOCK_REGEX = /```(?:sh|bash|shell|zsh)[ \t]*\n([\s\S]*?)```/g;
// Captures the text after a "Thought:", "Reasoning:" or "Think:" label up to
// the next code fence, the FINAL_ANSWER marker, or the end of the string.
var THOUGHT_REGEX2 = /(?:Thought|Reasoning|Think):\s*([\s\S]*?)(?=```|FINAL_ANSWER:|$)/i;
// Matches a line that starts with "FINAL_ANSWER:" and captures everything
// after it, including any following lines.
var FINAL_ANSWER_MARKER = /^FINAL_ANSWER:\s*([\s\S]*)$/m;
// Separator used to split a shell block into individual command lines.
var NEWLINE = "\n";
|
|
2211
|
+
var TerminalAgent = class extends Agent {
|
|
2212
|
+
commandDelay;
|
|
2213
|
+
maxOutputLength;
|
|
2214
|
+
constructor(config) {
|
|
2215
|
+
super({ ...config, verboseLevel: 2 /* DEBUG */ });
|
|
2216
|
+
this.commandDelay = config.commandDelay ?? 5;
|
|
2217
|
+
this.maxOutputLength = config.maxOutputLength ?? 8e3;
|
|
2218
|
+
const keepTools = /* @__PURE__ */ new Map();
|
|
2219
|
+
for (const [name, tool] of this.tools) {
|
|
2220
|
+
if (tool.constructor.name === "AgentTool") {
|
|
2221
|
+
keepTools.set(name, tool);
|
|
2222
|
+
}
|
|
2223
|
+
}
|
|
2224
|
+
this.tools = keepTools;
|
|
2225
|
+
if (!this.tools.has("final_answer")) {
|
|
2226
|
+
this.tools.set("final_answer", new FinalAnswerTool());
|
|
2227
|
+
}
|
|
2228
|
+
}
|
|
2229
|
+
/**
|
|
2230
|
+
* Build the system prompt. Includes delegation info if sub-agents are present.
|
|
2231
|
+
*/
|
|
2232
|
+
initializeSystemPrompt() {
|
|
2233
|
+
const agentTools = Array.from(this.tools.values()).filter(
|
|
2234
|
+
(t) => t.constructor.name === "AgentTool"
|
|
2235
|
+
);
|
|
2236
|
+
const hasSubAgents = agentTools.length > 0;
|
|
2237
|
+
const subAgentDescriptions = agentTools.map((t) => `- **${t.name}**: ${t.description}`).join("\n");
|
|
2238
|
+
return generateTerminalAgentSystemPrompt({
|
|
2239
|
+
customInstructions: this.config.customInstructions,
|
|
2240
|
+
hasSubAgents,
|
|
2241
|
+
subAgentDescriptions
|
|
2242
|
+
});
|
|
2243
|
+
}
|
|
2244
|
+
/**
|
|
2245
|
+
* Execute one step of the ReAct loop:
|
|
2246
|
+
* 1. Send messages to LLM (with tool defs for final_answer / delegation)
|
|
2247
|
+
* 2. Extract reasoning and ```sh blocks from the response
|
|
2248
|
+
* 3. If tool calls present (final_answer or delegation), process them
|
|
2249
|
+
* 4. Otherwise execute shell commands with the pre-execution delay
|
|
2250
|
+
* 5. Feed stdout/stderr/exit-code back as observation
|
|
2251
|
+
*/
|
|
2252
|
+
async executeStep(memoryStep) {
|
|
2253
|
+
const messages = this.memory.toMessages();
|
|
2254
|
+
memoryStep.modelInputMessages = [...messages];
|
|
2255
|
+
const actionSteps = this.memory.getActionSteps();
|
|
2256
|
+
const prevStep = actionSteps.length >= 2 ? actionSteps[actionSteps.length - 2] : void 0;
|
|
2257
|
+
if (prevStep?.error) {
|
|
2258
|
+
messages.push({
|
|
2259
|
+
role: "user",
|
|
2260
|
+
content: `Your previous action encountered an error: ${prevStep.error.message}
|
|
2261
|
+
Please try a different approach.`
|
|
2262
|
+
});
|
|
2263
|
+
}
|
|
2264
|
+
const delegationTools = Array.from(this.tools.values()).filter((t) => t.constructor.name === "AgentTool");
|
|
2265
|
+
const toolDefinitions = delegationTools.map((t) => t.toOpenAITool());
|
|
2266
|
+
this.logger.subheader("Agent thinking...");
|
|
2267
|
+
const response = await this.generateResponse(messages, toolDefinitions);
|
|
2268
|
+
memoryStep.modelOutputMessage = response;
|
|
2269
|
+
memoryStep.tokenUsage = response.tokenUsage;
|
|
2270
|
+
const content = response.content ?? "";
|
|
2271
|
+
const thoughtMatch = content.match(THOUGHT_REGEX2);
|
|
2272
|
+
if (thoughtMatch) {
|
|
2273
|
+
this.logger.reasoning(thoughtMatch[1].trim());
|
|
2274
|
+
this.emitEvent("agent_thinking", { step: this.currentStep, content: thoughtMatch[1].trim() });
|
|
2275
|
+
}
|
|
2276
|
+
const finalMatch = content.match(FINAL_ANSWER_MARKER);
|
|
2277
|
+
if (finalMatch) {
|
|
2278
|
+
const answer = finalMatch[1].trim();
|
|
2279
|
+
this.logger.finalAnswer(answer);
|
|
2280
|
+
return { output: answer, isFinalAnswer: true };
|
|
2281
|
+
}
|
|
2282
|
+
if (response.toolCalls && response.toolCalls.length > 0) {
|
|
2283
|
+
memoryStep.toolCalls = response.toolCalls;
|
|
2284
|
+
const results = await this.processToolCalls(response.toolCalls);
|
|
2285
|
+
memoryStep.toolResults = results;
|
|
2286
|
+
for (const result of results) {
|
|
2287
|
+
if (result.error) {
|
|
2288
|
+
this.logger.error(`Tool ${result.toolName} failed: ${result.error}`);
|
|
2289
|
+
} else {
|
|
2290
|
+
const str = typeof result.result === "string" ? result.result : JSON.stringify(result.result, null, 2);
|
|
2291
|
+
this.logger.output(`[${result.toolName}]: ${str.slice(0, 500)}${str.length > 500 ? "..." : ""}`);
|
|
2292
|
+
}
|
|
2293
|
+
}
|
|
2294
|
+
const obs = results.map((r) => r.error ? `[${r.toolName}] Error: ${r.error}` : `[${r.toolName}] Result: ${typeof r.result === "string" ? r.result : JSON.stringify(r.result)}`).join("\n");
|
|
2295
|
+
memoryStep.observation = `Observation:
|
|
2296
|
+
${obs}`;
|
|
2297
|
+
return { output: null, isFinalAnswer: false };
|
|
2298
|
+
}
|
|
2299
|
+
const blocks = [];
|
|
2300
|
+
let match;
|
|
2301
|
+
const regex = new RegExp(SH_BLOCK_REGEX.source, "g");
|
|
2302
|
+
while ((match = regex.exec(content)) !== null) {
|
|
2303
|
+
blocks.push(match[1].trim());
|
|
2304
|
+
}
|
|
2305
|
+
if (blocks.length === 0) {
|
|
2306
|
+
this.logger.warn("No shell commands or tool calls in response.");
|
|
2307
|
+
memoryStep.observation = "No shell command block was found in your response. Emit commands inside a ```sh code block, or call final_answer if the task is complete.";
|
|
2308
|
+
return { output: null, isFinalAnswer: false };
|
|
2309
|
+
}
|
|
2310
|
+
const allCommands = blocks.join("\n---\n");
|
|
2311
|
+
this.logger.code(allCommands, "sh");
|
|
2312
|
+
this.emitEvent("agent_observation", {
|
|
2313
|
+
step: this.currentStep,
|
|
2314
|
+
observation: `Pending commands:
|
|
2315
|
+
${allCommands}`
|
|
2316
|
+
});
|
|
2317
|
+
this.logger.waiting(this.commandDelay);
|
|
2318
|
+
await this.sleep(this.commandDelay * 1e3);
|
|
2319
|
+
const observations = [];
|
|
2320
|
+
let hitError = false;
|
|
2321
|
+
for (let i = 0; i < blocks.length; i++) {
|
|
2322
|
+
const block = blocks[i];
|
|
2323
|
+
const commands = block.split(NEWLINE).filter((l) => l.trim() && !l.trim().startsWith("#"));
|
|
2324
|
+
this.logger.subheader(`Executing command block ${blocks.length > 1 ? `${i + 1}/${blocks.length}` : ""}...`);
|
|
2325
|
+
for (const cmd of commands) {
|
|
2326
|
+
this.logger.info(` $ ${cmd}`);
|
|
2327
|
+
const result = this.runCommand(cmd);
|
|
2328
|
+
if (result.stdout) {
|
|
2329
|
+
this.logger.logs(result.stdout);
|
|
2330
|
+
}
|
|
2331
|
+
if (result.stderr) {
|
|
2332
|
+
this.logger.error(`stderr: ${result.stderr}`);
|
|
2333
|
+
}
|
|
2334
|
+
observations.push(
|
|
2335
|
+
`$ ${cmd}
|
|
2336
|
+
` + (result.stdout ? `stdout:
|
|
2337
|
+
${result.stdout}` : "") + (result.stderr ? `stderr:
|
|
2338
|
+
${result.stderr}` : "") + `exit code: ${result.exitCode}`
|
|
2339
|
+
);
|
|
2340
|
+
if (result.exitCode !== 0) {
|
|
2341
|
+
hitError = true;
|
|
2342
|
+
break;
|
|
2343
|
+
}
|
|
2344
|
+
}
|
|
2345
|
+
if (hitError) break;
|
|
2346
|
+
}
|
|
2347
|
+
const observation = observations.join("\n\n");
|
|
2348
|
+
memoryStep.observation = `Observation:
|
|
2349
|
+
${observation}`;
|
|
2350
|
+
if (hitError) {
|
|
2351
|
+
memoryStep.error = new Error("Command exited with non-zero status. See observation for details.");
|
|
2352
|
+
}
|
|
2353
|
+
return { output: observation, isFinalAnswer: false };
|
|
2354
|
+
}
|
|
2355
|
+
/**
|
|
2356
|
+
* Run a single shell command, capture stdout/stderr, return structured result.
|
|
2357
|
+
*/
|
|
2358
|
+
runCommand(cmd) {
|
|
2359
|
+
try {
|
|
2360
|
+
const stdout = (0, import_child_process.execSync)(cmd, {
|
|
2361
|
+
encoding: "utf8",
|
|
2362
|
+
stdio: ["pipe", "pipe", "pipe"],
|
|
2363
|
+
shell: "/bin/zsh",
|
|
2364
|
+
timeout: 12e4,
|
|
2365
|
+
// 2-minute per-command timeout
|
|
2366
|
+
maxBuffer: 50 * 1024 * 1024
|
|
2367
|
+
// 50 MB
|
|
2368
|
+
});
|
|
2369
|
+
return { stdout: this.truncateOutput(stdout), stderr: "", exitCode: 0 };
|
|
2370
|
+
} catch (err) {
|
|
2371
|
+
const e = err;
|
|
2372
|
+
return {
|
|
2373
|
+
stdout: this.truncateOutput(e.stdout ?? ""),
|
|
2374
|
+
stderr: this.truncateOutput(e.stderr ?? e.message),
|
|
2375
|
+
exitCode: e.status ?? 1
|
|
2376
|
+
};
|
|
2377
|
+
}
|
|
2378
|
+
}
|
|
2379
|
+
/**
|
|
2380
|
+
* Truncate long output, preserving head and tail so context stays useful.
|
|
2381
|
+
*/
|
|
2382
|
+
truncateOutput(output) {
|
|
2383
|
+
if (output.length <= this.maxOutputLength) return output;
|
|
2384
|
+
const half = Math.floor(this.maxOutputLength / 2);
|
|
2385
|
+
const head = output.slice(0, half);
|
|
2386
|
+
const tail = output.slice(output.length - half);
|
|
2387
|
+
const omitted = output.length - this.maxOutputLength;
|
|
2388
|
+
return `${head}
|
|
2389
|
+
|
|
2390
|
+
... [${omitted} characters omitted] ...
|
|
2391
|
+
|
|
2392
|
+
${tail}`;
|
|
2393
|
+
}
|
|
2394
|
+
/**
|
|
2395
|
+
* Generate response, with streaming if available.
|
|
2396
|
+
*/
|
|
2397
|
+
async generateResponse(messages, toolDefinitions) {
|
|
2398
|
+
if (this.config.streamOutputs && this.model.supportsStreaming() && this.model.generateStream) {
|
|
2399
|
+
let fullContent = "";
|
|
2400
|
+
const generator = this.model.generateStream(messages, {
|
|
2401
|
+
toolDefinitions,
|
|
2402
|
+
maxTokens: this.config.maxTokens,
|
|
2403
|
+
temperature: this.config.temperature
|
|
2404
|
+
});
|
|
2405
|
+
for await (const chunk of generator) {
|
|
2406
|
+
this.logger.streamChar(chunk);
|
|
2407
|
+
fullContent += chunk;
|
|
2408
|
+
}
|
|
2409
|
+
this.logger.streamEnd();
|
|
2410
|
+
return { role: "assistant", content: fullContent };
|
|
2411
|
+
}
|
|
2412
|
+
return this.model.generate(messages, {
|
|
2413
|
+
toolDefinitions,
|
|
2414
|
+
maxTokens: this.config.maxTokens,
|
|
2415
|
+
temperature: this.config.temperature
|
|
2416
|
+
});
|
|
2417
|
+
}
|
|
2418
|
+
/**
|
|
2419
|
+
* Process tool calls (final_answer or AgentTool delegation).
|
|
2420
|
+
*/
|
|
2421
|
+
async processToolCalls(toolCalls) {
|
|
2422
|
+
const results = [];
|
|
2423
|
+
for (const tc of toolCalls) {
|
|
2424
|
+
const toolName = tc.function.name;
|
|
2425
|
+
const tool = this.tools.get(toolName);
|
|
2426
|
+
if (!tool) {
|
|
2427
|
+
results.push({ toolCallId: tc.id, toolName, result: null, error: `Unknown tool: ${toolName}` });
|
|
2428
|
+
continue;
|
|
2429
|
+
}
|
|
2430
|
+
let args;
|
|
2431
|
+
try {
|
|
2432
|
+
args = typeof tc.function.arguments === "string" ? JSON.parse(tc.function.arguments) : tc.function.arguments;
|
|
2433
|
+
} catch {
|
|
2434
|
+
results.push({ toolCallId: tc.id, toolName, result: null, error: "Failed to parse tool arguments" });
|
|
2435
|
+
continue;
|
|
2436
|
+
}
|
|
2437
|
+
this.logger.info(` Calling tool: ${toolName}(${JSON.stringify(args).slice(0, 100)}...)`);
|
|
2438
|
+
this.emitEvent("agent_tool_call", { step: this.currentStep, toolCallId: tc.id, toolName, arguments: args });
|
|
2439
|
+
try {
|
|
2440
|
+
const result = await tool.call(args);
|
|
2441
|
+
this.emitEvent("agent_tool_result", { step: this.currentStep, toolCallId: tc.id, toolName, result, duration: 0 });
|
|
2442
|
+
results.push({ toolCallId: tc.id, toolName, result });
|
|
2443
|
+
} catch (error) {
|
|
2444
|
+
const msg = `Tool execution error: ${error.message}`;
|
|
2445
|
+
this.emitEvent("agent_tool_result", { step: this.currentStep, toolCallId: tc.id, toolName, result: null, error: msg, duration: 0 });
|
|
2446
|
+
results.push({ toolCallId: tc.id, toolName, result: null, error: msg });
|
|
2447
|
+
}
|
|
2448
|
+
}
|
|
2449
|
+
return results;
|
|
2450
|
+
}
|
|
2451
|
+
/**
|
|
2452
|
+
* Override: force final answer via tool call format when max steps hit.
|
|
2453
|
+
*/
|
|
2454
|
+
async provideFinalAnswer(task) {
|
|
2455
|
+
this.logger.subheader("Generating final answer from accumulated context");
|
|
2456
|
+
const messages = this.memory.toMessages();
|
|
2457
|
+
messages.push({
|
|
2458
|
+
role: "user",
|
|
2459
|
+
content: `You have reached the maximum number of steps. Based on your work so far, provide the best answer for the task: "${task}". Call the final_answer tool with your response.`
|
|
2460
|
+
});
|
|
2461
|
+
const toolDefinitions = [new FinalAnswerTool().toOpenAITool()];
|
|
2462
|
+
const response = await this.model.generate(messages, { toolDefinitions, maxTokens: this.config.maxTokens, temperature: this.config.temperature });
|
|
2463
|
+
if (response.toolCalls && response.toolCalls.length > 0) {
|
|
2464
|
+
try {
|
|
2465
|
+
const args = typeof response.toolCalls[0].function.arguments === "string" ? JSON.parse(response.toolCalls[0].function.arguments) : response.toolCalls[0].function.arguments;
|
|
2466
|
+
return args.answer;
|
|
2467
|
+
} catch {
|
|
2468
|
+
return response.content;
|
|
2469
|
+
}
|
|
2470
|
+
}
|
|
2471
|
+
return response.content;
|
|
2472
|
+
}
|
|
2473
|
+
};
|
|
2474
|
+
|
|
2111
2475
|
// src/models/Model.ts
|
|
2112
2476
|
var Model = class {
|
|
2113
2477
|
/**
|
|
@@ -2847,11 +3211,11 @@ var ExaResearchTool = class extends Tool {
|
|
|
2847
3211
|
};
|
|
2848
3212
|
|
|
2849
3213
|
// src/tools/ProxyTool.ts
|
|
2850
|
-
var
|
|
3214
|
+
var import_child_process3 = require("child_process");
|
|
2851
3215
|
var path6 = __toESM(require("path"));
|
|
2852
3216
|
|
|
2853
3217
|
// src/utils/bunInstaller.ts
|
|
2854
|
-
var
|
|
3218
|
+
var import_child_process2 = require("child_process");
|
|
2855
3219
|
var path5 = __toESM(require("path"));
|
|
2856
3220
|
var fs5 = __toESM(require("fs"));
|
|
2857
3221
|
var os3 = __toESM(require("os"));
|
|
@@ -2872,7 +3236,7 @@ async function ensureBunAvailable() {
|
|
|
2872
3236
|
"\n[smol-js] Bun is required to run custom tools but was not found. Installing Bun automatically...\n"
|
|
2873
3237
|
);
|
|
2874
3238
|
try {
|
|
2875
|
-
(0,
|
|
3239
|
+
(0, import_child_process2.execSync)("curl --proto =https --tlsv1.2 -sSf https://bun.sh | bash", {
|
|
2876
3240
|
stdio: "inherit",
|
|
2877
3241
|
shell: "/bin/bash",
|
|
2878
3242
|
env: { ...process.env, HOME: os3.homedir() }
|
|
@@ -2897,7 +3261,7 @@ Details: ${err.message}`
|
|
|
2897
3261
|
function whichBun() {
|
|
2898
3262
|
try {
|
|
2899
3263
|
const cmd = process.platform === "win32" ? "where bun" : "which bun";
|
|
2900
|
-
const result = (0,
|
|
3264
|
+
const result = (0, import_child_process2.execSync)(cmd, { encoding: "utf8", stdio: "pipe" }).trim();
|
|
2901
3265
|
const first = result.split("\n")[0]?.trim();
|
|
2902
3266
|
if (first && fs5.existsSync(first)) return first;
|
|
2903
3267
|
return null;
|
|
@@ -2950,7 +3314,7 @@ var ProxyTool = class extends Tool {
|
|
|
2950
3314
|
}
|
|
2951
3315
|
const serializedArgs = JSON.stringify(args);
|
|
2952
3316
|
return new Promise((resolve7, reject) => {
|
|
2953
|
-
const child = (0,
|
|
3317
|
+
const child = (0, import_child_process3.spawn)(this.bunPath, ["run", this.harnessPath, this.toolPath, serializedArgs], {
|
|
2954
3318
|
stdio: ["pipe", "pipe", "pipe"],
|
|
2955
3319
|
env: { ...process.env }
|
|
2956
3320
|
});
|
|
@@ -3321,32 +3685,24 @@ var YAMLLoader = class {
|
|
|
3321
3685
|
}
|
|
3322
3686
|
}
|
|
3323
3687
|
const maxContextLength = definition.maxContextLength ?? globalMaxContextLength;
|
|
3688
|
+
const sharedConfig = {
|
|
3689
|
+
model,
|
|
3690
|
+
tools: agentTools,
|
|
3691
|
+
maxSteps: definition.maxSteps,
|
|
3692
|
+
customInstructions: definition.customInstructions,
|
|
3693
|
+
persistent: definition.persistent,
|
|
3694
|
+
maxContextLength,
|
|
3695
|
+
memoryStrategy: definition.memoryStrategy,
|
|
3696
|
+
maxTokens: definition.maxTokens,
|
|
3697
|
+
temperature: definition.temperature,
|
|
3698
|
+
name
|
|
3699
|
+
};
|
|
3324
3700
|
if (definition.type === "CodeAgent") {
|
|
3325
|
-
return new CodeAgent(
|
|
3326
|
-
|
|
3327
|
-
|
|
3328
|
-
maxSteps: definition.maxSteps,
|
|
3329
|
-
customInstructions: definition.customInstructions,
|
|
3330
|
-
persistent: definition.persistent,
|
|
3331
|
-
maxContextLength,
|
|
3332
|
-
memoryStrategy: definition.memoryStrategy,
|
|
3333
|
-
maxTokens: definition.maxTokens,
|
|
3334
|
-
temperature: definition.temperature,
|
|
3335
|
-
name
|
|
3336
|
-
});
|
|
3701
|
+
return new CodeAgent(sharedConfig);
|
|
3702
|
+
} else if (definition.type === "TerminalAgent") {
|
|
3703
|
+
return new TerminalAgent(sharedConfig);
|
|
3337
3704
|
} else {
|
|
3338
|
-
return new ToolUseAgent(
|
|
3339
|
-
model,
|
|
3340
|
-
tools: agentTools,
|
|
3341
|
-
maxSteps: definition.maxSteps,
|
|
3342
|
-
customInstructions: definition.customInstructions,
|
|
3343
|
-
persistent: definition.persistent,
|
|
3344
|
-
maxContextLength,
|
|
3345
|
-
memoryStrategy: definition.memoryStrategy,
|
|
3346
|
-
maxTokens: definition.maxTokens,
|
|
3347
|
-
temperature: definition.temperature,
|
|
3348
|
-
name
|
|
3349
|
-
});
|
|
3705
|
+
return new ToolUseAgent(sharedConfig);
|
|
3350
3706
|
}
|
|
3351
3707
|
}
|
|
3352
3708
|
};
|
|
@@ -3746,6 +4102,7 @@ var Orchestrator = class {
|
|
|
3746
4102
|
Orchestrator,
|
|
3747
4103
|
ProxyTool,
|
|
3748
4104
|
ReadFileTool,
|
|
4105
|
+
TerminalAgent,
|
|
3749
4106
|
Tool,
|
|
3750
4107
|
ToolUseAgent,
|
|
3751
4108
|
UserInputTool,
|