@samrahimi/smol-js 0.7.1 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -2021,6 +2021,369 @@ Please try a different approach.`
2021
2021
  }
2022
2022
  };
2023
2023
 
2024
+ // src/agents/TerminalAgent.ts
2025
+ var import_child_process = require("child_process");
2026
+
2027
+ // src/prompts/terminalAgent.ts
2028
/**
 * Build the system prompt for the terminal agent.
 *
 * @param {object} variables
 * @param {string} [variables.customInstructions] - Extra instructions appended
 *   under an "Additional Instructions" heading when non-empty.
 * @param {boolean} variables.hasSubAgents - Whether to include the delegation section.
 * @param {string} variables.subAgentDescriptions - Pre-formatted list of sub-agents,
 *   only used when `hasSubAgents` is true.
 * @param {number} [variables.commandDelay=5] - Seconds the framework waits before
 *   running commands; quoted in the safety rule so the prompt matches the real delay.
 * @returns {string} The complete system prompt.
 */
function generateTerminalAgentSystemPrompt(variables) {
  const { customInstructions, hasSubAgents, subAgentDescriptions } = variables;
  // Previously hard-coded to 5; the default keeps the prompt unchanged for
  // callers that do not pass a delay.
  const delaySeconds = variables.commandDelay ?? 5;
  let delegationSection = "";
  if (hasSubAgents) {
    delegationSection = `
## Delegation to Sub-Agents

You can delegate tasks to specialized sub-agents when terminal commands alone
are not sufficient. Available sub-agents:

${subAgentDescriptions}

To delegate, call the sub-agent tool with a clear task description. Wait for
its result before continuing. Sub-agents handle their own tool calls internally.
`;
  }
  return `You are a terminal operations agent running on macOS. You accomplish tasks
by reasoning about what shell commands to run and executing them one or more at
a time.

## How You Work

You follow a ReAct (Reasoning + Acting) loop:
1. **Think**: Analyze the task and decide what commands to run next.
2. **Act**: Emit one or more shell commands inside a fenced code block.
3. **Observe**: Review the command output (stdout, stderr, exit code) that the
framework feeds back to you.
4. Repeat until the task is complete, then signal your final answer (see below).

## Emitting Commands

Place your shell commands inside a fenced sh code block. You may include
multiple commands separated by newlines. Each command is executed
sequentially in its own shell invocation on the user's macOS terminal:

\`\`\`sh
echo "hello"
\`\`\`

\`\`\`sh
ls -la ~/Documents
pwd
\`\`\`

**Important**: Each code block is treated as a batch. Commands within a single
block run sequentially. The output of the entire block is returned as one
observation. If you need to inspect intermediate output before proceeding,
use separate blocks across steps.

## Rules

1. **Safety first**: Before running anything destructive (rm, mv on important
files, format commands, etc.), explain what you are about to do in your
reasoning so the user can read it during the ${delaySeconds}-second delay and abort with
Ctrl+C if needed.

2. **Signalling completion**: When the task is done, do NOT emit any more
\`\`\`sh blocks. Instead, write your summary on a line that starts with
the exact marker:

FINAL_ANSWER: <your summary here>

Everything after "FINAL_ANSWER: " (including multiple lines) is captured
as your final answer. This is how you tell the framework you are finished.

3. **Handle errors**: If a command fails (non-zero exit code or stderr output),
analyze what went wrong and try a corrective approach. Do not repeat the
exact same command.

4. **Be explicit about paths**: Use absolute paths or \`cd\` explicitly. Do not
assume the working directory persists between steps (it does not).

5. **macOS conventions**: Use macOS / BSD variants of commands (e.g. \`gstat\`
may not exist; use \`stat\` with the right flags). Prefer Homebrew paths
(\`/opt/homebrew/bin\`) for installed tools. Use \`sw_vers\` for OS info.

6. **Prefer streaming-friendly commands**: Avoid commands that buffer all output
until completion. Prefer tools that print as they go.

7. **No interactive prompts**: Do not run commands that wait for user input
(e.g. \`ssh\` without key auth, interactive installers). If a command would
prompt, pass flags to make it non-interactive or use \`yes |\` piping.
${delegationSection}
${customInstructions ? `## Additional Instructions

${customInstructions}` : ""}

Begin. Think about the task, then emit your first shell command(s).`;
}
2117
+
2118
+ // src/agents/TerminalAgent.ts
2119
// Fenced shell block: ```sh / ```bash / ```shell / ```zsh followed by a line
// break. Trailing spaces after the language tag and CRLF line endings are
// tolerated so such blocks are not silently ignored.
var SH_BLOCK_REGEX = /```(?:sh|bash|shell|zsh)[ \t]*\r?\n([\s\S]*?)```/g;
// Captures free-form reasoning introduced by "Thought:", "Reasoning:" or "Think:".
var THOUGHT_REGEX2 = /(?:Thought|Reasoning|Think):\s*([\s\S]*?)(?=```|FINAL_ANSWER:|$)/i;
// Captures everything after a line that starts with "FINAL_ANSWER:".
var FINAL_ANSWER_MARKER = /^FINAL_ANSWER:\s*([\s\S]*)$/m;
var NEWLINE = "\n";

/**
 * Agent that accomplishes tasks by emitting shell commands in fenced code
 * blocks, running them, and feeding stdout/stderr/exit code back to the model.
 * Only AgentTool instances (sub-agent delegation) and `final_answer` are kept
 * as tools.
 */
var TerminalAgent = class extends Agent {
  /** Seconds to wait before executing emitted commands. */
  commandDelay;
  /** Maximum number of characters of stdout/stderr kept per command. */
  maxOutputLength;

  /**
   * @param {object} config - Agent configuration. `commandDelay` (default 5) and
   *   `maxOutputLength` (default 8000) are read here; the rest is passed to the
   *   base class with `verboseLevel` forced to 2 (DEBUG).
   */
  constructor(config) {
    super({ ...config, verboseLevel: 2 /* DEBUG */ });
    this.commandDelay = config.commandDelay ?? 5;
    this.maxOutputLength = config.maxOutputLength ?? 8e3;
    // Drop every tool that is not a sub-agent wrapper; the shell is the tool.
    const keepTools = /* @__PURE__ */ new Map();
    for (const [name, tool] of this.tools) {
      if (tool.constructor.name === "AgentTool") {
        keepTools.set(name, tool);
      }
    }
    this.tools = keepTools;
    if (!this.tools.has("final_answer")) {
      this.tools.set("final_answer", new FinalAnswerTool());
    }
  }

  /**
   * List the registered tools whose constructor is named "AgentTool".
   * @returns {Array<object>} Sub-agent delegation tools.
   */
  listDelegationTools() {
    return Array.from(this.tools.values()).filter(
      (t) => t.constructor.name === "AgentTool"
    );
  }

  /**
   * Build the system prompt. Includes delegation info if sub-agents are present.
   * @returns {string} The system prompt text.
   */
  initializeSystemPrompt() {
    const agentTools = this.listDelegationTools();
    const hasSubAgents = agentTools.length > 0;
    const subAgentDescriptions = agentTools.map((t) => `- **${t.name}**: ${t.description}`).join("\n");
    return generateTerminalAgentSystemPrompt({
      customInstructions: this.config.customInstructions,
      hasSubAgents,
      subAgentDescriptions,
      // Forward the configured delay; a generator that does not read this
      // field simply ignores it.
      // NOTE(review): if the base constructor calls this method before
      // commandDelay is assigned, the value is undefined here - confirm call order.
      commandDelay: this.commandDelay
    });
  }

  /**
   * Execute one step of the ReAct loop:
   * 1. Send messages to the LLM (with tool definitions for delegation)
   * 2. Extract reasoning and a FINAL_ANSWER marker from the response
   * 3. If tool calls are present (final_answer or delegation), process them
   * 4. Otherwise execute the emitted shell commands after the pre-execution delay
   * 5. Feed stdout/stderr/exit code back as the observation
   *
   * @param {object} memoryStep - Step record mutated with inputs, outputs,
   *   tool results, observation and error.
   * @returns {Promise<{output: *, isFinalAnswer: boolean}>}
   */
  async executeStep(memoryStep) {
    const messages = this.memory.toMessages();
    memoryStep.modelInputMessages = [...messages];
    const actionSteps = this.memory.getActionSteps();
    // The second-to-last action step is inspected for an error from the
    // previous iteration.
    const prevStep = actionSteps.length >= 2 ? actionSteps[actionSteps.length - 2] : void 0;
    if (prevStep?.error) {
      messages.push({
        role: "user",
        content: `Your previous action encountered an error: ${prevStep.error.message}
Please try a different approach.`
      });
    }
    const toolDefinitions = this.listDelegationTools().map((t) => t.toOpenAITool());
    this.logger.subheader("Agent thinking...");
    const response = await this.generateResponse(messages, toolDefinitions);
    memoryStep.modelOutputMessage = response;
    memoryStep.tokenUsage = response.tokenUsage;
    const content = response.content ?? "";

    const thoughtMatch = content.match(THOUGHT_REGEX2);
    if (thoughtMatch) {
      const thought = thoughtMatch[1].trim();
      this.logger.reasoning(thought);
      this.emitEvent("agent_thinking", { step: this.currentStep, content: thought });
    }

    // A FINAL_ANSWER marker wins over tool calls and shell blocks.
    const finalMatch = content.match(FINAL_ANSWER_MARKER);
    if (finalMatch) {
      const answer = finalMatch[1].trim();
      this.logger.finalAnswer(answer);
      return { output: answer, isFinalAnswer: true };
    }

    if (response.toolCalls && response.toolCalls.length > 0) {
      memoryStep.toolCalls = response.toolCalls;
      const results = await this.processToolCalls(response.toolCalls);
      memoryStep.toolResults = results;
      for (const result of results) {
        if (result.error) {
          this.logger.error(`Tool ${result.toolName} failed: ${result.error}`);
        } else {
          const str = typeof result.result === "string" ? result.result : JSON.stringify(result.result, null, 2) ?? "";
          this.logger.output(`[${result.toolName}]: ${str.slice(0, 500)}${str.length > 500 ? "..." : ""}`);
        }
      }
      const obs = results.map((r) => r.error ? `[${r.toolName}] Error: ${r.error}` : `[${r.toolName}] Result: ${typeof r.result === "string" ? r.result : JSON.stringify(r.result)}`).join("\n");
      memoryStep.observation = `Observation:
${obs}`;
      // A successful final_answer tool call ends the run instead of being
      // treated as an ordinary observation.
      // NOTE(review): assumes the tool's result is the answer itself - confirm
      // against FinalAnswerTool.
      const finalCall = results.find((r) => r.toolName === "final_answer" && !r.error);
      if (finalCall) {
        const answerText = typeof finalCall.result === "string" ? finalCall.result : JSON.stringify(finalCall.result, null, 2) ?? "";
        this.logger.finalAnswer(answerText);
        return { output: finalCall.result, isFinalAnswer: true };
      }
      return { output: null, isFinalAnswer: false };
    }

    const blocks = this.extractShellBlocks(content);
    if (blocks.length === 0) {
      this.logger.warn("No shell commands or tool calls in response.");
      // Point at the FINAL_ANSWER marker: the final_answer tool is not among
      // the tool definitions offered to the model in this step.
      memoryStep.observation = "No shell command block was found in your response. Emit commands inside a ```sh code block, or, if the task is complete, write your summary on a line starting with FINAL_ANSWER:";
      return { output: null, isFinalAnswer: false };
    }

    const allCommands = blocks.join("\n---\n");
    this.logger.code(allCommands, "sh");
    this.emitEvent("agent_observation", {
      step: this.currentStep,
      observation: `Pending commands:
${allCommands}`
    });
    // Give the user a window to read the pending commands and abort.
    this.logger.waiting(this.commandDelay);
    await this.sleep(this.commandDelay * 1e3);

    const observations = [];
    let hitError = false;
    for (let i = 0; i < blocks.length && !hitError; i++) {
      const commands = this.splitCommands(blocks[i]);
      this.logger.subheader(`Executing command block ${blocks.length > 1 ? `${i + 1}/${blocks.length}` : ""}...`);
      for (const cmd of commands) {
        this.logger.info(`  $ ${cmd}`);
        const result = this.runCommand(cmd);
        if (result.stdout) {
          this.logger.logs(result.stdout);
        }
        if (result.stderr) {
          this.logger.error(`stderr: ${result.stderr}`);
        }
        observations.push(this.formatCommandObservation(cmd, result));
        if (result.exitCode !== 0) {
          // Stop at the first failing command; later commands may depend on it.
          hitError = true;
          break;
        }
      }
    }

    const observation = observations.join("\n\n");
    memoryStep.observation = `Observation:
${observation}`;
    if (hitError) {
      memoryStep.error = new Error("Command exited with non-zero status. See observation for details.");
    }
    return { output: observation, isFinalAnswer: false };
  }

  /**
   * Collect the trimmed bodies of all fenced shell blocks in a model response.
   * Empty blocks are dropped so they do not count as runnable work.
   * @param {string} content - Raw model output.
   * @returns {string[]} Block bodies in order of appearance.
   */
  extractShellBlocks(content) {
    // Fresh RegExp so no lastIndex state leaks from the shared /g constant.
    const pattern = new RegExp(SH_BLOCK_REGEX.source, SH_BLOCK_REGEX.flags);
    return Array.from(content.matchAll(pattern), (m) => m[1].trim()).filter((block) => block.length > 0);
  }

  /**
   * Split a block into individual command lines, skipping blank lines and
   * lines whose first non-space character is "#".
   * @param {string} block - Body of one fenced shell block.
   * @returns {string[]} Command lines, one shell invocation each.
   */
  splitCommands(block) {
    return block.split(/\r?\n/).filter((l) => l.trim() && !l.trim().startsWith("#"));
  }

  /**
   * Render one command's result as observation text. Each non-empty stream is
   * terminated with a newline so "stderr:" and "exit code:" always start on
   * their own line, even when output lacks a trailing newline.
   * @param {string} cmd - The command that was run.
   * @param {{stdout: string, stderr: string, exitCode: number}} result
   * @returns {string} Formatted observation for this command.
   */
  formatCommandObservation(cmd, result) {
    const section = (label, text) => text ? `${label}:${NEWLINE}${text}${text.endsWith(NEWLINE) ? "" : NEWLINE}` : "";
    return `$ ${cmd}${NEWLINE}` + section("stdout", result.stdout) + section("stderr", result.stderr) + `exit code: ${result.exitCode}`;
  }

  /**
   * Run a single shell command, capture stdout/stderr, return structured result.
   * Blocks until the command finishes or the 2-minute timeout elapses.
   * @param {string} cmd - Command line passed to /bin/zsh.
   * @returns {{stdout: string, stderr: string, exitCode: number}}
   */
  runCommand(cmd) {
    try {
      const stdout = (0, import_child_process.execSync)(cmd, {
        encoding: "utf8",
        stdio: ["pipe", "pipe", "pipe"],
        shell: "/bin/zsh",
        timeout: 12e4,
        // 2-minute per-command timeout
        maxBuffer: 50 * 1024 * 1024
        // 50 MB
      });
      return { stdout: this.truncateOutput(stdout), stderr: "", exitCode: 0 };
    } catch (err) {
      return this.normalizeExecError(err);
    }
  }

  /**
   * Convert an error thrown by execSync into the structured command result.
   * When the error has no exit status (terminated by a signal such as the
   * timeout kill, or never spawned), the error message is appended to stderr
   * so the model can see why the command failed.
   * @param {object} err - Error carrying optional stdout, stderr, status, message.
   * @returns {{stdout: string, stderr: string, exitCode: number}}
   */
  normalizeExecError(err) {
    const e = err;
    const hasStderr = e.stderr != null;
    let stderr = hasStderr ? String(e.stderr) : e.message ?? "";
    if (hasStderr && e.status == null && e.message) {
      stderr = stderr ? `${stderr}${stderr.endsWith(NEWLINE) ? "" : NEWLINE}${e.message}` : e.message;
    }
    return {
      stdout: this.truncateOutput(String(e.stdout ?? "")),
      stderr: this.truncateOutput(stderr),
      exitCode: e.status ?? 1
    };
  }

  /**
   * Truncate long output, preserving head and tail so context stays useful.
   * @param {string} output - Raw command output.
   * @returns {string} `output` unchanged when within maxOutputLength, otherwise
   *   the first and last halves joined by a marker stating how many characters
   *   were dropped.
   */
  truncateOutput(output) {
    if (output.length <= this.maxOutputLength) return output;
    const half = Math.max(0, Math.floor(this.maxOutputLength / 2));
    const head = output.slice(0, half);
    const tail = output.slice(output.length - half);
    // Count what was actually removed; with an odd limit, head + tail is one
    // character shorter than maxOutputLength.
    const omitted = output.length - head.length - tail.length;
    return `${head}

... [${omitted} characters omitted] ...

${tail}`;
  }

  /**
   * Generate a response, streaming when enabled and supported.
   * The streamed path yields only text, so it is used only when no tool
   * definitions are offered; otherwise tool calls would be lost.
   * @param {Array<object>} messages - Conversation sent to the model.
   * @param {Array<object>} toolDefinitions - Tool schemas offered to the model.
   * @returns {Promise<object>} Assistant message.
   */
  async generateResponse(messages, toolDefinitions) {
    const options = {
      toolDefinitions,
      maxTokens: this.config.maxTokens,
      temperature: this.config.temperature
    };
    const offersTools = Array.isArray(toolDefinitions) && toolDefinitions.length > 0;
    if (!offersTools && this.config.streamOutputs && this.model.supportsStreaming() && this.model.generateStream) {
      let fullContent = "";
      for await (const chunk of this.model.generateStream(messages, options)) {
        this.logger.streamChar(chunk);
        fullContent += chunk;
      }
      this.logger.streamEnd();
      return { role: "assistant", content: fullContent };
    }
    return this.model.generate(messages, options);
  }

  /**
   * Process tool calls (final_answer or AgentTool delegation).
   * Unknown tools, unparsable arguments and tool exceptions are reported as
   * per-call errors instead of being thrown.
   * @param {Array<object>} toolCalls - Tool calls from the model response.
   * @returns {Promise<Array<{toolCallId: *, toolName: string, result: *, error?: string}>>}
   */
  async processToolCalls(toolCalls) {
    const results = [];
    for (const tc of toolCalls) {
      const toolName = tc.function.name;
      const tool = this.tools.get(toolName);
      if (!tool) {
        results.push({ toolCallId: tc.id, toolName, result: null, error: `Unknown tool: ${toolName}` });
        continue;
      }
      let args;
      try {
        args = typeof tc.function.arguments === "string" ? JSON.parse(tc.function.arguments) : tc.function.arguments;
      } catch {
        results.push({ toolCallId: tc.id, toolName, result: null, error: "Failed to parse tool arguments" });
        continue;
      }
      // JSON.stringify returns undefined for undefined arguments; guard the slice.
      const preview = (JSON.stringify(args) ?? "").slice(0, 100);
      this.logger.info(`  Calling tool: ${toolName}(${preview}...)`);
      this.emitEvent("agent_tool_call", { step: this.currentStep, toolCallId: tc.id, toolName, arguments: args });
      try {
        const result = await tool.call(args);
        this.emitEvent("agent_tool_result", { step: this.currentStep, toolCallId: tc.id, toolName, result, duration: 0 });
        results.push({ toolCallId: tc.id, toolName, result });
      } catch (error) {
        const detail = error instanceof Error ? error.message : String(error);
        const msg = `Tool execution error: ${detail}`;
        this.emitEvent("agent_tool_result", { step: this.currentStep, toolCallId: tc.id, toolName, result: null, error: msg, duration: 0 });
        results.push({ toolCallId: tc.id, toolName, result: null, error: msg });
      }
    }
    return results;
  }

  /**
   * Override: force final answer via tool call format when max steps hit.
   * Falls back to the plain response content when no usable final_answer
   * arguments are returned.
   * @param {string} task - The original task text.
   * @returns {Promise<*>} The final answer.
   */
  async provideFinalAnswer(task) {
    this.logger.subheader("Generating final answer from accumulated context");
    const messages = this.memory.toMessages();
    messages.push({
      role: "user",
      content: `You have reached the maximum number of steps. Based on your work so far, provide the best answer for the task: "${task}". Call the final_answer tool with your response.`
    });
    const toolDefinitions = [new FinalAnswerTool().toOpenAITool()];
    const response = await this.model.generate(messages, { toolDefinitions, maxTokens: this.config.maxTokens, temperature: this.config.temperature });
    if (response.toolCalls && response.toolCalls.length > 0) {
      // Prefer the final_answer call if the model emitted several tool calls.
      const call = response.toolCalls.find((tc) => tc.function?.name === "final_answer") ?? response.toolCalls[0];
      try {
        const rawArgs = call.function.arguments;
        const args = typeof rawArgs === "string" ? JSON.parse(rawArgs) : rawArgs;
        return args?.answer ?? response.content;
      } catch {
        return response.content;
      }
    }
    return response.content;
  }
};
2386
+
2024
2387
  // src/tools/AgentTool.ts
2025
2388
  var AgentTool = class extends Tool {
2026
2389
  name;
@@ -2933,32 +3296,24 @@ var YAMLLoader = class {
2933
3296
  }
2934
3297
  }
2935
3298
  const maxContextLength = definition.maxContextLength ?? globalMaxContextLength;
3299
+ const sharedConfig = {
3300
+ model,
3301
+ tools: agentTools,
3302
+ maxSteps: definition.maxSteps,
3303
+ customInstructions: definition.customInstructions,
3304
+ persistent: definition.persistent,
3305
+ maxContextLength,
3306
+ memoryStrategy: definition.memoryStrategy,
3307
+ maxTokens: definition.maxTokens,
3308
+ temperature: definition.temperature,
3309
+ name
3310
+ };
2936
3311
  if (definition.type === "CodeAgent") {
2937
- return new CodeAgent({
2938
- model,
2939
- tools: agentTools,
2940
- maxSteps: definition.maxSteps,
2941
- customInstructions: definition.customInstructions,
2942
- persistent: definition.persistent,
2943
- maxContextLength,
2944
- memoryStrategy: definition.memoryStrategy,
2945
- maxTokens: definition.maxTokens,
2946
- temperature: definition.temperature,
2947
- name
2948
- });
3312
+ return new CodeAgent(sharedConfig);
3313
+ } else if (definition.type === "TerminalAgent") {
3314
+ return new TerminalAgent(sharedConfig);
2949
3315
  } else {
2950
- return new ToolUseAgent({
2951
- model,
2952
- tools: agentTools,
2953
- maxSteps: definition.maxSteps,
2954
- customInstructions: definition.customInstructions,
2955
- persistent: definition.persistent,
2956
- maxContextLength,
2957
- memoryStrategy: definition.memoryStrategy,
2958
- maxTokens: definition.maxTokens,
2959
- temperature: definition.temperature,
2960
- name
2961
- });
3316
+ return new ToolUseAgent(sharedConfig);
2962
3317
  }
2963
3318
  }
2964
3319
  };
@@ -3340,11 +3695,11 @@ var fs7 = __toESM(require("fs"));
3340
3695
  var path8 = __toESM(require("path"));
3341
3696
 
3342
3697
  // src/tools/ProxyTool.ts
3343
- var import_child_process2 = require("child_process");
3698
+ var import_child_process3 = require("child_process");
3344
3699
  var path7 = __toESM(require("path"));
3345
3700
 
3346
3701
  // src/utils/bunInstaller.ts
3347
- var import_child_process = require("child_process");
3702
+ var import_child_process2 = require("child_process");
3348
3703
  var path6 = __toESM(require("path"));
3349
3704
  var fs6 = __toESM(require("fs"));
3350
3705
  var os3 = __toESM(require("os"));
@@ -3365,7 +3720,7 @@ async function ensureBunAvailable() {
3365
3720
  "\n[smol-js] Bun is required to run custom tools but was not found. Installing Bun automatically...\n"
3366
3721
  );
3367
3722
  try {
3368
- (0, import_child_process.execSync)("curl --proto =https --tlsv1.2 -sSf https://bun.sh | bash", {
3723
+ (0, import_child_process2.execSync)("curl --proto =https --tlsv1.2 -sSf https://bun.sh | bash", {
3369
3724
  stdio: "inherit",
3370
3725
  shell: "/bin/bash",
3371
3726
  env: { ...process.env, HOME: os3.homedir() }
@@ -3390,7 +3745,7 @@ Details: ${err.message}`
3390
3745
  function whichBun() {
3391
3746
  try {
3392
3747
  const cmd = process.platform === "win32" ? "where bun" : "which bun";
3393
- const result = (0, import_child_process.execSync)(cmd, { encoding: "utf8", stdio: "pipe" }).trim();
3748
+ const result = (0, import_child_process2.execSync)(cmd, { encoding: "utf8", stdio: "pipe" }).trim();
3394
3749
  const first = result.split("\n")[0]?.trim();
3395
3750
  if (first && fs6.existsSync(first)) return first;
3396
3751
  return null;
@@ -3443,7 +3798,7 @@ var ProxyTool = class extends Tool {
3443
3798
  }
3444
3799
  const serializedArgs = JSON.stringify(args);
3445
3800
  return new Promise((resolve8, reject) => {
3446
- const child = (0, import_child_process2.spawn)(this.bunPath, ["run", this.harnessPath, this.toolPath, serializedArgs], {
3801
+ const child = (0, import_child_process3.spawn)(this.bunPath, ["run", this.harnessPath, this.toolPath, serializedArgs], {
3447
3802
  stdio: ["pipe", "pipe", "pipe"],
3448
3803
  env: { ...process.env }
3449
3804
  });