@samrahimi/smol-js 0.7.1 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -2040,6 +2040,369 @@ Please try a different approach.`
2040
2040
  }
2041
2041
  };
2042
2042
 
2043
+ // src/agents/TerminalAgent.ts
2044
+ import { execSync } from "child_process";
2045
+
2046
+ // src/prompts/terminalAgent.ts
2047
/**
 * Build the system prompt for the terminal agent.
 *
 * @param {object} variables
 * @param {string} [variables.customInstructions] - Extra instructions appended
 *   under an "Additional Instructions" heading; omitted when falsy.
 * @param {boolean} variables.hasSubAgents - Whether to include the delegation
 *   section.
 * @param {string} variables.subAgentDescriptions - Pre-formatted list of
 *   sub-agents, interpolated verbatim into the delegation section.
 * @param {number} [variables.commandDelay=5] - Seconds the framework waits
 *   before running commands; mentioned in the safety rule so the prompt does
 *   not promise a delay that differs from the configured one.
 * @returns {string} The complete system prompt.
 */
function generateTerminalAgentSystemPrompt(variables) {
  const {
    customInstructions,
    hasSubAgents,
    subAgentDescriptions,
    commandDelay = 5
  } = variables;

  const delegationSection = hasSubAgents ? `
## Delegation to Sub-Agents

You can delegate tasks to specialized sub-agents when terminal commands alone
are not sufficient. Available sub-agents:

${subAgentDescriptions}

To delegate, call the sub-agent tool with a clear task description. Wait for
its result before continuing. Sub-agents handle their own tool calls internally.
` : "";

  const customSection = customInstructions ? `## Additional Instructions

${customInstructions}` : "";

  return `You are a terminal operations agent running on macOS. You accomplish tasks
by reasoning about what shell commands to run and executing them one or more at
a time.

## How You Work

You follow a ReAct (Reasoning + Acting) loop:
1. **Think**: Analyze the task and decide what commands to run next.
2. **Act**: Emit one or more shell commands inside a fenced code block.
3. **Observe**: Review the command output (stdout, stderr, exit code) that the
framework feeds back to you.
4. Repeat until the task is complete, then signal your final answer (see below).

## Emitting Commands

Place your shell commands inside a fenced sh code block. You may include
multiple commands separated by newlines. Each command is executed
sequentially in its own shell invocation on the user's macOS terminal:

\`\`\`sh
echo "hello"
\`\`\`

\`\`\`sh
ls -la ~/Documents
pwd
\`\`\`

**Important**: Each code block is treated as a batch. Commands within a single
block run sequentially. The output of the entire block is returned as one
observation. If you need to inspect intermediate output before proceeding,
use separate blocks across steps.

## Rules

1. **Safety first**: Before running anything destructive (rm, mv on important
files, format commands, etc.), explain what you are about to do in your
reasoning so the user can read it during the ${commandDelay}-second delay and abort with
Ctrl+C if needed.

2. **Signalling completion**: When the task is done, do NOT emit any more
\`\`\`sh blocks. Instead, write your summary on a line that starts with
the exact marker:

FINAL_ANSWER: <your summary here>

Everything after "FINAL_ANSWER: " (including multiple lines) is captured
as your final answer. This is how you tell the framework you are finished.

3. **Handle errors**: If a command fails (non-zero exit code or stderr output),
analyze what went wrong and try a corrective approach. Do not repeat the
exact same command.

4. **Be explicit about paths**: Use absolute paths, or chain \`cd\` with the
command on the same line (e.g. \`cd /some/dir && ls\`). Every line runs in its
own shell, so the working directory does not persist between commands or steps.

5. **macOS conventions**: Use macOS / BSD variants of commands (e.g. \`gstat\`
may not exist; use \`stat\` with the right flags). Prefer Homebrew paths
(\`/opt/homebrew/bin\`) for installed tools. Use \`sw_vers\` for OS info.

6. **Prefer streaming-friendly commands**: Avoid commands that buffer all output
until completion. Prefer tools that print as they go.

7. **No interactive prompts**: Do not run commands that wait for user input
(e.g. \`ssh\` without key auth, interactive installers). If a command would
prompt, pass flags to make it non-interactive or use \`yes |\` piping.
${delegationSection}
${customSection}

Begin. Think about the task, then emit your first shell command(s).`;
}
2136
+
2137
+ // src/agents/TerminalAgent.ts
2138
// Fenced shell block: ```sh / ```bash / ```shell / ```zsh. Capture group 1 is
// the block body. Trailing spaces/tabs after the language tag and CRLF line
// endings are tolerated so such blocks are not silently ignored.
var SH_BLOCK_REGEX = /```(?:sh|bash|shell|zsh)[ \t]*\r?\n([\s\S]*?)```/g;
// Optional "Thought:/Reasoning:/Think:" section, up to the first fence,
// the FINAL_ANSWER marker, or the end of the response.
var THOUGHT_REGEX2 = /(?:Thought|Reasoning|Think):\s*([\s\S]*?)(?=```|FINAL_ANSWER:|$)/i;
// A line starting with "FINAL_ANSWER:"; everything after it (including
// following lines) is captured as the answer.
var FINAL_ANSWER_MARKER = /^FINAL_ANSWER:\s*([\s\S]*)$/m;
var NEWLINE = "\n";

/**
 * Agent that accomplishes tasks by having the model emit fenced shell blocks,
 * executing every non-comment line as its own shell command, and feeding
 * stdout / stderr / exit code back as the next observation.
 *
 * SECURITY NOTE: commands come straight from model output and are executed
 * through a shell. The only safeguard in this class is the pre-execution
 * delay (`commandDelay`), during which the pending commands are logged.
 */
var TerminalAgent = class extends Agent {
  /** Seconds to wait between showing pending commands and running them. */
  commandDelay;
  /** Maximum characters of stdout/stderr kept per command before truncation. */
  maxOutputLength;

  /**
   * @param {object} config - Agent configuration, forwarded to the base class.
   * @param {number} [config.commandDelay=5] - Pre-execution delay in seconds.
   * @param {number} [config.maxOutputLength=8000] - Per-stream output cap.
   */
  constructor(config) {
    // Verbosity is forced to DEBUG (2) regardless of the caller's config.
    // NOTE(review): presumably so pending commands are always visible during
    // the abort window - confirm this override is intentional.
    super({ ...config, verboseLevel: 2 /* DEBUG */ });
    this.commandDelay = config.commandDelay ?? 5;
    this.maxOutputLength = config.maxOutputLength ?? 8e3;

    // Keep only sub-agent (AgentTool) tools; regular tools are dropped because
    // this agent acts through shell commands.
    const keepTools = /* @__PURE__ */ new Map();
    for (const [name, tool] of this.tools) {
      if (tool.constructor.name === "AgentTool") {
        keepTools.set(name, tool);
      }
    }
    this.tools = keepTools;
    if (!this.tools.has("final_answer")) {
      this.tools.set("final_answer", new FinalAnswerTool());
    }
  }

  /**
   * Return the registered tools that wrap sub-agents.
   *
   * Detection is by constructor name, as in the rest of this class; this will
   * not match subclasses of AgentTool or survive class-name minification.
   *
   * @returns {Array<object>} Tools whose constructor is named "AgentTool".
   */
  getDelegationTools() {
    return Array.from(this.tools.values()).filter(
      (t) => t.constructor.name === "AgentTool"
    );
  }

  /**
   * Build the system prompt. Includes delegation info if sub-agents are present.
   *
   * @returns {string} The system prompt text.
   */
  initializeSystemPrompt() {
    const agentTools = this.getDelegationTools();
    const subAgentDescriptions = agentTools.map((t) => `- **${t.name}**: ${t.description}`).join("\n");
    return generateTerminalAgentSystemPrompt({
      customInstructions: this.config.customInstructions,
      hasSubAgents: agentTools.length > 0,
      subAgentDescriptions,
      // Extra key; a prompt builder that does not know it simply ignores it.
      commandDelay: this.commandDelay
    });
  }

  /**
   * Collect the bodies of all fenced shell blocks in a model response.
   *
   * @param {string} content - Raw model response text.
   * @returns {string[]} Trimmed block bodies, in order of appearance.
   */
  extractShellBlocks(content) {
    // Fresh regex per call: a shared /g regex keeps lastIndex between calls.
    const regex = new RegExp(SH_BLOCK_REGEX.source, "g");
    const blocks = [];
    let match;
    while ((match = regex.exec(content)) !== null) {
      blocks.push(match[1].trim());
    }
    return blocks;
  }

  /**
   * Render one command result as observation text.
   *
   * Sections are joined with newlines so "stderr:" and "exit code:" always
   * start on their own line, even when the preceding output has no trailing
   * newline (e.g. printf output or truncated output).
   *
   * @param {string} cmd - The command line that was run.
   * @param {{stdout: string, stderr: string, exitCode: number}} result
   * @returns {string} Multi-line observation for this command.
   */
  formatCommandObservation(cmd, result) {
    const parts = [`$ ${cmd}`];
    if (result.stdout) {
      parts.push(`stdout:${NEWLINE}${result.stdout.trimEnd()}`);
    }
    if (result.stderr) {
      parts.push(`stderr:${NEWLINE}${result.stderr.trimEnd()}`);
    }
    parts.push(`exit code: ${result.exitCode}`);
    return parts.join(NEWLINE);
  }

  /**
   * Execute one step of the ReAct loop:
   * 1. Send messages to the LLM (with tool defs for sub-agent delegation)
   * 2. Extract reasoning and a FINAL_ANSWER marker from the response
   * 3. If tool calls are present, process them and record their results
   * 4. Otherwise execute the ```sh blocks after the pre-execution delay
   * 5. Feed stdout/stderr/exit-code back as the observation
   *
   * Execution stops at the first command that exits non-zero; in that case
   * `memoryStep.error` is set so the next step can tell the model about it.
   *
   * @param {object} memoryStep - Mutable record for this step (inputs, output,
   *   token usage, tool calls/results, observation, error).
   * @returns {Promise<{output: *, isFinalAnswer: boolean}>}
   */
  async executeStep(memoryStep) {
    const messages = this.memory.toMessages();
    memoryStep.modelInputMessages = [...messages];

    // The second-to-last action step is the previous one.
    // NOTE(review): assumes the current step is already in memory - confirm.
    const actionSteps = this.memory.getActionSteps();
    const prevStep = actionSteps.length >= 2 ? actionSteps[actionSteps.length - 2] : void 0;
    if (prevStep?.error) {
      messages.push({
        role: "user",
        content: `Your previous action encountered an error: ${prevStep.error.message}
Please try a different approach.`
      });
    }

    const toolDefinitions = this.getDelegationTools().map((t) => t.toOpenAITool());
    this.logger.subheader("Agent thinking...");
    const response = await this.generateResponse(messages, toolDefinitions);
    memoryStep.modelOutputMessage = response;
    memoryStep.tokenUsage = response.tokenUsage;
    const content = response.content ?? "";

    const thoughtMatch = content.match(THOUGHT_REGEX2);
    if (thoughtMatch) {
      const thought = thoughtMatch[1].trim();
      this.logger.reasoning(thought);
      this.emitEvent("agent_thinking", { step: this.currentStep, content: thought });
    }

    // The FINAL_ANSWER marker wins over any shell blocks or tool calls in the
    // same response: nothing further is executed.
    const finalMatch = content.match(FINAL_ANSWER_MARKER);
    if (finalMatch) {
      const answer = finalMatch[1].trim();
      this.logger.finalAnswer(answer);
      return { output: answer, isFinalAnswer: true };
    }

    if (response.toolCalls && response.toolCalls.length > 0) {
      memoryStep.toolCalls = response.toolCalls;
      const results = await this.processToolCalls(response.toolCalls);
      memoryStep.toolResults = results;
      for (const result of results) {
        if (result.error) {
          this.logger.error(`Tool ${result.toolName} failed: ${result.error}`);
        } else {
          // JSON.stringify returns undefined for undefined/functions; fall
          // back to String() so the slice below cannot throw.
          const str = typeof result.result === "string" ? result.result : JSON.stringify(result.result, null, 2) ?? String(result.result);
          this.logger.output(`[${result.toolName}]: ${str.slice(0, 500)}${str.length > 500 ? "..." : ""}`);
        }
      }
      const obs = results.map((r) => r.error ? `[${r.toolName}] Error: ${r.error}` : `[${r.toolName}] Result: ${typeof r.result === "string" ? r.result : JSON.stringify(r.result)}`).join("\n");
      memoryStep.observation = `Observation:
${obs}`;
      return { output: null, isFinalAnswer: false };
    }

    const blocks = this.extractShellBlocks(content);
    if (blocks.length === 0) {
      this.logger.warn("No shell commands or tool calls in response.");
      // Point the model at the FINAL_ANSWER marker: that is the completion
      // mechanism this step understands, and final_answer is not among the
      // tool definitions offered above.
      memoryStep.observation = "No shell command block was found in your response. Emit commands inside a ```sh code block, or, if the task is complete, write your summary on a line starting with FINAL_ANSWER:";
      return { output: null, isFinalAnswer: false };
    }

    // Show everything that is about to run, then give the user time to abort.
    const allCommands = blocks.join("\n---\n");
    this.logger.code(allCommands, "sh");
    this.emitEvent("agent_observation", {
      step: this.currentStep,
      observation: `Pending commands:
${allCommands}`
    });
    this.logger.waiting(this.commandDelay);
    await this.sleep(this.commandDelay * 1e3);

    const observations = [];
    let hitError = false;
    for (let i = 0; i < blocks.length && !hitError; i++) {
      // One command per non-blank, non-comment line. Splitting on \r?\n keeps
      // a stray carriage return from being passed to the shell.
      const commands = blocks[i].split(/\r?\n/).filter((l) => l.trim() && !l.trim().startsWith("#"));
      this.logger.subheader(`Executing command block ${blocks.length > 1 ? `${i + 1}/${blocks.length}` : ""}...`);
      for (const cmd of commands) {
        this.logger.info(` $ ${cmd}`);
        const result = this.runCommand(cmd);
        if (result.stdout) {
          this.logger.logs(result.stdout);
        }
        if (result.stderr) {
          this.logger.error(`stderr: ${result.stderr}`);
        }
        observations.push(this.formatCommandObservation(cmd, result));
        if (result.exitCode !== 0) {
          // Stop at the first failure; later commands may depend on this one.
          hitError = true;
          break;
        }
      }
    }

    const observation = observations.join("\n\n");
    memoryStep.observation = `Observation:
${observation}`;
    if (hitError) {
      memoryStep.error = new Error("Command exited with non-zero status. See observation for details.");
    }
    return { output: observation, isFinalAnswer: false };
  }

  /**
   * Run a single shell command, capture stdout/stderr, return structured result.
   *
   * The command runs synchronously under /bin/zsh with a 2-minute timeout and
   * a 50 MB output buffer. Failures never throw; they are reported through
   * `stderr` and a non-zero `exitCode`.
   *
   * @param {string} cmd - Command line to execute.
   * @returns {{stdout: string, stderr: string, exitCode: number}}
   */
  runCommand(cmd) {
    try {
      const stdout = execSync(cmd, {
        encoding: "utf8",
        stdio: ["pipe", "pipe", "pipe"],
        shell: "/bin/zsh",
        timeout: 12e4,
        // 2-minute per-command timeout
        maxBuffer: 50 * 1024 * 1024
        // 50 MB
      });
      return { stdout: this.truncateOutput(stdout), stderr: "", exitCode: 0 };
    } catch (err) {
      const e = err;
      // A numeric status means the process ran and exited on its own. Without
      // one (timeout, killed by signal, spawn failure) stderr is often empty,
      // so append the error message to explain why the command failed.
      const exited = typeof e.status === "number";
      const stderr = exited ? e.stderr ?? e.message : [e.stderr, e.message].filter(Boolean).join(NEWLINE);
      return {
        stdout: this.truncateOutput(e.stdout ?? ""),
        stderr: this.truncateOutput(stderr),
        exitCode: e.status ?? 1
      };
    }
  }

  /**
   * Truncate long output, preserving head and tail so context stays useful.
   *
   * @param {string} output - Text to cap at `maxOutputLength` characters.
   * @returns {string} The text unchanged when short enough, otherwise the
   *   first and last halves with a marker stating how many characters were
   *   dropped.
   */
  truncateOutput(output) {
    const text = typeof output === "string" ? output : String(output ?? "");
    if (text.length <= this.maxOutputLength) return text;
    const half = Math.max(0, Math.floor(this.maxOutputLength / 2));
    const head = text.slice(0, half);
    // slice(length - 0) already yields "", but be explicit for half === 0.
    const tail = half > 0 ? text.slice(text.length - half) : "";
    // Count what was really dropped; for an odd maxOutputLength the kept
    // portion is one character shorter than the limit.
    const omitted = text.length - head.length - tail.length;
    return `${head}

... [${omitted} characters omitted] ...

${tail}`;
  }

  /**
   * Generate response, with streaming if available.
   *
   * The streaming path only accumulates text chunks, so it cannot surface
   * tool calls. When tool definitions are offered (sub-agents configured) the
   * non-streaming path is used so delegation calls are not lost.
   *
   * @param {Array<object>} messages - Conversation to send to the model.
   * @param {Array<object>} toolDefinitions - Tool schemas offered to the model.
   * @returns {Promise<object>} Assistant message; the streaming path returns
   *   only `role` and `content`.
   */
  async generateResponse(messages, toolDefinitions) {
    const options = {
      toolDefinitions,
      maxTokens: this.config.maxTokens,
      temperature: this.config.temperature
    };
    const offersTools = Array.isArray(toolDefinitions) && toolDefinitions.length > 0;
    if (!offersTools && this.config.streamOutputs && this.model.supportsStreaming() && this.model.generateStream) {
      let fullContent = "";
      for await (const chunk of this.model.generateStream(messages, options)) {
        this.logger.streamChar(chunk);
        fullContent += chunk;
      }
      this.logger.streamEnd();
      return { role: "assistant", content: fullContent };
    }
    return this.model.generate(messages, options);
  }

  /**
   * Process tool calls (final_answer or AgentTool delegation).
   *
   * Calls run sequentially. Unknown tools, unparsable arguments and tool
   * exceptions are reported as per-call `error` entries rather than thrown.
   *
   * @param {Array<object>} toolCalls - Tool calls from the model response.
   * @returns {Promise<Array<{toolCallId: string, toolName: string, result: *, error?: string}>>}
   */
  async processToolCalls(toolCalls) {
    const results = [];
    for (const tc of toolCalls) {
      const toolName = tc.function.name;
      const tool = this.tools.get(toolName);
      if (!tool) {
        results.push({ toolCallId: tc.id, toolName, result: null, error: `Unknown tool: ${toolName}` });
        continue;
      }
      let args;
      try {
        const raw = tc.function.arguments;
        // Missing or null arguments become {} so logging and the tool call
        // below never see undefined.
        args = (typeof raw === "string" ? JSON.parse(raw) : raw) ?? {};
      } catch {
        results.push({ toolCallId: tc.id, toolName, result: null, error: "Failed to parse tool arguments" });
        continue;
      }
      const argsText = JSON.stringify(args) ?? "";
      this.logger.info(` Calling tool: ${toolName}(${argsText.slice(0, 100)}${argsText.length > 100 ? "..." : ""})`);
      this.emitEvent("agent_tool_call", { step: this.currentStep, toolCallId: tc.id, toolName, arguments: args });
      try {
        const result = await tool.call(args);
        this.emitEvent("agent_tool_result", { step: this.currentStep, toolCallId: tc.id, toolName, result, duration: 0 });
        results.push({ toolCallId: tc.id, toolName, result });
      } catch (error) {
        // Tools may throw non-Error values; fall back to String().
        const msg = `Tool execution error: ${error?.message ?? String(error)}`;
        this.emitEvent("agent_tool_result", { step: this.currentStep, toolCallId: tc.id, toolName, result: null, error: msg, duration: 0 });
        results.push({ toolCallId: tc.id, toolName, result: null, error: msg });
      }
    }
    return results;
  }

  /**
   * Override: force final answer via tool call format when max steps hit.
   *
   * @param {string} task - The original task text, quoted back to the model.
   * @returns {Promise<*>} The `answer` argument of the first tool call when
   *   present, otherwise the plain response content.
   */
  async provideFinalAnswer(task) {
    this.logger.subheader("Generating final answer from accumulated context");
    const messages = this.memory.toMessages();
    messages.push({
      role: "user",
      content: `You have reached the maximum number of steps. Based on your work so far, provide the best answer for the task: "${task}". Call the final_answer tool with your response.`
    });
    const toolDefinitions = [new FinalAnswerTool().toOpenAITool()];
    const response = await this.model.generate(messages, { toolDefinitions, maxTokens: this.config.maxTokens, temperature: this.config.temperature });
    const firstCall = response.toolCalls?.[0];
    if (!firstCall) {
      return response.content;
    }
    try {
      const raw = firstCall.function.arguments;
      const args = typeof raw === "string" ? JSON.parse(raw) : raw;
      // Fall back to the text content when the call carries no `answer`.
      return args?.answer ?? response.content;
    } catch {
      return response.content;
    }
  }
};
2405
+
2043
2406
  // src/models/Model.ts
2044
2407
  var Model = class {
2045
2408
  /**
@@ -2783,7 +3146,7 @@ import { spawn } from "child_process";
2783
3146
  import * as path6 from "path";
2784
3147
 
2785
3148
  // src/utils/bunInstaller.ts
2786
- import { execSync } from "child_process";
3149
+ import { execSync as execSync2 } from "child_process";
2787
3150
  import * as path5 from "path";
2788
3151
  import * as fs5 from "fs";
2789
3152
  import * as os3 from "os";
@@ -2804,7 +3167,7 @@ async function ensureBunAvailable() {
2804
3167
  "\n[smol-js] Bun is required to run custom tools but was not found. Installing Bun automatically...\n"
2805
3168
  );
2806
3169
  try {
2807
- execSync("curl --proto =https --tlsv1.2 -sSf https://bun.sh | bash", {
3170
+ execSync2("curl --proto =https --tlsv1.2 -sSf https://bun.sh | bash", {
2808
3171
  stdio: "inherit",
2809
3172
  shell: "/bin/bash",
2810
3173
  env: { ...process.env, HOME: os3.homedir() }
@@ -2829,7 +3192,7 @@ Details: ${err.message}`
2829
3192
  function whichBun() {
2830
3193
  try {
2831
3194
  const cmd = process.platform === "win32" ? "where bun" : "which bun";
2832
- const result = execSync(cmd, { encoding: "utf8", stdio: "pipe" }).trim();
3195
+ const result = execSync2(cmd, { encoding: "utf8", stdio: "pipe" }).trim();
2833
3196
  const first = result.split("\n")[0]?.trim();
2834
3197
  if (first && fs5.existsSync(first)) return first;
2835
3198
  return null;
@@ -3253,32 +3616,24 @@ var YAMLLoader = class {
3253
3616
  }
3254
3617
  }
3255
3618
  const maxContextLength = definition.maxContextLength ?? globalMaxContextLength;
3619
+ const sharedConfig = {
3620
+ model,
3621
+ tools: agentTools,
3622
+ maxSteps: definition.maxSteps,
3623
+ customInstructions: definition.customInstructions,
3624
+ persistent: definition.persistent,
3625
+ maxContextLength,
3626
+ memoryStrategy: definition.memoryStrategy,
3627
+ maxTokens: definition.maxTokens,
3628
+ temperature: definition.temperature,
3629
+ name
3630
+ };
3256
3631
  if (definition.type === "CodeAgent") {
3257
- return new CodeAgent({
3258
- model,
3259
- tools: agentTools,
3260
- maxSteps: definition.maxSteps,
3261
- customInstructions: definition.customInstructions,
3262
- persistent: definition.persistent,
3263
- maxContextLength,
3264
- memoryStrategy: definition.memoryStrategy,
3265
- maxTokens: definition.maxTokens,
3266
- temperature: definition.temperature,
3267
- name
3268
- });
3632
+ return new CodeAgent(sharedConfig);
3633
+ } else if (definition.type === "TerminalAgent") {
3634
+ return new TerminalAgent(sharedConfig);
3269
3635
  } else {
3270
- return new ToolUseAgent({
3271
- model,
3272
- tools: agentTools,
3273
- maxSteps: definition.maxSteps,
3274
- customInstructions: definition.customInstructions,
3275
- persistent: definition.persistent,
3276
- maxContextLength,
3277
- memoryStrategy: definition.memoryStrategy,
3278
- maxTokens: definition.maxTokens,
3279
- temperature: definition.temperature,
3280
- name
3281
- });
3636
+ return new ToolUseAgent(sharedConfig);
3282
3637
  }
3283
3638
  }
3284
3639
  };
@@ -3677,6 +4032,7 @@ export {
3677
4032
  Orchestrator,
3678
4033
  ProxyTool,
3679
4034
  ReadFileTool,
4035
+ TerminalAgent,
3680
4036
  Tool,
3681
4037
  ToolUseAgent,
3682
4038
  UserInputTool,