0agent 1.0.56 → 1.0.58

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/bin/chat.js CHANGED
@@ -1181,9 +1181,11 @@ emitKeypressEvents(process.stdin, rl);
1181
1181
  process.stdin.on('keypress', (_char, key) => {
1182
1182
  if (!key || _paletteOpen) return;
1183
1183
  if (key.name === 'escape' && pendingResolve) {
1184
- // Cancel the running session cleanly
1185
- process.stdout.write(`\r\x1b[2K\n ${fmt(C.yellow, '↩')} Cancelled\n`);
1186
1184
  spinner.stop();
1185
+ process.stdout.write(`\r\x1b[2K`);
1186
+ process.stdout.write(`\n ${fmt(C.yellow, '⏹')} ${fmt(C.bold, 'Task stopped.')} All background processes killed.\n`);
1187
+ process.stdout.write(` ${fmt(C.dim, 'Send a new message to start fresh.')}\n\n`);
1188
+
1187
1189
  if (sessionId) {
1188
1190
  fetch(`${BASE_URL}/api/sessions/${sessionId}`, { method: 'DELETE' }).catch(() => {});
1189
1191
  }
@@ -1195,7 +1197,6 @@ process.stdin.on('keypress', (_char, key) => {
1195
1197
  messageQueue.length = 0;
1196
1198
 
1197
1199
  // Kill any OS-level processes spawned by GUI/shell capabilities
1198
- // (python3 GUI scripts, bash subprocesses) so nothing keeps running
1199
1200
  import('node:child_process').then(({ execSync: _exec }) => {
1200
1201
  try { _exec('pkill -f "0agent_gui_" 2>/dev/null; pkill -f "0agent-bg-" 2>/dev/null; true', { stdio: 'ignore' }); } catch {}
1201
1202
  }).catch(() => {});
package/dist/daemon.mjs CHANGED
@@ -2135,10 +2135,10 @@ var init_BrowserCapability = __esm({
2135
2135
  "use strict";
2136
2136
  BrowserCapability = class {
2137
2137
  name = "browser_open";
2138
- description = "Open a URL in the system browser or extract page content. Use when scrape_url fails on JS-heavy pages.";
2138
+ description = "Headless browser for scraping JS-heavy pages. NOT for user-facing browser automation.";
2139
2139
  toolDefinition = {
2140
2140
  name: "browser_open",
2141
- description: `Open or read a URL using the system browser. Use action="open" to launch the URL visibly in the user's default browser. Use action="read" (default) to extract page content headlessly. Use when scrape_url fails on JS-heavy pages.`,
2141
+ description: `Headless browser \u2014 ONLY for reading/scraping page content when scrape_url fails on JS-heavy pages. action="read" (default): extract text headlessly (invisible, no real browser window opened). NEVER use this when the task involves the user's real browser or visible UI \u2014 use gui_automation with open_url instead. Do NOT use alongside gui_automation for the same URL \u2014 pick one.`,
2142
2142
  input_schema: {
2143
2143
  type: "object",
2144
2144
  properties: {
@@ -2599,13 +2599,13 @@ var init_GUICapability = __esm({
2599
2599
  description = "Automate desktop GUI \u2014 click, type, screenshot, hotkeys, find text on screen.";
2600
2600
  toolDefinition = {
2601
2601
  name: "gui_automation",
2602
- description: "Desktop GUI automation \u2014 ONLY use for tasks that explicitly require controlling the screen. DO NOT use for coding, research, file edits, or any task that does not need the desktop UI. Actions: click, type, hotkey, scroll, find_and_click, open_url, open_app. screenshot: use sparingly \u2014 only when you cannot proceed without seeing the current screen. Max 2 per task. To open a website use open_url (reuses existing browser tab, never opens duplicate windows).",
2602
+ description: "Desktop GUI automation \u2014 ONLY for tasks that explicitly require controlling the screen. DO NOT use for coding, research, file edits, or tasks that do not need the desktop UI. DO NOT use alongside browser_open for the same URL \u2014 pick one tool and finish the task in it. wait: pause N seconds for UI/page to load \u2014 use after every navigation or click that triggers a page load. screenshot: only when you cannot proceed without seeing the screen. Max 2 per task. open_url: opens in existing browser tab, never duplicates windows.",
2603
2603
  input_schema: {
2604
2604
  type: "object",
2605
2605
  properties: {
2606
2606
  action: {
2607
2607
  type: "string",
2608
- description: '"screenshot" | "click" | "double_click" | "right_click" | "move" | "type" | "hotkey" | "scroll" | "drag" | "find_and_click" | "get_screen_size" | "get_cursor_pos" | "open_url" | "open_app"'
2608
+ description: '"screenshot" | "click" | "double_click" | "right_click" | "move" | "type" | "hotkey" | "scroll" | "drag" | "find_and_click" | "get_screen_size" | "get_cursor_pos" | "wait" | "open_url" | "open_app"'
2609
2609
  },
2610
2610
  x: { type: "number", description: "X coordinate (pixels from left)" },
2611
2611
  y: { type: "number", description: "Y coordinate (pixels from top)" },
@@ -2617,6 +2617,7 @@ var init_GUICapability = __esm({
2617
2617
  amount: { type: "number", description: "Scroll clicks (default 3)" },
2618
2618
  app: { type: "string", description: 'App name to open e.g. "Safari", "Terminal", "Chrome"' },
2619
2619
  url: { type: "string", description: 'URL to open e.g. "https://example.com" (use with open_url)' },
2620
+ seconds: { type: "number", description: "Seconds to wait (use with wait action, default 2)" },
2620
2621
  interval: { type: "number", description: "Seconds to wait between actions (default 0.05)" },
2621
2622
  duration: { type: "number", description: "Seconds for mouse movement animation (default 0.2)" }
2622
2623
  },
@@ -2628,7 +2629,7 @@ var init_GUICapability = __esm({
2628
2629
  const start = Date.now();
2629
2630
  const script = this._buildScript(action, input);
2630
2631
  if (!script) {
2631
- return { success: false, output: `Unknown GUI action: "${action}". Valid: screenshot, click, double_click, right_click, move, type, hotkey, scroll, drag, find_and_click, get_screen_size, get_cursor_pos, open_url, open_app`, duration_ms: 0 };
2632
+ return { success: false, output: `Unknown GUI action: "${action}". Valid: screenshot, click, double_click, right_click, move, type, hotkey, scroll, drag, find_and_click, get_screen_size, get_cursor_pos, wait, open_url, open_app`, duration_ms: 0 };
2632
2633
  }
2633
2634
  if (signal?.aborted) {
2634
2635
  return { success: false, output: "Cancelled.", duration_ms: 0 };
@@ -2718,6 +2719,7 @@ var init_GUICapability = __esm({
2718
2719
  const amount = input.amount != null ? Number(input.amount) : 3;
2719
2720
  const app = input.app != null ? String(input.app) : "";
2720
2721
  const url = input.url != null ? String(input.url) : "";
2722
+ const seconds = input.seconds != null ? Number(input.seconds) : 2;
2721
2723
  const interval = input.interval != null ? Number(input.interval) : 0.05;
2722
2724
  const duration = input.duration != null ? Number(input.duration) : 0.2;
2723
2725
  const header = `
@@ -2737,6 +2739,11 @@ print(f"Screen size: {w} x {h}")
2737
2739
  return header + `
2738
2740
  x, y = pyautogui.position()
2739
2741
  print(f"Cursor position: ({x}, {y})")
2742
+ `;
2743
+ case "wait":
2744
+ return header + `
2745
+ time.sleep(${seconds})
2746
+ print(f"Waited ${seconds}s")
2740
2747
  `;
2741
2748
  case "screenshot": {
2742
2749
  return header + `
@@ -3497,6 +3504,19 @@ content = element.text if element else page.get_all_text()` : `content = page.ge
3497
3504
  `- Use relative paths from the working directory`,
3498
3505
  `- Be concise in your final response: state what was done and where to find it`,
3499
3506
  ``,
3507
+ `\u2550\u2550\u2550 EXECUTION DISCIPLINE \u2014 follow strictly \u2550\u2550\u2550`,
3508
+ `- SEQUENTIAL: complete each step fully before starting the next. Never start step 2 while step 1 is still in progress.`,
3509
+ `- NO DUPLICATION: before any action, review the conversation above. If you already did it (opened a URL, clicked a button, sent a message), DO NOT do it again.`,
3510
+ `- ONE BROWSER ONLY: never use both gui_automation and browser_open for the same task.`,
3511
+ ` \xB7 Use gui_automation (open_url) when the task involves the user's real visible browser.`,
3512
+ ` \xB7 Use browser_open ONLY for silent scraping/content-extraction where no visible browser is needed.`,
3513
+ ` \xB7 Never open the same URL in both. Pick one and finish the task in it.`,
3514
+ `- WAIT FOR LOADS: after every navigation, click, or app open \u2014 wait for the UI to fully load before the next action.`,
3515
+ ` \xB7 Use gui_automation({action:"wait", seconds:2}) after opening URLs or clicking buttons that trigger navigation.`,
3516
+ ` \xB7 Web apps (WhatsApp, Gmail, etc.) need 3\u20135 seconds. Native apps need 1\u20132 seconds.`,
3517
+ ` \xB7 If an action produced no visible change, wait and try once more \u2014 do not spam the same action.`,
3518
+ `\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550`,
3519
+ ``,
3500
3520
  `GUI Automation (gui_automation tool) \u2014 ONLY use when the task explicitly requires controlling the desktop UI:`,
3501
3521
  `- DO NOT take screenshots for general tasks, coding, research, or anything that doesn't need the screen`,
3502
3522
  `- Only screenshot when you genuinely cannot proceed without seeing the current screen state`,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "0agent",
3
- "version": "1.0.56",
3
+ "version": "1.0.58",
4
4
  "description": "A persistent, learning AI agent that runs on your machine. An agent that learns.",
5
5
  "private": false,
6
6
  "license": "Apache-2.0",