0agent 1.0.56 → 1.0.57
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/dist/daemon.mjs +25 -5
- package/package.json +1 -1
package/dist/daemon.mjs
CHANGED
|
@@ -2135,10 +2135,10 @@ var init_BrowserCapability = __esm({
|
|
|
2135
2135
|
"use strict";
|
|
2136
2136
|
BrowserCapability = class {
|
|
2137
2137
|
name = "browser_open";
|
|
2138
|
-
description = "
|
|
2138
|
+
description = "Headless browser for scraping JS-heavy pages. NOT for user-facing browser automation.";
|
|
2139
2139
|
toolDefinition = {
|
|
2140
2140
|
name: "browser_open",
|
|
2141
|
-
description: `
|
|
2141
|
+
description: `Headless browser \u2014 ONLY for reading/scraping page content when scrape_url fails on JS-heavy pages. action="read" (default): extract text headlessly (invisible, no real browser window opened). NEVER use this when the task involves the user's real browser or visible UI \u2014 use gui_automation with open_url instead. Do NOT use alongside gui_automation for the same URL \u2014 pick one.`,
|
|
2142
2142
|
input_schema: {
|
|
2143
2143
|
type: "object",
|
|
2144
2144
|
properties: {
|
|
@@ -2599,13 +2599,13 @@ var init_GUICapability = __esm({
|
|
|
2599
2599
|
description = "Automate desktop GUI \u2014 click, type, screenshot, hotkeys, find text on screen.";
|
|
2600
2600
|
toolDefinition = {
|
|
2601
2601
|
name: "gui_automation",
|
|
2602
|
-
description: "Desktop GUI automation \u2014 ONLY
|
|
2602
|
+
description: "Desktop GUI automation \u2014 ONLY for tasks that explicitly require controlling the screen. DO NOT use for coding, research, file edits, or tasks that do not need the desktop UI. DO NOT use alongside browser_open for the same URL \u2014 pick one tool and finish the task in it. wait: pause N seconds for UI/page to load \u2014 use after every navigation or click that triggers a page load. screenshot: only when you cannot proceed without seeing the screen. Max 2 per task. open_url: opens in existing browser tab, never duplicates windows.",
|
|
2603
2603
|
input_schema: {
|
|
2604
2604
|
type: "object",
|
|
2605
2605
|
properties: {
|
|
2606
2606
|
action: {
|
|
2607
2607
|
type: "string",
|
|
2608
|
-
description: '"screenshot" | "click" | "double_click" | "right_click" | "move" | "type" | "hotkey" | "scroll" | "drag" | "find_and_click" | "get_screen_size" | "get_cursor_pos" | "open_url" | "open_app"'
|
|
2608
|
+
description: '"screenshot" | "click" | "double_click" | "right_click" | "move" | "type" | "hotkey" | "scroll" | "drag" | "find_and_click" | "get_screen_size" | "get_cursor_pos" | "wait" | "open_url" | "open_app"'
|
|
2609
2609
|
},
|
|
2610
2610
|
x: { type: "number", description: "X coordinate (pixels from left)" },
|
|
2611
2611
|
y: { type: "number", description: "Y coordinate (pixels from top)" },
|
|
@@ -2617,6 +2617,7 @@ var init_GUICapability = __esm({
|
|
|
2617
2617
|
amount: { type: "number", description: "Scroll clicks (default 3)" },
|
|
2618
2618
|
app: { type: "string", description: 'App name to open e.g. "Safari", "Terminal", "Chrome"' },
|
|
2619
2619
|
url: { type: "string", description: 'URL to open e.g. "https://example.com" (use with open_url)' },
|
|
2620
|
+
seconds: { type: "number", description: "Seconds to wait (use with wait action, default 2)" },
|
|
2620
2621
|
interval: { type: "number", description: "Seconds to wait between actions (default 0.05)" },
|
|
2621
2622
|
duration: { type: "number", description: "Seconds for mouse movement animation (default 0.2)" }
|
|
2622
2623
|
},
|
|
@@ -2628,7 +2629,7 @@ var init_GUICapability = __esm({
|
|
|
2628
2629
|
const start = Date.now();
|
|
2629
2630
|
const script = this._buildScript(action, input);
|
|
2630
2631
|
if (!script) {
|
|
2631
|
-
return { success: false, output: `Unknown GUI action: "${action}". Valid: screenshot, click, double_click, right_click, move, type, hotkey, scroll, drag, find_and_click, get_screen_size, get_cursor_pos, open_url, open_app`, duration_ms: 0 };
|
|
2632
|
+
return { success: false, output: `Unknown GUI action: "${action}". Valid: screenshot, click, double_click, right_click, move, type, hotkey, scroll, drag, find_and_click, get_screen_size, get_cursor_pos, wait, open_url, open_app`, duration_ms: 0 };
|
|
2632
2633
|
}
|
|
2633
2634
|
if (signal?.aborted) {
|
|
2634
2635
|
return { success: false, output: "Cancelled.", duration_ms: 0 };
|
|
@@ -2718,6 +2719,7 @@ var init_GUICapability = __esm({
|
|
|
2718
2719
|
const amount = input.amount != null ? Number(input.amount) : 3;
|
|
2719
2720
|
const app = input.app != null ? String(input.app) : "";
|
|
2720
2721
|
const url = input.url != null ? String(input.url) : "";
|
|
2722
|
+
const seconds = input.seconds != null ? Number(input.seconds) : 2;
|
|
2721
2723
|
const interval = input.interval != null ? Number(input.interval) : 0.05;
|
|
2722
2724
|
const duration = input.duration != null ? Number(input.duration) : 0.2;
|
|
2723
2725
|
const header = `
|
|
@@ -2737,6 +2739,11 @@ print(f"Screen size: {w} x {h}")
|
|
|
2737
2739
|
return header + `
|
|
2738
2740
|
x, y = pyautogui.position()
|
|
2739
2741
|
print(f"Cursor position: ({x}, {y})")
|
|
2742
|
+
`;
|
|
2743
|
+
case "wait":
|
|
2744
|
+
return header + `
|
|
2745
|
+
time.sleep(${seconds})
|
|
2746
|
+
print(f"Waited ${seconds}s")
|
|
2740
2747
|
`;
|
|
2741
2748
|
case "screenshot": {
|
|
2742
2749
|
return header + `
|
|
@@ -3497,6 +3504,19 @@ content = element.text if element else page.get_all_text()` : `content = page.ge
|
|
|
3497
3504
|
`- Use relative paths from the working directory`,
|
|
3498
3505
|
`- Be concise in your final response: state what was done and where to find it`,
|
|
3499
3506
|
``,
|
|
3507
|
+
`\u2550\u2550\u2550 EXECUTION DISCIPLINE \u2014 follow strictly \u2550\u2550\u2550`,
|
|
3508
|
+
`- SEQUENTIAL: complete each step fully before starting the next. Never start step 2 while step 1 is still in progress.`,
|
|
3509
|
+
`- NO DUPLICATION: before any action, review the conversation above. If you already did it (opened a URL, clicked a button, sent a message), DO NOT do it again.`,
|
|
3510
|
+
`- ONE BROWSER ONLY: never use both gui_automation and browser_open for the same task.`,
|
|
3511
|
+
` \xB7 Use gui_automation (open_url) when the task involves the user's real visible browser.`,
|
|
3512
|
+
` \xB7 Use browser_open ONLY for silent scraping/content-extraction where no visible browser is needed.`,
|
|
3513
|
+
` \xB7 Never open the same URL in both. Pick one and finish the task in it.`,
|
|
3514
|
+
`- WAIT FOR LOADS: after every navigation, click, or app open \u2014 wait for the UI to fully load before the next action.`,
|
|
3515
|
+
` \xB7 Use gui_automation({action:"wait", seconds:2}) after opening URLs or clicking buttons that trigger navigation.`,
|
|
3516
|
+
` \xB7 Web apps (WhatsApp, Gmail, etc.) need 3\u20135 seconds. Native apps need 1\u20132 seconds.`,
|
|
3517
|
+
` \xB7 If an action produced no visible change, wait and try once more \u2014 do not spam the same action.`,
|
|
3518
|
+
`\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550`,
|
|
3519
|
+
``,
|
|
3500
3520
|
`GUI Automation (gui_automation tool) \u2014 ONLY use when the task explicitly requires controlling the desktop UI:`,
|
|
3501
3521
|
`- DO NOT take screenshots for general tasks, coding, research, or anything that doesn't need the screen`,
|
|
3502
3522
|
`- Only screenshot when you genuinely cannot proceed without seeing the current screen state`,
|