0agent 1.0.69 → 1.0.71
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/daemon.mjs +111 -24
- package/package.json +1 -1
package/dist/daemon.mjs
CHANGED
|
@@ -3027,7 +3027,7 @@ var init_GUICapability = __esm({
|
|
|
3027
3027
|
properties: {
|
|
3028
3028
|
action: {
|
|
3029
3029
|
type: "string",
|
|
3030
|
-
description: 'Browser (no Screen Recording): "click_text"|"type_in"|"get_elements"|"read_element"|"get_media_state"|"scroll_to"|"exec_js"|"browser_state"|"cdp_screenshot" | Native apps: "accessibility_click" | Navigation: "open_url"|"open_app" | Mouse/KB (Screen Recording for screenshots): "screenshot"|"click"|"double_click"|"right_click"|"move"|"type"|"hotkey"|"scroll"|"drag"|"find_and_click"|"get_screen_size"|"get_cursor_pos"|"wait"'
|
|
3030
|
+
description: 'Browser (no Screen Recording): "click_text"|"type_in"|"get_elements"|"read_element"|"get_media_state"|"scroll_to"|"exec_js"|"browser_state"|"cdp_screenshot" | Native apps (no Screen Recording): "app_type"|"accessibility_click" | Navigation: "open_url"|"open_app" | Mouse/KB (Screen Recording for screenshots): "screenshot"|"click"|"double_click"|"right_click"|"move"|"type"|"hotkey"|"scroll"|"drag"|"find_and_click"|"get_screen_size"|"get_cursor_pos"|"wait"'
|
|
3031
3031
|
},
|
|
3032
3032
|
js: { type: "string", description: `JavaScript to execute in Chrome tab (use with exec_js). Example: "document.querySelector('video').paused"` },
|
|
3033
3033
|
selector: { type: "string", description: 'CSS selector for read_element, type_in, scroll_to (e.g. "input[type=search]", ".title", "video")' },
|
|
@@ -3292,7 +3292,7 @@ print(f"Moved to ({${x}}, {${y}})")
|
|
|
3292
3292
|
case "type": {
|
|
3293
3293
|
if (!text) return null;
|
|
3294
3294
|
return header + `
|
|
3295
|
-
pyautogui.
|
|
3295
|
+
pyautogui.typewrite(${JSON.stringify(text)}, interval=${interval})
|
|
3296
3296
|
print("Typed successfully")
|
|
3297
3297
|
`;
|
|
3298
3298
|
}
|
|
@@ -3303,24 +3303,58 @@ print("Typed successfully")
|
|
|
3303
3303
|
const pyKeys = JSON.stringify(parts);
|
|
3304
3304
|
if (targetApp && platform2() === "darwin") {
|
|
3305
3305
|
const safeApp = targetApp.replace(/'/g, "\\'");
|
|
3306
|
-
const
|
|
3307
|
-
const
|
|
3306
|
+
const mainKey = parts[parts.length - 1] ?? "";
|
|
3307
|
+
const modParts = parts.slice(0, -1);
|
|
3308
|
+
const KEY_CODES = {
|
|
3309
|
+
down: 125,
|
|
3310
|
+
up: 126,
|
|
3311
|
+
left: 123,
|
|
3312
|
+
right: 124,
|
|
3313
|
+
enter: 36,
|
|
3314
|
+
return: 36,
|
|
3315
|
+
escape: 53,
|
|
3316
|
+
esc: 53,
|
|
3317
|
+
tab: 48,
|
|
3318
|
+
delete: 51,
|
|
3319
|
+
backspace: 51,
|
|
3320
|
+
"delete-forward": 117,
|
|
3321
|
+
space: 49,
|
|
3322
|
+
home: 115,
|
|
3323
|
+
end: 119,
|
|
3324
|
+
pageup: 116,
|
|
3325
|
+
pagedown: 121,
|
|
3326
|
+
f1: 122,
|
|
3327
|
+
f2: 120,
|
|
3328
|
+
f3: 99,
|
|
3329
|
+
f4: 118,
|
|
3330
|
+
f5: 96,
|
|
3331
|
+
f6: 97,
|
|
3332
|
+
f7: 98,
|
|
3333
|
+
f8: 100,
|
|
3334
|
+
f9: 101,
|
|
3335
|
+
f10: 109,
|
|
3336
|
+
f11: 103,
|
|
3337
|
+
f12: 111
|
|
3338
|
+
};
|
|
3339
|
+
const keyCode = KEY_CODES[mainKey];
|
|
3340
|
+
const modifiers = modParts.map((k) => {
|
|
3308
3341
|
if (k === "command") return "command down";
|
|
3309
3342
|
if (k === "ctrl") return "control down";
|
|
3310
3343
|
if (k === "shift") return "shift down";
|
|
3311
3344
|
if (k === "option") return "option down";
|
|
3312
3345
|
return "";
|
|
3313
3346
|
}).filter(Boolean).join(", ");
|
|
3314
|
-
const
|
|
3347
|
+
const usingClause = modifiers ? ` using {${modifiers}}` : "";
|
|
3348
|
+
const keyStatement = keyCode !== void 0 ? `key code ${keyCode}${usingClause}` : `keystroke "${mainKey}"${usingClause}`;
|
|
3315
3349
|
return header + `
|
|
3316
3350
|
import subprocess, time
|
|
3317
|
-
# Focus target app first
|
|
3318
3351
|
subprocess.run(['osascript', '-e', 'tell application "${safeApp}" to activate'], capture_output=True)
|
|
3319
3352
|
time.sleep(0.3)
|
|
3320
|
-
|
|
3321
|
-
|
|
3322
|
-
|
|
3323
|
-
end tell
|
|
3353
|
+
as_script = """tell application "System Events"
|
|
3354
|
+
tell process "${safeApp}"
|
|
3355
|
+
${keyStatement}
|
|
3356
|
+
end tell
|
|
3357
|
+
end tell"""
|
|
3324
3358
|
r = subprocess.run(['osascript', '-e', as_script], capture_output=True, text=True)
|
|
3325
3359
|
if r.returncode == 0:
|
|
3326
3360
|
print(f"Sent ${parts.join("+")} to ${safeApp}")
|
|
@@ -3587,6 +3621,48 @@ time.sleep(1.5)
|
|
|
3587
3621
|
`;
|
|
3588
3622
|
}
|
|
3589
3623
|
// ── New high-level browser actions — no Screen Recording needed ───────────
|
|
3624
|
+
case "app_type": {
|
|
3625
|
+
const appName = String(input.app ?? "").trim();
|
|
3626
|
+
const typeText = String(input.text ?? text ?? "").trim();
|
|
3627
|
+
if (!appName || !typeText) return null;
|
|
3628
|
+
const osName = platform2();
|
|
3629
|
+
if (osName !== "darwin") return header + `print("app_type requires macOS")`;
|
|
3630
|
+
const safeApp = appName.replace(/'/g, "\\'");
|
|
3631
|
+
const textJson = JSON.stringify(typeText);
|
|
3632
|
+
return header + `
|
|
3633
|
+
import subprocess, time, json
|
|
3634
|
+
|
|
3635
|
+
text_to_type = json.loads(${textJson})
|
|
3636
|
+
|
|
3637
|
+
# Step 1: copy to clipboard (handles unicode, special chars, long text)
|
|
3638
|
+
cp = subprocess.run(['pbcopy'], input=text_to_type.encode('utf-8'), capture_output=True)
|
|
3639
|
+
if cp.returncode != 0:
|
|
3640
|
+
print(f"Clipboard copy failed: {cp.stderr.decode()[:100]}")
|
|
3641
|
+
sys.exit(1)
|
|
3642
|
+
|
|
3643
|
+
# Step 2: bring app to front
|
|
3644
|
+
subprocess.run(['osascript', '-e', 'tell application "${safeApp}" to activate'], capture_output=True)
|
|
3645
|
+
time.sleep(0.4)
|
|
3646
|
+
|
|
3647
|
+
# Step 3: paste via AppleScript System Events (targets the specific process, not OS focus)
|
|
3648
|
+
paste_script = """tell application "System Events"
|
|
3649
|
+
tell process "${safeApp}"
|
|
3650
|
+
keystroke "v" using command down
|
|
3651
|
+
end tell
|
|
3652
|
+
end tell"""
|
|
3653
|
+
r = subprocess.run(['osascript', '-e', paste_script], capture_output=True, text=True)
|
|
3654
|
+
if r.returncode == 0:
|
|
3655
|
+
print(f"Typed in ${safeApp}: {text_to_type[:60]}")
|
|
3656
|
+
else:
|
|
3657
|
+
# Accessibility permission might be needed
|
|
3658
|
+
err = r.stderr.strip()
|
|
3659
|
+
if 'not allowed' in err.lower() or 'accessibility' in err.lower():
|
|
3660
|
+
subprocess.run(['open', 'x-apple.systempreferences:com.apple.preference.security?Privacy_Accessibility'], capture_output=True)
|
|
3661
|
+
print(f"Accessibility permission needed for ${safeApp}. System Settings opened \u2014 Privacy & Security \u2192 Accessibility \u2192 enable Terminal.")
|
|
3662
|
+
else:
|
|
3663
|
+
print(f"app_type error: {err[:200]}")
|
|
3664
|
+
`;
|
|
3665
|
+
}
|
|
3590
3666
|
case "click_text": {
|
|
3591
3667
|
if (!text) return null;
|
|
3592
3668
|
if (platform2() !== "darwin") return header + `print("click_text requires macOS + Chrome")`;
|
|
@@ -4709,20 +4785,31 @@ content = element.text if element else page.get_all_text()` : `content = page.ge
|
|
|
4709
4785
|
lines.push(
|
|
4710
4786
|
``,
|
|
4711
4787
|
`Browser/GUI actions \u2014 ALL work without Screen Recording permission:`,
|
|
4712
|
-
|
|
4713
|
-
|
|
4714
|
-
|
|
4715
|
-
|
|
4716
|
-
|
|
4717
|
-
|
|
4718
|
-
|
|
4719
|
-
|
|
4720
|
-
|
|
4721
|
-
|
|
4722
|
-
|
|
4723
|
-
|
|
4724
|
-
`
|
|
4725
|
-
`
|
|
4788
|
+
`BROWSER (Chrome): click_text {text} | type_in {selector,text} | get_elements | read_element {selector}`,
|
|
4789
|
+
` get_media_state | scroll_to {selector|direction} | exec_js {js} | browser_state | cdp_screenshot`,
|
|
4790
|
+
`NATIVE APPS (no Screen Recording \u2014 use these for WhatsApp, iMessage, Finder):`,
|
|
4791
|
+
` app_type {app:"WhatsApp", text:"hi"} \u2014 types via clipboard paste \u2192 cmd+v into the app.`,
|
|
4792
|
+
` Uses macOS clipboard so unicode/emoji/special chars all work. Target app gets the text`,
|
|
4793
|
+
` regardless of OS keyboard focus. ALWAYS use this for native app text input.`,
|
|
4794
|
+
` accessibility_click {app:"WhatsApp", element:"Send"} \u2014 click button via Accessibility API.`,
|
|
4795
|
+
` hotkey {keys:"cmd+f", app:"WhatsApp"} \u2014 send hotkey to specific app (not Terminal).`,
|
|
4796
|
+
`NATIVE APP SEARCH NAVIGATION (generalised \u2014 works for WhatsApp, Spotlight, file dialogs, any search):`,
|
|
4797
|
+
` After typing in a search box, results appear in a dropdown/list.`,
|
|
4798
|
+
` Navigate them with: hotkey {keys:"down", app:"AppName"} to move to first result,`,
|
|
4799
|
+
` then hotkey {keys:"enter", app:"AppName"} to select/open it.`,
|
|
4800
|
+
` If first down+enter doesn't work, try: down, down, enter (some apps skip a header row).`,
|
|
4801
|
+
` Use wait {seconds:1} after typing to give results time to load before navigating.`,
|
|
4802
|
+
`WHATSAPP EXACT WORKFLOW:`,
|
|
4803
|
+
` 1. open_app {app:"WhatsApp"}`,
|
|
4804
|
+
` 2. hotkey {keys:"cmd+f", app:"WhatsApp"} \u2014 open search bar`,
|
|
4805
|
+
` 3. app_type {app:"WhatsApp", text:"ContactName"} \u2014 type contact name`,
|
|
4806
|
+
` 4. wait {seconds:1} \u2014 wait for search results to load`,
|
|
4807
|
+
` 5. hotkey {keys:"down", app:"WhatsApp"} \u2014 move focus to first result`,
|
|
4808
|
+
` 6. hotkey {keys:"enter", app:"WhatsApp"} \u2014 open the conversation`,
|
|
4809
|
+
` 7. app_type {app:"WhatsApp", text:"your message"} \u2014 type message`,
|
|
4810
|
+
` 8. hotkey {keys:"enter", app:"WhatsApp"} \u2014 send`,
|
|
4811
|
+
`NEVER use bare 'type' action for native apps \u2014 it goes to Terminal not the app.`,
|
|
4812
|
+
`ALWAYS verify: browser_state after web nav, get_media_state after play/pause.`
|
|
4726
4813
|
);
|
|
4727
4814
|
}
|
|
4728
4815
|
if (isSelfMod && this.agentRoot) {
|