0agent 1.0.69 → 1.0.70
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/daemon.mjs +62 -16
- package/package.json +1 -1
package/dist/daemon.mjs
CHANGED
|
@@ -3027,7 +3027,7 @@ var init_GUICapability = __esm({
|
|
|
3027
3027
|
properties: {
|
|
3028
3028
|
action: {
|
|
3029
3029
|
type: "string",
|
|
3030
|
-
description: 'Browser (no Screen Recording): "click_text"|"type_in"|"get_elements"|"read_element"|"get_media_state"|"scroll_to"|"exec_js"|"browser_state"|"cdp_screenshot" | Native apps: "accessibility_click" | Navigation: "open_url"|"open_app" | Mouse/KB (Screen Recording for screenshots): "screenshot"|"click"|"double_click"|"right_click"|"move"|"type"|"hotkey"|"scroll"|"drag"|"find_and_click"|"get_screen_size"|"get_cursor_pos"|"wait"'
|
|
3030
|
+
description: 'Browser (no Screen Recording): "click_text"|"type_in"|"get_elements"|"read_element"|"get_media_state"|"scroll_to"|"exec_js"|"browser_state"|"cdp_screenshot" | Native apps (no Screen Recording): "app_type"|"accessibility_click" | Navigation: "open_url"|"open_app" | Mouse/KB (Screen Recording for screenshots): "screenshot"|"click"|"double_click"|"right_click"|"move"|"type"|"hotkey"|"scroll"|"drag"|"find_and_click"|"get_screen_size"|"get_cursor_pos"|"wait"'
|
|
3031
3031
|
},
|
|
3032
3032
|
js: { type: "string", description: `JavaScript to execute in Chrome tab (use with exec_js). Example: "document.querySelector('video').paused"` },
|
|
3033
3033
|
selector: { type: "string", description: 'CSS selector for read_element, type_in, scroll_to (e.g. "input[type=search]", ".title", "video")' },
|
|
@@ -3292,7 +3292,7 @@ print(f"Moved to ({${x}}, {${y}})")
|
|
|
3292
3292
|
case "type": {
|
|
3293
3293
|
if (!text) return null;
|
|
3294
3294
|
return header + `
|
|
3295
|
-
pyautogui.
|
|
3295
|
+
pyautogui.typewrite(${JSON.stringify(text)}, interval=${interval})
|
|
3296
3296
|
print("Typed successfully")
|
|
3297
3297
|
`;
|
|
3298
3298
|
}
|
|
@@ -3587,6 +3587,48 @@ time.sleep(1.5)
|
|
|
3587
3587
|
`;
|
|
3588
3588
|
}
|
|
3589
3589
|
// ── New high-level browser and native-app actions — no Screen Recording needed ───────────
|
|
3590
|
+
case "app_type": {
|
|
3591
|
+
const appName = String(input.app ?? "").trim();
|
|
3592
|
+
const typeText = String(input.text ?? text ?? "").trim();
|
|
3593
|
+
if (!appName || !typeText) return null;
|
|
3594
|
+
const osName = platform2();
|
|
3595
|
+
if (osName !== "darwin") return header + `print("app_type requires macOS")`;
|
|
3596
|
+
const safeApp = appName.replace(/'/g, "\\'");
|
|
3597
|
+
const textJson = JSON.stringify(typeText);
|
|
3598
|
+
return header + `
|
|
3599
|
+
import subprocess, time, json
|
|
3600
|
+
|
|
3601
|
+
text_to_type = json.loads(${textJson})
|
|
3602
|
+
|
|
3603
|
+
# Step 1: copy to clipboard (handles unicode, special chars, long text)
|
|
3604
|
+
cp = subprocess.run(['pbcopy'], input=text_to_type.encode('utf-8'), capture_output=True)
|
|
3605
|
+
if cp.returncode != 0:
|
|
3606
|
+
print(f"Clipboard copy failed: {cp.stderr.decode()[:100]}")
|
|
3607
|
+
sys.exit(1)
|
|
3608
|
+
|
|
3609
|
+
# Step 2: bring app to front
|
|
3610
|
+
subprocess.run(['osascript', '-e', 'tell application "${safeApp}" to activate'], capture_output=True)
|
|
3611
|
+
time.sleep(0.4)
|
|
3612
|
+
|
|
3613
|
+
# Step 3: paste via AppleScript System Events (targets the specific process, not OS focus)
|
|
3614
|
+
paste_script = """tell application "System Events"
|
|
3615
|
+
tell process "${safeApp}"
|
|
3616
|
+
keystroke "v" using command down
|
|
3617
|
+
end tell
|
|
3618
|
+
end tell"""
|
|
3619
|
+
r = subprocess.run(['osascript', '-e', paste_script], capture_output=True, text=True)
|
|
3620
|
+
if r.returncode == 0:
|
|
3621
|
+
print(f"Typed in ${safeApp}: {text_to_type[:60]}")
|
|
3622
|
+
else:
|
|
3623
|
+
# Accessibility permission might be needed
|
|
3624
|
+
err = r.stderr.strip()
|
|
3625
|
+
if 'not allowed' in err.lower() or 'accessibility' in err.lower():
|
|
3626
|
+
subprocess.run(['open', 'x-apple.systempreferences:com.apple.preference.security?Privacy_Accessibility'], capture_output=True)
|
|
3627
|
+
print(f"Accessibility permission needed for ${safeApp}. System Settings opened \u2014 Privacy & Security \u2192 Accessibility \u2192 enable Terminal.")
|
|
3628
|
+
else:
|
|
3629
|
+
print(f"app_type error: {err[:200]}")
|
|
3630
|
+
`;
|
|
3631
|
+
}
|
|
3590
3632
|
case "click_text": {
|
|
3591
3633
|
if (!text) return null;
|
|
3592
3634
|
if (platform2() !== "darwin") return header + `print("click_text requires macOS + Chrome")`;
|
|
@@ -4709,20 +4751,24 @@ content = element.text if element else page.get_all_text()` : `content = page.ge
|
|
|
4709
4751
|
// Guidance appended to the prompt: which GUI actions exist, how to drive
// native macOS apps without Screen Recording, and a fixed WhatsApp recipe.
const guiActionHelp = [
  ``,
  `Browser/GUI actions \u2014 ALL work without Screen Recording permission:`,
  `BROWSER (Chrome): click_text {text} | type_in {selector,text} | get_elements | read_element {selector}`,
  `  get_media_state | scroll_to {selector|direction} | exec_js {js} | browser_state | cdp_screenshot`,
  `NATIVE APPS (no Screen Recording \u2014 use these for WhatsApp, iMessage, Finder):`,
  `  app_type {app:"WhatsApp", text:"hi"} \u2014 types via clipboard paste \u2192 cmd+v into the app.`,
  `    Uses macOS clipboard so unicode/emoji/special chars all work. Target app gets the text`,
  `    regardless of OS keyboard focus. ALWAYS use this for native app text input.`,
  `  accessibility_click {app:"WhatsApp", element:"Send"} \u2014 click button via Accessibility API.`,
  `  hotkey {keys:"cmd+f", app:"WhatsApp"} \u2014 send hotkey to specific app (not Terminal).`,
  `WHATSAPP WORKFLOW (use this exact sequence):`,
  `  1. open_app {app:"WhatsApp"}`,
  `  2. hotkey {keys:"cmd+f", app:"WhatsApp"} \u2014 open search`,
  `  3. app_type {app:"WhatsApp", text:"ContactName"} \u2014 search for contact`,
  `  4. hotkey {keys:"enter", app:"WhatsApp"} \u2014 open the conversation`,
  `  5. app_type {app:"WhatsApp", text:"your message"} \u2014 type message`,
  `  6. hotkey {keys:"enter", app:"WhatsApp"} \u2014 send`,
  `  7. accessibility_click {app:"WhatsApp", element:"Send"} \u2014 if enter doesn't send`,
  `NEVER use bare 'type' action for native apps \u2014 it goes to Terminal not the app.`,
  `ALWAYS verify: browser_state after web nav, get_media_state after play/pause, read_element for page content.`,
];
// Entries are appended in order, exactly as a single multi-argument push would.
lines.push(...guiActionHelp);
|
|
4727
4773
|
}
|
|
4728
4774
|
if (isSelfMod && this.agentRoot) {
|