@amaster.ai/pi-computer-use 0.1.2-beta.2 → 0.1.2-beta.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. package/README.md +136 -0
  2. package/bin/darwin-arm64/.version +2 -0
  3. package/bin/darwin-arm64/CuaDriver.app/Contents/CodeResources +0 -0
  4. package/bin/darwin-arm64/CuaDriver.app/Contents/Info.plist +32 -0
  5. package/bin/darwin-arm64/CuaDriver.app/Contents/MacOS/cua-driver +0 -0
  6. package/bin/darwin-arm64/CuaDriver.app/Contents/Resources/Skills/cua-driver/README.md +140 -0
  7. package/bin/darwin-arm64/CuaDriver.app/Contents/Resources/Skills/cua-driver/RECORDING.md +113 -0
  8. package/bin/darwin-arm64/CuaDriver.app/Contents/Resources/Skills/cua-driver/SKILL.md +887 -0
  9. package/bin/darwin-arm64/CuaDriver.app/Contents/Resources/Skills/cua-driver/TESTS.md +232 -0
  10. package/bin/darwin-arm64/CuaDriver.app/Contents/Resources/Skills/cua-driver/WEB_APPS.md +471 -0
  11. package/bin/darwin-arm64/CuaDriver.app/Contents/_CodeSignature/CodeResources +172 -0
  12. package/bin/darwin-x64/.version +2 -0
  13. package/bin/darwin-x64/CuaDriver.app/Contents/CodeResources +0 -0
  14. package/bin/darwin-x64/CuaDriver.app/Contents/Info.plist +32 -0
  15. package/bin/darwin-x64/CuaDriver.app/Contents/MacOS/cua-driver +0 -0
  16. package/bin/darwin-x64/CuaDriver.app/Contents/Resources/Skills/cua-driver/README.md +140 -0
  17. package/bin/darwin-x64/CuaDriver.app/Contents/Resources/Skills/cua-driver/RECORDING.md +113 -0
  18. package/bin/darwin-x64/CuaDriver.app/Contents/Resources/Skills/cua-driver/SKILL.md +887 -0
  19. package/bin/darwin-x64/CuaDriver.app/Contents/Resources/Skills/cua-driver/TESTS.md +232 -0
  20. package/bin/darwin-x64/CuaDriver.app/Contents/Resources/Skills/cua-driver/WEB_APPS.md +471 -0
  21. package/bin/darwin-x64/CuaDriver.app/Contents/_CodeSignature/CodeResources +172 -0
  22. package/bin/linux-x64/.version +2 -0
  23. package/bin/linux-x64/cua-driver +0 -0
  24. package/bin/win32-arm64/.version +2 -0
  25. package/bin/win32-arm64/cua-driver-uia.exe +0 -0
  26. package/bin/win32-arm64/cua-driver.exe +0 -0
  27. package/bin/win32-x64/.version +2 -0
  28. package/bin/win32-x64/cua-driver-uia.exe +0 -0
  29. package/bin/win32-x64/cua-driver.exe +0 -0
  30. package/dist/config.d.ts +5 -19
  31. package/dist/config.d.ts.map +1 -1
  32. package/dist/config.js +1 -6
  33. package/dist/config.js.map +1 -1
  34. package/dist/index.d.ts.map +1 -1
  35. package/dist/index.js +578 -71
  36. package/dist/index.js.map +1 -1
  37. package/dist/mcp-client.d.ts +22 -0
  38. package/dist/mcp-client.d.ts.map +1 -0
  39. package/dist/mcp-client.js +91 -0
  40. package/dist/mcp-client.js.map +1 -0
  41. package/dist/vision.d.ts.map +1 -1
  42. package/dist/vision.js +19 -0
  43. package/dist/vision.js.map +1 -1
  44. package/package.json +20 -6
  45. package/preview.png +0 -0
  46. package/scripts/postinstall.js +29 -0
  47. package/dist/__tests__/computer-client.test.d.ts +0 -2
  48. package/dist/__tests__/computer-client.test.d.ts.map +0 -1
  49. package/dist/__tests__/computer-client.test.js +0 -174
  50. package/dist/__tests__/computer-client.test.js.map +0 -1
  51. package/dist/__tests__/index.test.d.ts +0 -2
  52. package/dist/__tests__/index.test.d.ts.map +0 -1
  53. package/dist/__tests__/index.test.js +0 -385
  54. package/dist/__tests__/index.test.js.map +0 -1
  55. package/dist/__tests__/server-process.test.d.ts +0 -2
  56. package/dist/__tests__/server-process.test.d.ts.map +0 -1
  57. package/dist/__tests__/server-process.test.js +0 -127
  58. package/dist/__tests__/server-process.test.js.map +0 -1
  59. package/dist/__tests__/vision.test.d.ts +0 -2
  60. package/dist/__tests__/vision.test.d.ts.map +0 -1
  61. package/dist/__tests__/vision.test.js +0 -36
  62. package/dist/__tests__/vision.test.js.map +0 -1
  63. package/dist/actions.d.ts +0 -15
  64. package/dist/actions.d.ts.map +0 -1
  65. package/dist/actions.js +0 -45
  66. package/dist/actions.js.map +0 -1
  67. package/dist/computer-client.d.ts +0 -13
  68. package/dist/computer-client.d.ts.map +0 -1
  69. package/dist/computer-client.js +0 -109
  70. package/dist/computer-client.js.map +0 -1
  71. package/dist/server-process.d.ts +0 -9
  72. package/dist/server-process.d.ts.map +0 -1
  73. package/dist/server-process.js +0 -76
  74. package/dist/server-process.js.map +0 -1
@@ -0,0 +1,471 @@
1
+ # Driving web-rendered apps
2
+
3
+ Covers apps whose UI is rendered in a web runtime inside a native
4
+ macOS shell:
5
+
6
+ - **Chromium-family browsers** — Chrome, Edge, Brave, Arc, Vivaldi,
7
+ Opera
8
+ - **WebKit** — Safari
9
+ - **Electron apps** — Slack, Discord, VS Code, Notion, Figma (desktop),
10
+ and most "native" chat / productivity apps
11
+ - **Tauri apps** — use macOS's built-in WKWebView; native menu bar +
12
+ web content, similar to Electron in driving patterns
13
+
14
+ These apps share two traits that drive the rest of this file:
15
+
16
+ 1. Their AX tree is **sparse** until explicitly enabled, and even
17
+ then can be incomplete.
18
+ 2. Their web content is routed through a renderer with its own input
19
+ filters — synthetic events need specific delivery paths to land.
20
+
21
+ ## Sparse AX trees — populate on first snapshot
22
+
23
+ Chromium and Electron apps ship with their web accessibility tree
24
+ disabled by default. CuaDriver flips it on automatically the first
25
+ time you snapshot such an app — the first `get_window_state` call for
26
+ that pid takes up to ~500 ms while Chromium builds the tree,
27
+ subsequent calls are fast. Because `launch_app` runs hidden, the
28
+ Chromium activation nudges (`AXManualAccessibility`,
29
+ `AXEnhancedUserInterface`, `AXObserver` registration) all happen
30
+ during the `get_window_state` snapshot itself — no explicit activation
31
+ is needed to populate the tree.
32
+
33
+ If the first snapshot still looks sparse (just the window frame and
34
+ menubar), **retry once** — Chromium occasionally needs a second call
35
+ to finish populating.
36
+
37
+ If it stays sparse after a retry, the target's AX tree genuinely
38
+ doesn't expose the UI you want. Prefer these before reaching for
39
+ pixels:
40
+
41
+ 1. Look for native entry points Chromium apps usually keep AX-visible:
42
+ menu bar items (`AXMenuBarItem`) — expand them via the two-snapshot
43
+ flow in SKILL.md's menu section, cmd-k style command palettes
44
+ (often AX-exposed), toolbar buttons in the window chrome.
45
+ 2. Use keyboard shortcuts delivered straight to the pid —
46
+ `hotkey({pid, keys: ["cmd", "enter"]})`, `hotkey({pid, keys:
47
+ ["cmd", "k"]})`, etc. Posted via `CGEvent.postToPid`, reaches the
48
+ target regardless of AX state, no activation required.
49
+ 3. For typing into web inputs, use `type_text` — it automatically
50
+ falls back to CGEvent synthesis when the input doesn't implement
51
+ `AXSelectedText`, reaching any focused keyboard receiver including
52
+ Unicode / emoji.
53
+ 4. If none of the above reaches the target, tell the user this
54
+ interaction isn't reachable from the driver today and ask for
55
+ guidance.
56
+
57
+ ## Navigate to a URL
58
+
59
+ **Primary path — `launch_app` with `urls`:**
60
+
61
+ ```
62
+ launch_app({bundle_id: "com.google.Chrome", urls: ["https://cua.ai"]})
63
+ ```
64
+
65
+ Opens the URL in a new tab/window on the existing Chrome pid (or
66
+ starts Chrome if it isn't running). Fully backgrounded — the
67
+ driver's `FocusRestoreGuard` catches Chrome's internal
68
+ `NSApp.activate(ignoringOtherApps:)` during `application(_:open:)`
69
+ and clobbers the frontmost back to what it was before the call.
70
+ No omnibox dance, no focus-steal, no `⌘L` flash. This is the
71
+ default recommendation — use it even when Chrome is already
72
+ running.
73
+
74
+ Caveat: the new window is **hidden-launched** (the whole point of
75
+ cua-driver). If the user needs to see the page on screen, tell
76
+ them to Cmd-Tab / click the Dock icon; the driver never unhides.
77
+ AX reads + element-indexed actions against the hidden window
78
+ work normally, so for agents that just need to extract / click
79
+ things on the loaded page, no unhide is required.
80
+
81
+ **Last-resort path — omnibox via `⌘L`:** forbidden under the
82
+ no-foreground contract (see SKILL.md) because `⌘L` activates
83
+ Chrome even when delivered to a backgrounded pid. It is
84
+ documented here only as historical context:
85
+
86
+ ```
87
+ # DON'T DO THIS — ⌘L steals focus. Use launch_app above.
88
+ hotkey({pid, keys: ["cmd", "l"]})
89
+ type_text({pid, text: "https://cua.ai", delay_ms: 30})
90
+ get_window_state({pid, window_id})
91
+ click({pid, window_id, element_index: <suggestion>})
92
+ ```
93
+
94
+ **Why not AX `set_value` + `press_key return` on the omnibox?**
95
+ Empirically, Chrome's omnibox commit logic requires a "user-typed"
96
+ signal that neither a raw AX value set nor `CGEvent.postToPid`
97
+ keystrokes reliably supply from a backgrounded pid. The URL
98
+ lands in the omnibox but Return fires as a no-op on the page
99
+ body instead of committing navigation. `launch_app({urls})` side-
100
+ steps this entirely by handing the URL to Chrome through the
101
+ canonical Apple Events / LaunchServices `open` path the app
102
+ itself honors.
103
+
104
+ Minor caveats for the rare case a `⌘L` flow is still needed
105
+ (last-resort only, with user buy-in on the focus flash):
106
+ - Don't drop `delay_ms` below ~25 for keystroked typing on
107
+ Chromium — below that, autocomplete insertions interleave with
108
+ your characters and you get garbage like `"exuample.comn"`
109
+ instead of `"example.com"`.
110
+ - Chrome exposes omnibox suggestions as clickable AXMenuItems in
111
+ a dropdown popup. Clicking the first match via AXPress is
112
+ more reliable than pressing Return (which may not commit).
113
+
114
+ ## Tabs vs windows — prefer windows for backgrounded drive
115
+
116
+ Browsers (Chrome, Dia, Arc, Brave, Edge, Safari) structure their
117
+ surface area as {windows → tabs → page content}. Picking the
118
+ right level for cua-driver is critical:
119
+
120
+ - **Tabs** share a window. Only the focused tab's `AXWebArea` is
121
+ populated; switching tabs to drive a different one is visibly
122
+ disruptive. `hotkey ⌘<N>` posts the real shortcut, the window
123
+ re-renders, the user sees the flip. There is no AX path to
124
+ read a background tab's DOM.
125
+
126
+ - **Windows** are independent AX trees. Each has its own `window_id`,
127
+ its own `AXWebArea` with the page's content, and can be driven
128
+ backgrounded via `get_window_state({pid, window_id})` + element-
129
+ indexed clicks without activating or raising the window.
130
+ `launch_app({bundle_id, urls: [url]})` opens each URL in a new
131
+ window (tested against Chrome; other browsers vary).
132
+
133
+ **Rule of thumb:** if the user needs to drive content across URLs
134
+ in the background, open each URL in its own **window** via
135
+ `launch_app({urls: [...]})` and address them by `window_id`. Only
136
+ reach for tab shortcuts when the user explicitly asked for "do it
137
+ in a specific tab" (rare).
138
+
139
+ **Read-only tab enumeration is fine.** Walk the window's toolbar /
140
+ tab-strip in the AX tree for `AXTab` / `AXRadioButton` elements
141
+ and read their `AXTitle`s. You can discover which tabs exist and
142
+ what URLs/titles they carry without switching to any of them.
143
+ Only *activating* a specific tab is visible.
144
+
145
+ ## Keyboard commits on minimized windows
146
+
147
+ When the target window is **minimized** (genie'd into the Dock):
148
+
149
+ - **AX reads** (`get_window_state`), element-indexed AX **clicks**, and
150
+ AX **value writes** (`set_value`) all still work — they land on
151
+ the minimized AX tree and don't deminiaturize the window.
152
+ - **Keyboard commit events** — Return after typing into a text
153
+ field, Space to toggle a checkbox, Tab to move focus — often
154
+ **don't actually fire the element's handler**. The keystroke
155
+ reaches the app via `SLEventPostToPid` but the app's renderer-side
156
+ input focus isn't established on the intended field (setting
157
+ `AXFocused=true` on a minimized window's descendants doesn't
158
+ propagate to real keyboard focus). Symptom: macOS system-alert
159
+ beep, or silent no-op. Example: `hotkey cmd+L` +
160
+ `type_text URL` + `press_key return` on minimized Chrome —
161
+ the URL lands in the omnibox AX value but Return doesn't commit
162
+ the navigation.
163
+ - **Primary workaround — use `set_value` to commit directly**: For
164
+ text fields, `set_value({pid, window_id, element_index, value})`
165
+ sets the entire field value at once, bypassing keyboard commits.
166
+ For a URL in Chrome: find the omnibox via `get_window_state`, then
167
+ `set_value({pid, window_id, element_index: <omnibox>, value: "https://…"})`.
168
+ The value is committed to the AX tree and rendered. Chrome
169
+ auto-navigates when the omnibox value changes via AX on many
170
+ versions, but not reliably — for URLs, prefer `launch_app({urls})` (see "Navigate to a URL").
171
+ - **Secondary workaround — find a clickable equivalent**: If
172
+ `set_value` doesn't auto-commit, find a button and AX-click it
173
+ instead. For a URL, click the "Go" button if exposed; for a form,
174
+ click Submit; for a toggle, AX-click the checkbox. Clicks route
175
+ through AXPress, which doesn't need renderer focus.
176
+ - **Last resort — tell the user the window needs to be un-minimized**:
177
+ Do this only if neither `set_value` nor a clickable equivalent works. Don't
178
+ silently deminiaturize the window — doing so is a layout-disrupting
179
+ side-effect on many apps.
180
+
181
+ ## Scroll the main page
182
+
183
+ ```
184
+ snap = get_window_state({pid, window_id})
185
+ # Find the AXWebArea — typically one per tab.
186
+ scroll({pid, window_id, direction: "down", amount: 3, by: "page", element_index: <web_area>})
187
+ ```
188
+
189
+ Under the hood: `scroll` synthesizes PageUp / PageDown / arrow-key
190
+ keystrokes and posts them via the same auth-signed `SLEventPostToPid`
191
+ path `press_key` uses. That's why it reaches Chromium even when the
192
+ window is backgrounded. Wheel events posted via the same per-pid
193
+ SkyLight path are silently dropped by Chromium's renderer (no
194
+ Scroll-specific auth subclass exists — probe tests confirmed this),
195
+ so the working primitive is keyboard.
196
+
197
+ Granularity: `by: "page"` → PageDown/PageUp (one viewport height
198
+ per unit). `by: "line"` → arrow keys (fine-grained; a few pixels
199
+ per unit in web views, one line in text views). Horizontal `page`
200
+ falls back to Left/Right arrows since there's no standard
201
+ horizontal-page shortcut.
202
+
203
+ When `element_index` is supplied, that element is focused (`AXFocused=true`)
204
+ before the keystrokes fire — useful for directing the scroll into a specific
205
+ element. Without it, keys land wherever the pid's current focus is.
206
+
207
+ ## Jump to page bottom / top
208
+
209
+ ```
210
+ press_key({pid, window_id, element_index: <web_area>, key: "end"})
211
+ # or "home" / "pagedown" / "pageup"
212
+ ```
213
+
214
+ Targets the `AXWebArea` directly (not the omnibox). Routes keys
215
+ through SkyLight's `SLEventPostToPid` where available, falling back
216
+ to `CGEventPostToPid`. Works for most in-page shortcuts against a
217
+ backgrounded window.
218
+
219
+ ## Click something inside a page
220
+
221
+ ```
222
+ click({pid, window_id, element_index: <some_AXLink_or_AXButton>})
223
+ ```
224
+
225
+ Standard element-indexed click. Chromium exposes `AXLink` /
226
+ `AXButton` / `AXTextField` / etc. under the `AXWebArea` — walk the
227
+ tree to find your target, snapshot, click.
228
+
229
+ For a **context menu** on a browser-chrome element (links, buttons,
230
+ toolbar items — anything that advertises `AXShowMenu`), use
231
+ `right_click({pid, window_id, element_index})`. Pure AX RPC,
232
+ identical to `click({pid, window_id, element_index, action: "show_menu"})`.
233
+
234
+ For a context menu on **web content itself** (right-clicking an
235
+ image, a selection, the page background), try `right_click({pid, x,
236
+ y})` — synthesizes a `rightMouseDown`/`rightMouseUp` via auth-signed
237
+ `SLEventPostToPid`. **Known limitation**: Chromium web content
238
+ coerces the event back to a left-click — this appears to affect
239
+ every non-HID-tap synthesis path. Prefer `element_index` whenever
240
+ the target is AX-addressable.
241
+
242
+ ## Enable "Allow JavaScript from Apple Events" — browser support matrix
243
+
244
+ | Browser | `execute javascript` supported | Setting needed | Programmatic path |
245
+ |---|---|---|---|
246
+ | Chrome | ✅ Full | ✅ Yes | Edit Preferences JSON (see below) |
247
+ | Brave | ✅ Full | ✅ Yes | Edit Preferences JSON (same key, different path) |
248
+ | Edge | ✅ Full | ✅ Yes | Edit Preferences JSON (same key, different path) |
249
+ | Safari | ✅ Full (`do JavaScript`) | ✅ Yes | UI automation only — `defaults write` broken |
250
+ | Arc | ⚠️ No return values | No toggle | No reliable path |
251
+ | Firefox | ❌ Not supported | N/A | N/A |
252
+
253
+ ### Chrome / Brave / Edge — Preferences JSON
254
+
255
+ This setting is required for `osascript` `execute javascript` calls. All three are
256
+ Chromium-based and share the same preference key and mechanism.
257
+ Each browser stores preferences per-profile.
258
+
259
+ ### Why menu clicks don't work
260
+
261
+ The menu item (`View → Developer → Allow JavaScript from Apple Events`)
262
+ is a security-sensitive toggle. Verified experimentally:
263
+
264
+ - `AXPress` — advertised actions are `[AXCancel, AXPick]`, not
265
+ `AXPress`; Chrome's command dispatch silently discards it.
266
+ - `AXPick` on a leaf item — opens submenus correctly but does NOT
267
+ commit a leaf toggle; the item is "selected" but not activated.
268
+ - System Events `click theItem` / `click at {x, y}` — returns the
269
+ menu item reference (found it) but Chrome requires a genuine
270
+ trusted user event to flip this flag; synthetic AppleEvent-routed
271
+ clicks are rejected.
272
+ - `CGEvent.post(tap: .cghidEventTap)` while the menu is open —
273
+ Chrome's event loop is occupied processing the menu; the event
274
+ either races or Chrome treats it as untrusted for this toggle.
275
+
276
+ Additionally, when Chrome is **backgrounded**, the Developer submenu
277
+ items appear with `AXEnabled = false` (Chrome's `commandDispatch`
278
+ marks them DISABLED) — any action dispatched returns `.success` at
279
+ the AX layer but is silently discarded. This is the root cause of the
280
+ "ghost click" pattern: the driver reports ✅ but nothing changes.
281
+
282
+ ### Correct path — write the Preferences JSON directly
283
+
284
+ Quit Chrome first, then write the flag, then relaunch:
285
+
286
+ ```bash
287
+ # 1. Quit Chrome
288
+ osascript -e 'quit app "Google Chrome"'
289
+ sleep 1
290
+
291
+ # 2. Write the flag into the active profile's Preferences.
292
+ # Chrome stores this in TWO places — both must be set.
293
+ python3 -c "
294
+ import json, os
295
+ prefs_path = os.path.expanduser(
296
+ '~/Library/Application Support/Google/Chrome/Default/Preferences')
297
+ data = json.load(open(prefs_path))
298
+ data.setdefault('browser', {})['allow_javascript_apple_events'] = True
299
+ data.setdefault('account_values', {}).setdefault('browser', {})['allow_javascript_apple_events'] = True
300
+ json.dump(data, open(prefs_path, 'w'))
301
+ print('allow_javascript_apple_events enabled in Default profile')
302
+ "
303
+
304
+ # 3. Relaunch Chrome and wait for sync to stabilise.
305
+ # Chrome sync fires ~1-2 s after launch and may briefly pull
306
+ # an older value from the server before Chrome pushes our local
307
+ # True back. Either test before sync fires (<1 s) or after it
308
+ # settles (>4 s). Waiting exactly ~2 s lands in the race window
309
+ # and is the most likely way to see a false negative.
310
+ open -a "Google Chrome"
311
+ sleep 5
312
+
313
+ # 4. Verify
314
+ osascript -e 'tell application "Google Chrome"
315
+ tell active tab of front window
316
+ execute javascript "1+1"
317
+ end tell
318
+ end tell'
319
+ # → 2
320
+ ```
321
+
322
+ **Which profile?** The setting is stored per profile, and the script above edits only `Default`.
323
+ If you're unsure which profile is active, write to all non-system profiles:
324
+
325
+ ```bash
326
+ python3 -c "
327
+ import json, glob, os
328
+ for p in glob.glob(os.path.expanduser(
329
+ '~/Library/Application Support/Google/Chrome/*/Preferences')):
330
+ profile = p.split('/')[-2]
331
+ if 'System' in profile or 'Guest' in profile:
332
+ continue
333
+ try:
334
+ data = json.load(open(p))
335
+ data.setdefault('browser', {})['allow_javascript_apple_events'] = True
336
+ data.setdefault('account_values', {}).setdefault('browser', {})['allow_javascript_apple_events'] = True
337
+ json.dump(data, open(p, 'w'))
338
+ print(f'wrote to {profile}')
339
+ except Exception as e:
340
+ print(f'skipped {profile}: {e}')
341
+ "
342
+ ```
343
+
344
+ Chrome overwrites its Preferences file on every clean exit, so the
345
+ write must happen while Chrome is **not running** — otherwise Chrome
346
+ will stomp the change when it quits.
347
+
348
+ **Sync note (Chrome only):** Chrome syncs `browser.allow_javascript_apple_events`
349
+ via Google account (confirmed in `chrome_syncable_prefs_database.cc`).
350
+ Writing both `browser` and `account_values.browser` to the local file
351
+ causes Chrome to push `true` to the sync server on next launch,
352
+ making the change durable. Brave and Edge use their own sync systems
353
+ and likely do NOT sync this Mac-only pref — treat as local-only for
354
+ those browsers.
355
+
356
+ ### Brave
357
+
358
+ Same Chromium pref key, different profile directory:
359
+
360
+ ```bash
361
+ osascript -e 'quit app "Brave Browser"' && sleep 1
362
+ python3 -c "
363
+ import json, glob, os
364
+ for p in glob.glob(os.path.expanduser(
365
+ '~/Library/Application Support/BraveSoftware/Brave-Browser/*/Preferences')):
366
+ profile = p.split('/')[-2]
367
+ if 'System' in profile or 'Guest' in profile:
368
+ continue
369
+ try:
370
+ data = json.load(open(p))
371
+ data.setdefault('browser', {})['allow_javascript_apple_events'] = True
372
+ json.dump(data, open(p, 'w'))
373
+ print(f'wrote to {profile}')
374
+ except Exception as e:
375
+ print(f'skipped {profile}: {e}')
376
+ "
377
+ open -a "Brave Browser" && sleep 5
378
+ ```
379
+
380
+ ### Edge
381
+
382
+ Same Chromium pref key, different profile directory:
383
+
384
+ ```bash
385
+ osascript -e 'quit app "Microsoft Edge"' && sleep 1
386
+ python3 -c "
387
+ import json, glob, os
388
+ for p in glob.glob(os.path.expanduser(
389
+ '~/Library/Application Support/Microsoft Edge/*/Preferences')):
390
+ profile = p.split('/')[-2]
391
+ if 'System' in profile or 'Guest' in profile:
392
+ continue
393
+ try:
394
+ data = json.load(open(p))
395
+ data.setdefault('browser', {})['allow_javascript_apple_events'] = True
396
+ json.dump(data, open(p, 'w'))
397
+ print(f'wrote to {profile}')
398
+ except Exception as e:
399
+ print(f'skipped {profile}: {e}')
400
+ "
401
+ open -a "Microsoft Edge" && sleep 5
402
+ ```
403
+
404
+ ### Safari
405
+
406
+ Safari uses `do JavaScript "..." in document 1` (different AppleScript
407
+ verb from Chrome's `execute javascript`). The setting is under
408
+ `Develop → Allow JavaScript from Apple Events` (requires the Develop
409
+ menu to be enabled first via Settings → Advanced → "Show features for
410
+ web developers").
411
+
412
+ `defaults write -app Safari AllowJavaScriptFromAppleEvents 1` **does
413
+ not work** — Safari ignores the defaults key and routes this toggle
414
+ through macOS's security framework, which shows a password-confirmation
415
+ dialog. The only working programmatic path is UI automation:
416
+
417
+ ```applescript
418
+ -- Requires Accessibility permission for the calling process.
419
+ -- Safari must already be running with the Develop menu visible.
420
+ tell application "Safari" to activate
421
+ delay 0.3
422
+ tell application "System Events"
423
+ tell process "Safari"
424
+ click menu item "Allow JavaScript from Apple Events" of menu 1 ¬
425
+ of menu bar item "Develop" of menu bar 1
426
+ delay 0.3
427
+ -- Safari shows a confirmation dialog — click Allow
428
+ click button "Allow" of window 1
429
+ end tell
430
+ end tell
431
+ ```
432
+
433
+ ### Arc
434
+
435
+ Arc has an AppleScript dictionary and accepts `execute javascript`, but
436
+ **never returns a value** — the call always returns `missing value`.
437
+ There is no "Allow JavaScript from Apple Events" toggle. The only
438
+ workaround is to write results to the clipboard inside the JS and read
439
+ it back:
440
+
441
+ ```applescript
442
+ tell application "Arc"
443
+ tell active tab of front window
444
+ execute javascript "navigator.clipboard.writeText(document.title)"
445
+ end tell
446
+ end tell
447
+ delay 0.3
448
+ set theTitle to the clipboard
449
+ ```
450
+
451
+ Arc's AppleScript JS support is confirmed broken for return values
452
+ (as of 2025). If you need JS results from Arc, use a WebExtension
453
+ with Native Messaging instead.
454
+
455
+ ### Firefox
456
+
457
+ Firefox has no `execute javascript` capability. Bugzilla #287447
458
+ (filed 2004) tracks this and remains unresolved. Use WebDriver /
459
+ Playwright / a WebExtension with Native Messaging for Firefox.
460
+
461
+ ## Typing into a web input
462
+
463
+ ```
464
+ type_text({pid, window_id, element_index: <input_field>, text: "…"})
465
+ ```
466
+
467
+ When the input doesn't implement `AXSelectedText` (some web inputs
468
+ don't), `type_text` automatically falls back to CGEvent
469
+ synthesis — pure CGEvent keystrokes delivered to the pid, reaching
470
+ any focused keyboard receiver. You can also click the field first
471
+ to ensure focus before typing.
@@ -0,0 +1,172 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
3
+ <plist version="1.0">
4
+ <dict>
5
+ <key>files</key>
6
+ <dict>
7
+ <key>Resources/Skills/cua-driver/README.md</key>
8
+ <data>
9
+ gykz5FIHurE9/pRn6P944NB1SNA=
10
+ </data>
11
+ <key>Resources/Skills/cua-driver/RECORDING.md</key>
12
+ <data>
13
+ 5NRFllA3teqQ+mxW9O39/oCjwjc=
14
+ </data>
15
+ <key>Resources/Skills/cua-driver/SKILL.md</key>
16
+ <data>
17
+ f8U4M3l1xHf1BPq+CYfG2CqlxAE=
18
+ </data>
19
+ <key>Resources/Skills/cua-driver/TESTS.md</key>
20
+ <data>
21
+ XR9Xlg8xq0vob2XCbXhYP6+YL/w=
22
+ </data>
23
+ <key>Resources/Skills/cua-driver/WEB_APPS.md</key>
24
+ <data>
25
+ I7JWlQpLgUTEsEtIOBO4AWpD1nE=
26
+ </data>
27
+ </dict>
28
+ <key>files2</key>
29
+ <dict>
30
+ <key>Resources/Skills/cua-driver/README.md</key>
31
+ <dict>
32
+ <key>hash2</key>
33
+ <data>
34
+ Bh/rFSgIZEzMhYHfcqqSl0gaVpvHR19SkfQ64jbJlx8=
35
+ </data>
36
+ </dict>
37
+ <key>Resources/Skills/cua-driver/RECORDING.md</key>
38
+ <dict>
39
+ <key>hash2</key>
40
+ <data>
41
+ GfsfQshLjAljMozOHd2pTAs9/3SCAfdAHWJQ59c0NyY=
42
+ </data>
43
+ </dict>
44
+ <key>Resources/Skills/cua-driver/SKILL.md</key>
45
+ <dict>
46
+ <key>hash2</key>
47
+ <data>
48
+ XA5GxijQLmpFlvutU/8iNYINIXVPT5ZFSpAOkNf5YSU=
49
+ </data>
50
+ </dict>
51
+ <key>Resources/Skills/cua-driver/TESTS.md</key>
52
+ <dict>
53
+ <key>hash2</key>
54
+ <data>
55
+ vf/AT6pqsR51D+LzqWXBSiUedHsQQcaxzdC58LlbOxI=
56
+ </data>
57
+ </dict>
58
+ <key>Resources/Skills/cua-driver/WEB_APPS.md</key>
59
+ <dict>
60
+ <key>hash2</key>
61
+ <data>
62
+ bo1d4ZlCBLUHoX7ZuKicSJeDNAPlriEvDV19kJ+z9CE=
63
+ </data>
64
+ </dict>
65
+ </dict>
66
+ <key>rules</key>
67
+ <dict>
68
+ <key>^Resources/</key>
69
+ <true/>
70
+ <key>^Resources/.*\.lproj/</key>
71
+ <dict>
72
+ <key>optional</key>
73
+ <true/>
74
+ <key>weight</key>
75
+ <real>1000</real>
76
+ </dict>
77
+ <key>^Resources/.*\.lproj/locversion.plist$</key>
78
+ <dict>
79
+ <key>omit</key>
80
+ <true/>
81
+ <key>weight</key>
82
+ <real>1100</real>
83
+ </dict>
84
+ <key>^Resources/Base\.lproj/</key>
85
+ <dict>
86
+ <key>weight</key>
87
+ <real>1010</real>
88
+ </dict>
89
+ <key>^version.plist$</key>
90
+ <true/>
91
+ </dict>
92
+ <key>rules2</key>
93
+ <dict>
94
+ <key>.*\.dSYM($|/)</key>
95
+ <dict>
96
+ <key>weight</key>
97
+ <real>11</real>
98
+ </dict>
99
+ <key>^(.*/)?\.DS_Store$</key>
100
+ <dict>
101
+ <key>omit</key>
102
+ <true/>
103
+ <key>weight</key>
104
+ <real>2000</real>
105
+ </dict>
106
+ <key>^(Frameworks|SharedFrameworks|PlugIns|Plug-ins|XPCServices|Helpers|MacOS|Library/(Automator|Spotlight|LoginItems))/</key>
107
+ <dict>
108
+ <key>nested</key>
109
+ <true/>
110
+ <key>weight</key>
111
+ <real>10</real>
112
+ </dict>
113
+ <key>^.*</key>
114
+ <true/>
115
+ <key>^Info\.plist$</key>
116
+ <dict>
117
+ <key>omit</key>
118
+ <true/>
119
+ <key>weight</key>
120
+ <real>20</real>
121
+ </dict>
122
+ <key>^PkgInfo$</key>
123
+ <dict>
124
+ <key>omit</key>
125
+ <true/>
126
+ <key>weight</key>
127
+ <real>20</real>
128
+ </dict>
129
+ <key>^Resources/</key>
130
+ <dict>
131
+ <key>weight</key>
132
+ <real>20</real>
133
+ </dict>
134
+ <key>^Resources/.*\.lproj/</key>
135
+ <dict>
136
+ <key>optional</key>
137
+ <true/>
138
+ <key>weight</key>
139
+ <real>1000</real>
140
+ </dict>
141
+ <key>^Resources/.*\.lproj/locversion.plist$</key>
142
+ <dict>
143
+ <key>omit</key>
144
+ <true/>
145
+ <key>weight</key>
146
+ <real>1100</real>
147
+ </dict>
148
+ <key>^Resources/Base\.lproj/</key>
149
+ <dict>
150
+ <key>weight</key>
151
+ <real>1010</real>
152
+ </dict>
153
+ <key>^[^/]+$</key>
154
+ <dict>
155
+ <key>nested</key>
156
+ <true/>
157
+ <key>weight</key>
158
+ <real>10</real>
159
+ </dict>
160
+ <key>^embedded\.provisionprofile$</key>
161
+ <dict>
162
+ <key>weight</key>
163
+ <real>20</real>
164
+ </dict>
165
+ <key>^version\.plist$</key>
166
+ <dict>
167
+ <key>weight</key>
168
+ <real>20</real>
169
+ </dict>
170
+ </dict>
171
+ </dict>
172
+ </plist>
@@ -0,0 +1,2 @@
1
+ cua-driver-rs-v0.2.18
2
+ rust
Binary file
@@ -0,0 +1,2 @@
1
+ cua-driver-rs-v0.2.18
2
+ rust
Binary file
@@ -0,0 +1,2 @@
1
+ cua-driver-rs-v0.2.18
2
+ rust
Binary file
Binary file