agent-device 0.8.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. package/README.md +106 -22
  2. package/dist/src/224.js +2 -2
  3. package/dist/src/bin.js +65 -58
  4. package/dist/src/client-normalizers.d.ts +1 -2
  5. package/dist/src/client-shared.d.ts +1 -2
  6. package/dist/src/client-types.d.ts +0 -19
  7. package/dist/src/client.d.ts +1 -1
  8. package/dist/src/core/capabilities.d.ts +1 -1
  9. package/dist/src/core/click-button.d.ts +20 -0
  10. package/dist/src/core/dispatch-resolve.d.ts +7 -6
  11. package/dist/src/core/dispatch.d.ts +1 -0
  12. package/dist/src/daemon/context.d.ts +1 -0
  13. package/dist/src/daemon/handlers/interaction-common.d.ts +12 -0
  14. package/dist/src/daemon/handlers/interaction-fill.d.ts +3 -0
  15. package/dist/src/daemon/handlers/interaction-flags.d.ts +4 -0
  16. package/dist/src/daemon/handlers/interaction-get.d.ts +3 -0
  17. package/dist/src/daemon/handlers/interaction-is.d.ts +3 -0
  18. package/dist/src/daemon/handlers/interaction-press.d.ts +3 -0
  19. package/dist/src/daemon/handlers/interaction-scroll.d.ts +3 -0
  20. package/dist/src/daemon/handlers/interaction-selector.d.ts +27 -0
  21. package/dist/src/daemon/handlers/interaction-snapshot.d.ts +8 -0
  22. package/dist/src/daemon/handlers/interaction-targeting.d.ts +28 -0
  23. package/dist/src/daemon/handlers/interaction.d.ts +5 -12
  24. package/dist/src/daemon/handlers/session-device-utils.d.ts +1 -0
  25. package/dist/src/daemon/handlers/session-runtime.d.ts +3 -8
  26. package/dist/src/daemon/handlers/session.d.ts +8 -0
  27. package/dist/src/daemon/handlers/snapshot-alert.d.ts +13 -0
  28. package/dist/src/daemon/handlers/snapshot-capture.d.ts +27 -0
  29. package/dist/src/daemon/handlers/snapshot-session.d.ts +15 -0
  30. package/dist/src/daemon/handlers/snapshot-settings.d.ts +24 -0
  31. package/dist/src/daemon/handlers/snapshot-wait.d.ts +37 -0
  32. package/dist/src/daemon/handlers/snapshot.d.ts +4 -20
  33. package/dist/src/daemon/is-predicates.d.ts +2 -1
  34. package/dist/src/daemon/script-utils.d.ts +14 -2
  35. package/dist/src/daemon/selectors-build.d.ts +2 -1
  36. package/dist/src/daemon/selectors-match.d.ts +3 -2
  37. package/dist/src/daemon/selectors-resolve.d.ts +3 -2
  38. package/dist/src/daemon/session-open-script.d.ts +7 -0
  39. package/dist/src/daemon/session-store.d.ts +1 -0
  40. package/dist/src/daemon/snapshot-processing.d.ts +2 -1
  41. package/dist/src/daemon/types.d.ts +6 -5
  42. package/dist/src/daemon.js +35 -34
  43. package/dist/src/index.d.ts +1 -1
  44. package/dist/src/platforms/android/devices.d.ts +4 -0
  45. package/dist/src/platforms/android/sdk.d.ts +2 -0
  46. package/dist/src/platforms/ios/app-filter.d.ts +2 -0
  47. package/dist/src/platforms/ios/devices.d.ts +2 -1
  48. package/dist/src/platforms/ios/macos-apps.d.ts +12 -0
  49. package/dist/src/platforms/ios/runner-client.d.ts +3 -1
  50. package/dist/src/platforms/ios/runner-macos-products.d.ts +3 -0
  51. package/dist/src/platforms/ios/runner-xctestrun-products.d.ts +2 -0
  52. package/dist/src/platforms/ios/runner-xctestrun.d.ts +20 -2
  53. package/dist/src/utils/args.d.ts +1 -1
  54. package/dist/src/utils/cli-config.d.ts +2 -1
  55. package/dist/src/utils/command-schema.d.ts +7 -3
  56. package/dist/src/utils/device.d.ts +13 -5
  57. package/dist/src/utils/remote-config.d.ts +15 -0
  58. package/dist/src/utils/remote-open.d.ts +9 -0
  59. package/ios-runner/AgentDeviceRunner/AgentDeviceRunner.xcodeproj/project.pbxproj +58 -50
  60. package/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/AgentDeviceRunnerUITests.entitlements +10 -0
  61. package/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandExecution.swift +35 -1
  62. package/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Interaction.swift +83 -9
  63. package/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Lifecycle.swift +39 -7
  64. package/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Models.swift +2 -0
  65. package/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+ScreenRecorder.swift +5 -6
  66. package/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Snapshot.swift +132 -112
  67. package/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+SystemModal.swift +4 -0
  68. package/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests.swift +22 -5
  69. package/package.json +3 -2
  70. package/skills/agent-device/SKILL.md +28 -9
  71. package/skills/agent-device/references/macos-desktop.md +89 -0
  72. package/skills/agent-device/references/snapshot-refs.md +11 -2
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-device",
3
- "version": "0.8.6",
3
+ "version": "0.10.0",
4
4
  "description": "Unified control plane for physical and virtual devices via an agent-driven CLI.",
5
5
  "license": "MIT",
6
6
  "author": "Callstack",
@@ -23,8 +23,9 @@
23
23
  "build": "rslib build",
24
24
  "clean:daemon": "rm -f ~/.agent-device/daemon.json && rm -f ~/.agent-device/daemon.lock",
25
25
  "build:node": "pnpm build && pnpm clean:daemon",
26
- "build:xcuitest": "pnpm build:xcuitest:ios",
26
+ "build:xcuitest": "pnpm build:xcuitest:ios && pnpm build:xcuitest:macos",
27
27
  "build:xcuitest:ios": "rm -rf ~/.agent-device/ios-runner/derived/device && xcodebuild build-for-testing -project ios-runner/AgentDeviceRunner/AgentDeviceRunner.xcodeproj -scheme AgentDeviceRunner -destination \"generic/platform=iOS Simulator\" -derivedDataPath ~/.agent-device/ios-runner/derived",
28
+ "build:xcuitest:macos": "rm -rf ~/.agent-device/ios-runner/derived/macos && xcodebuild build-for-testing -project ios-runner/AgentDeviceRunner/AgentDeviceRunner.xcodeproj -scheme AgentDeviceRunner -destination \"platform=macOS,arch=$(uname -m)\" -derivedDataPath ~/.agent-device/ios-runner/derived/macos CODE_SIGNING_ALLOWED=NO CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY=\"\" DEVELOPMENT_TEAM=\"\" COMPILER_INDEX_STORE_ENABLE=NO ENABLE_CODE_COVERAGE=NO",
28
29
  "build:xcuitest:tvos": "rm -rf ~/.agent-device/ios-runner/derived/tvos && xcodebuild build-for-testing -project ios-runner/AgentDeviceRunner/AgentDeviceRunner.xcodeproj -scheme AgentDeviceRunner -destination \"generic/platform=tvOS Simulator\" -derivedDataPath ~/.agent-device/ios-runner/derived/tvos",
29
30
  "build:all": "pnpm build:node && pnpm build:xcuitest",
30
31
  "ad": "node bin/agent-device.mjs",
@@ -1,9 +1,9 @@
1
1
  ---
2
2
  name: agent-device
3
- description: Automates interactions for iOS simulators/devices and Android emulators/devices. Use when navigating apps, taking snapshots/screenshots, tapping, typing, scrolling, or extracting UI info on mobile targets.
3
+ description: Automates interactions for Apple-platform apps (iOS, tvOS, macOS) and Android devices. Use when navigating apps, taking snapshots/screenshots, tapping, typing, scrolling, or extracting UI info across mobile, TV, and desktop targets.
4
4
  ---
5
5
 
6
- # Mobile Automation with agent-device
6
+ # Apple and Android Automation with agent-device
7
7
 
8
8
  For exploration, use snapshot refs. For deterministic replay, use selectors.
9
9
  For structured exploratory QA bug hunts and reporting, use [../dogfood/SKILL.md](../dogfood/SKILL.md).
@@ -22,18 +22,20 @@ Use this skill as a router, not a full manual.
22
22
  ## Decision Map
23
23
 
24
24
  - No target context yet: `devices` -> pick target -> `open`.
25
- - Normal UI task: `open` -> `snapshot -i` -> `press/fill` -> `diff snapshot -i` -> `close`
26
- - Debug/crash: `open <app>` -> `logs clear --restart` -> reproduce -> `network dump` -> `logs path` -> targeted `grep`
25
+ - Normal UI task: `open` -> `snapshot -i` -> `press/click/fill` -> `diff snapshot -i` -> `close`
26
+ - Debug/crash (iOS/Android): `open <app>` -> `logs clear --restart` -> reproduce -> `network dump` -> `logs path` -> targeted `grep`
27
27
  - Replay drift: `replay -u <path>` -> verify updated selectors
28
28
  - Remote multi-tenant run: allocate lease -> point client at remote daemon base URL -> run commands with tenant isolation flags -> heartbeat/release lease
29
29
  - Device-scope isolation run: configure the iOS simulator set / Android allowlist -> run selectors within scope only
30
+ - macOS desktop task: run the macOS desktop flow, then open [references/macos-desktop.md](references/macos-desktop.md) if context menus, Finder rows, or desktop-specific snapshot behavior matters
30
31
 
31
32
  ## Target Selection Rules
32
33
 
33
34
  - iOS local QA: use simulators unless the task explicitly requires a physical device.
34
35
  - iOS local QA in mixed simulator/device environments: run `ensure-simulator` first and pass `--device`, `--udid`, or `--ios-simulator-device-set` on later commands.
36
+ - macOS desktop app automation: use `--platform macos`, or `--platform apple --target desktop` when the caller wants one Apple-family selector path.
35
37
  - Android local QA: use `install` or `reinstall` for `.apk`/`.aab` files, then relaunch by installed package name.
36
- - Android React Native + Metro flows: set runtime hints with `runtime set` before `open <package> --relaunch`.
38
+ - Android React Native + Metro flows: prefer `open <package> --remote-config <path> --relaunch`.
37
39
  - In mixed-device environments, always pin the exact target with `--serial`, `--device`, `--udid`, or an isolation scope.
38
40
  - For session-bound automation runs, prefer a pre-bound session/platform instead of repeating selectors on every command: set `AGENT_DEVICE_SESSION`, set `AGENT_DEVICE_PLATFORM`, and the daemon will enforce the shared lock policy across CLI, typed client, and RPC entry points.
39
41
  - Use `--session-lock reject|strip` (or `AGENT_DEVICE_SESSION_LOCK`) only when you need to override the default reject behavior. Lock mode applies to nested `batch` steps too.
@@ -67,8 +69,7 @@ Use this when a physical iPhone is also connected and you want deterministic sim
67
69
 
68
70
  ```bash
69
71
  agent-device reinstall MyApp /path/to/app-debug.apk --platform android --serial emulator-5554
70
- agent-device runtime set --session qa-android --platform android --metro-host 10.0.2.2 --metro-port 8081
71
- agent-device open com.example.myapp --platform android --serial emulator-5554 --session qa-android --relaunch
72
+ agent-device open com.example.myapp --remote-config ./agent-device.remote.json --relaunch
72
73
  agent-device snapshot -i
73
74
  agent-device close
74
75
  ```
@@ -104,6 +105,20 @@ agent-device close --shutdown
104
105
 
105
106
  Use this when an Android emulator session must stay pinned while an agent or test runner issues plain CLI commands over time.
106
107
 
108
+ ### 1e) macOS Desktop Flow
109
+
110
+ ```bash
111
+ agent-device open TextEdit --platform macos
112
+ agent-device snapshot -i
113
+ agent-device fill @e3 "desktop smoke test"
114
+ agent-device screenshot /tmp/macos-textedit.png
115
+ agent-device close
116
+ ```
117
+
118
+ Use this for host Mac desktop apps. Prefer the Apple runner interaction flow (`open`, `snapshot`, `press`, `click`, `fill`, `scroll`, `back`, `record`, `screenshot`). macOS also supports `clipboard read|write` and, when a desktop deep-link template is configured, `trigger-app-event`. Under the `settings` command, macOS supports only `settings appearance light|dark|toggle`. Do not rely on mobile-only helpers like `install`, `push`, `logs`, or `network` on macOS.
119
+ Prefer selectors or snapshot refs (`@e...`) over raw x/y commands on macOS because the window origin can move between runs.
120
+ Open [references/macos-desktop.md](references/macos-desktop.md) when you need Finder-style list traversal, context-menu flows, or macOS-specific snapshot expectations.
121
+
107
122
  ### 2) Debug/Crash Flow
108
123
 
109
124
  ```bash
@@ -186,7 +201,7 @@ That includes bound-session defaults such as `sessionLock` / `AGENT_DEVICE_SESSI
186
201
  For Android emulators by AVD name, use `boot --platform android --device <avd-name>`.
187
202
  For Android emulators without GUI, add `--headless`.
188
203
  Use `--target mobile|tv` with `--platform` (required) to pick phone/tablet vs TV targets (AndroidTV/tvOS).
189
- For Android React Native + Metro flows, install or reinstall the APK first, set runtime hints with `runtime set`, then use `open <package> --relaunch`; do not use `open <apk|aab> --relaunch`.
204
+ For Android React Native + Metro flows, install or reinstall the APK first, then use `open <package> --remote-config <path> --relaunch`; do not use `open <apk|aab> --relaunch`.
190
205
  For local iOS QA in mixed simulator/device environments, use `ensure-simulator` and pass `--device` or `--udid` so automation does not attach to a physical device by accident.
191
206
  For session-bound automation, prefer `AGENT_DEVICE_SESSION` + `AGENT_DEVICE_PLATFORM`; that bound-session default now enables lock mode automatically.
192
207
 
@@ -225,6 +240,8 @@ agent-device is visible 'id="anchor"'
225
240
  ```
226
241
 
227
242
  `press` is the canonical tap command; `click` is an alias.
243
+ On macOS, use `click --button secondary <@ref|selector>` to open a context menu before the next `snapshot -i`.
244
+ For desktop-specific heuristics and Finder guidance, see [references/macos-desktop.md](references/macos-desktop.md).
228
245
 
229
246
  ### Utilities
230
247
 
@@ -271,7 +288,7 @@ agent-device batch --steps-file /tmp/batch-steps.json --json
271
288
  - iOS `.ipa`: extract/install from `Payload/*.app`; when multiple app bundles are present, `<app>` is used as a bundle id/name hint.
272
289
  - iOS `appstate` is session-scoped; Android `appstate` is live foreground state. iOS responses include `device_udid` and `ios_simulator_device_set` for isolation verification.
273
290
  - iOS `open` responses include `device_udid` and `ios_simulator_device_set` to confirm which simulator handled the session.
274
- - Clipboard helpers: `clipboard read` / `clipboard write <text>` are supported on Android and iOS simulators; iOS physical devices are not supported yet.
291
+ - Clipboard helpers: `clipboard read` / `clipboard write <text>` are supported on macOS, Android, and iOS simulators; iOS physical devices are not supported yet.
275
292
  - Android keyboard helpers: `keyboard status|get|dismiss` report keyboard visibility/type and dismiss via keyevent when visible.
276
293
  - `network dump` is best-effort and parses HTTP(s) entries from the session app log file.
277
294
  - Biometric settings: iOS simulator supports `settings faceid|touchid <match|nonmatch|enroll|unenroll>`; Android supports `settings fingerprint <match|nonmatch>` where runtime tooling is available.
@@ -280,6 +297,7 @@ agent-device batch --steps-file /tmp/batch-steps.json --json
280
297
  - iOS simulator uses APNs-style payload JSON.
281
298
  - Android uses broadcast action + typed extras (string/boolean/number).
282
299
  - `trigger-app-event` requires app-defined deep-link hooks and URL template configuration (`AGENT_DEVICE_APP_EVENT_URL_TEMPLATE` or platform-specific variants).
300
+ - On macOS, set `AGENT_DEVICE_MACOS_APP_EVENT_URL_TEMPLATE` when the desktop app uses a different deep-link template than iOS/Android.
283
301
  - `trigger-app-event` requires an active session or explicit selectors (`--platform`, `--device`, `--udid`, `--serial`); on iOS physical devices, custom-scheme triggers require active app context.
284
302
  - Canonical trigger behavior and caveats are documented in [`website/docs/docs/commands.md`](../../website/docs/docs/commands.md) under **App event triggers**.
285
303
  - Permission settings are app-scoped and require an active session app:
@@ -319,6 +337,7 @@ agent-device batch --steps-file /tmp/batch-steps.json --json
319
337
  ## References
320
338
 
321
339
  - [references/snapshot-refs.md](references/snapshot-refs.md)
340
+ - [references/macos-desktop.md](references/macos-desktop.md)
322
341
  - [references/logs-and-debug.md](references/logs-and-debug.md)
323
342
  - [references/session-management.md](references/session-management.md)
324
343
  - [references/permissions.md](references/permissions.md)
@@ -0,0 +1,89 @@
1
+ # macOS Desktop Automation
2
+
3
+ Use this reference for host Mac apps such as Finder, TextEdit, System Settings, Preview, or browser apps running as normal desktop windows.
4
+
5
+ ## Mental model
6
+
7
+ - `snapshot -i` should describe UI that is visible to a human in the current front window.
8
+ - Context menus are not ambient UI. Open them explicitly with `click --button secondary`, then re-snapshot.
9
+ - Prefer refs for exploration and selectors for deterministic replay/assertions.
10
+ - Avoid raw `x y` coordinates unless refs/selectors are impossible.
11
+
12
+ ## Canonical flow
13
+
14
+ ```bash
15
+ agent-device open Finder --platform macos
16
+ agent-device snapshot -i
17
+ agent-device click @e66 --button secondary --platform macos
18
+ agent-device snapshot -i
19
+ agent-device close
20
+ ```
21
+
22
+ ## What to expect from snapshots
23
+
24
+ - `snapshot -i` prioritizes visible window content over dormant menu infrastructure.
25
+ - File rows, sidebar items, toolbar controls, search fields, and visible context menus should appear.
26
+ - Finder and other native apps may expose duplicate-looking structures such as row wrapper nodes, `cell` nodes, and child `text` or `text-field` nodes.
27
+ - Treat those as distinct AX nodes unless you have a stronger selector anchor.
28
+
29
+ ## Context menus
30
+
31
+ Use secondary click when the app exposes actions only through the contextual menu.
32
+
33
+ ```bash
34
+ agent-device click @e66 --button secondary --platform macos
35
+ agent-device snapshot -i
36
+ ```
37
+
38
+ Expected pattern:
39
+
40
+ 1. Snapshot visible content.
41
+ 2. Secondary-click the target row/item.
42
+ 3. Snapshot again.
43
+ 4. Interact with newly visible `menu-item` nodes.
44
+
45
+ Do not expect context-menu items to appear before the menu is opened.
46
+
47
+ ## Finder-specific guidance
48
+
49
+ - `snapshot -i` should still expose visible folder rows even when nothing is selected.
50
+ - Unselected folder contents should still be visible in `snapshot -i` through list/table rows.
51
+ - A file row may expose multiple nodes with the same label, including a row container, name cell, and child text/text-field.
52
+ - For opening a context menu, prefer the outer visible row/cell ref over a nested text child if both exist.
53
+ - After secondary click, expect actions such as `Rename`, `Quick Look`, `Copy`, `Compress`, and tag-related items in the next snapshot.
54
+
55
+ ## Raw snapshots
56
+
57
+ Use `snapshot --raw` only when debugging AX structure or collector issues.
58
+
59
+ ```bash
60
+ agent-device snapshot --raw --platform macos
61
+ ```
62
+
63
+ - Raw output is larger and less token-efficient.
64
+ - It is useful for verifying whether missing UI is absent from the AX tree or only filtered from interactive output.
65
+ - Do not use raw output as the default agent loop when `snapshot -i` already shows the visible window content you need.
66
+
67
+ ## Selector guidance
68
+
69
+ Good macOS selectors usually anchor on one of:
70
+
71
+ - `label="Downloads"`
72
+ - `label="failed-step.json"`
73
+ - `role=button label="Search"`
74
+ - `role=menu-item label="Rename"`
75
+
76
+ Prefer exact labels when the desktop UI is stable. Use `id=...` when the AX identifier is clearly app-owned and not a framework-generated `_NS:*` value.
77
+
78
+ ## Things not to rely on
79
+
80
+ - Mobile-only helpers like `install`, `reinstall`, `push`, `logs`, `network`, or generic `alert`
81
+ - Long-press as a substitute for right-click
82
+ - Raw coordinate assumptions across runs; macOS windows can move
83
+ - Framework-generated `_NS:*` identifiers as stable selectors
84
+
85
+ ## Troubleshooting
86
+
87
+ - If visible window content is missing from `snapshot -i`, re-snapshot once after the UI settles.
88
+ - If the wrong menu opened or no menu appeared, retry by secondary-clicking the row/cell wrapper instead of the nested text node.
89
+ - If the app has multiple windows, ensure the correct one is frontmost before relying on refs.
@@ -1,9 +1,10 @@
1
- # Snapshot Refs and Selectors (Mobile)
1
+ # Snapshot Refs and Selectors
2
2
 
3
3
  ## Purpose
4
4
 
5
5
  Refs are useful for discovery/debugging. For deterministic scripts, use selectors.
6
6
  For tap interactions, `press` is canonical; `click` is an equivalent alias.
7
+ For host Mac desktop apps, pair this reference with [macos-desktop.md](macos-desktop.md) because context menus and native list/table structures need desktop-specific handling.
7
8
 
8
9
  ## Snapshot
9
10
 
@@ -29,6 +30,13 @@ agent-device press @e2
29
30
  agent-device fill @e5 "test"
30
31
  ```
31
32
 
33
+ On macOS, if actions live in a context menu, use:
34
+
35
+ ```bash
36
+ agent-device click @e5 --button secondary --platform macos
37
+ agent-device snapshot -i
38
+ ```
39
+
32
40
  ## Using selectors (deterministic)
33
41
 
34
42
  ```bash
@@ -71,7 +79,8 @@ Efficient pattern:
71
79
  ## Troubleshooting
72
80
 
73
81
  - Ref not found: re-snapshot.
74
- - If XCTest returns 0 nodes, foreground app state may have changed. Re-open the app or retry after state is stable.
82
+ - If `snapshot` returns 0 nodes, foreground app state or accessibility state may have changed. Re-open the app or retry after state is stable.
83
+ - On macOS, use `snapshot --raw --platform macos` to distinguish collector filtering from truly missing AX content.
75
84
 
76
85
  ## Stop Conditions
77
86