agent-device 0.2.3 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46):
  1. package/README.md +41 -4
  2. package/dist/src/bin.js +15 -11
  3. package/dist/src/daemon.js +9 -8
  4. package/ios-runner/AgentDeviceRunner/AgentDeviceRunner.xcodeproj/project.pbxproj +2 -0
  5. package/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests.swift +15 -0
  6. package/package.json +3 -2
  7. package/skills/agent-device/SKILL.md +23 -7
  8. package/skills/agent-device/references/session-management.md +9 -0
  9. package/skills/agent-device/references/snapshot-refs.md +18 -5
  10. package/skills/agent-device/references/video-recording.md +2 -2
  11. package/src/cli.ts +6 -0
  12. package/src/core/__tests__/capabilities.test.ts +67 -0
  13. package/src/core/capabilities.ts +49 -0
  14. package/src/core/dispatch.ts +30 -118
  15. package/src/daemon/__tests__/is-predicates.test.ts +68 -0
  16. package/src/daemon/__tests__/selectors.test.ts +128 -0
  17. package/src/daemon/__tests__/session-routing.test.ts +108 -0
  18. package/src/daemon/__tests__/session-selector.test.ts +64 -0
  19. package/src/daemon/__tests__/session-store.test.ts +95 -0
  20. package/src/daemon/__tests__/snapshot-processing.test.ts +47 -0
  21. package/src/daemon/action-utils.ts +29 -0
  22. package/src/daemon/app-state.ts +66 -0
  23. package/src/daemon/context.ts +36 -0
  24. package/src/daemon/device-ready.ts +13 -0
  25. package/src/daemon/handlers/__tests__/find.test.ts +99 -0
  26. package/src/daemon/handlers/__tests__/replay-heal.test.ts +364 -0
  27. package/src/daemon/handlers/__tests__/snapshot.test.ts +128 -0
  28. package/src/daemon/handlers/find.ts +304 -0
  29. package/src/daemon/handlers/interaction.ts +510 -0
  30. package/src/daemon/handlers/parse-utils.ts +8 -0
  31. package/src/daemon/handlers/record-trace.ts +154 -0
  32. package/src/daemon/handlers/session.ts +732 -0
  33. package/src/daemon/handlers/snapshot.ts +396 -0
  34. package/src/daemon/is-predicates.ts +46 -0
  35. package/src/daemon/selectors.ts +423 -0
  36. package/src/daemon/session-routing.ts +22 -0
  37. package/src/daemon/session-selector.ts +39 -0
  38. package/src/daemon/session-store.ts +275 -0
  39. package/src/daemon/snapshot-processing.ts +127 -0
  40. package/src/daemon/types.ts +55 -0
  41. package/src/daemon.ts +66 -1564
  42. package/src/platforms/ios/index.ts +0 -62
  43. package/src/platforms/ios/runner-client.ts +2 -0
  44. package/src/utils/args.ts +20 -12
  45. package/src/utils/interactors.ts +102 -16
  46. package/src/utils/snapshot.ts +1 -0
package/README.md CHANGED
@@ -33,9 +33,11 @@ npx agent-device open SampleApp
33
33
 
34
34
  ## Quick Start
35
35
 
36
+ Use refs for agent-driven exploration and normal automation flows.
37
+
36
38
  ```bash
37
39
  agent-device open Contacts --platform ios # creates session on iOS Simulator
38
- agent-device snapshot
40
+ agent-device snapshot
39
41
  agent-device click @e5
40
42
  agent-device fill @e6 "John"
41
43
  agent-device fill @e7 "Doe"
@@ -75,7 +77,7 @@ Coordinates:
75
77
  ## Command Index
76
78
  - `open`, `close`, `home`, `back`, `app-switcher`
77
79
  - `snapshot`, `find`, `get`
78
- - `click`, `focus`, `type`, `fill`, `press`, `long-press`, `scroll`, `scrollintoview`
80
+ - `click`, `focus`, `type`, `fill`, `press`, `long-press`, `scroll`, `scrollintoview`, `is`
79
81
  - `alert`, `wait`, `screenshot`
80
82
  - `trace start`, `trace stop`
81
83
  - `settings wifi|airplane|location on|off`
@@ -99,7 +101,6 @@ Flags:
99
101
  - `--udid <udid>` (iOS)
100
102
  - `--serial <serial>` (Android)
101
103
  - `--activity <component>` (Android; package/Activity or package/.Activity)
102
- - `--out <path>` (screenshot)
103
104
  - `--session <name>`
104
105
  - `--verbose` for daemon and runner logs
105
106
  - `--json` for structured output
@@ -108,19 +109,55 @@ Flags:
108
109
  ## Skills
109
110
  Install the automation skills listed in [SKILL.md](skills/agent-device/SKILL.md).
110
111
 
112
+ ```bash
113
+ npx skills add https://github.com/callstackincubator/agent-device --skill agent-device
114
+ ```
115
+
111
116
  Sessions:
112
117
  - `open` starts a session. Without args, it boots/activates the target device/simulator without launching an app.
113
118
  - All interaction commands require an open session.
114
119
  - If a session is already open, `open <app>` switches the active app and updates the session app bundle.
115
120
  - `close` stops the session and releases device resources. Pass an app to close it explicitly, or omit to just close the session.
116
121
  - Use `--session <name>` to manage multiple sessions.
117
- - Session logs are written to `~/.agent-device/sessions/<session>-<timestamp>.ad`.
122
+ - Session scripts are written to `~/.agent-device/sessions/<session>-<timestamp>.ad` when recording is enabled with `--save-script`.
123
+ - Deterministic replay is `.ad`-based; use `replay --update` (`-u`) to update selector drift and rewrite the replay file in place.
118
124
 
119
125
  Find (semantic):
120
126
  - `find <text> <action> [value]` finds by any text (label/value/identifier) using a scoped snapshot.
121
127
  - `find text|label|value|role|id <value> <action> [value]` for specific locators.
122
128
  - Actions: `click` (default), `fill`, `type`, `focus`, `get text`, `get attrs`, `wait [timeout]`, `exists`.
123
129
 
130
+ Assertions:
131
+ - `is` predicates: `visible`, `hidden`, `exists`, `editable`, `selected`, `text`.
132
+ - `is text` uses exact equality.
133
+
134
+ Replay update:
135
+ - `replay <path>` runs deterministic replay from `.ad` scripts.
136
+ - `replay -u <path>` attempts selector updates on failures and atomically rewrites the same file.
137
+ - Refs are the default/core mechanism for interactive agent flows.
138
+ - Update targets: `click`, `fill`, `get`, `is`, `wait`.
139
+ - Selector matching is a replay-update internal: replay parses `.ad` lines into actions, tries them, snapshots on failure, resolves a better selector, then rewrites that failing line.
140
+
141
+ Update examples:
142
+
143
+ ```sh
144
+ # Before (stale selector)
145
+ click "id=\"old_continue\" || label=\"Continue\""
146
+
147
+ # After replay -u (rewritten in place)
148
+ click "id=\"auth_continue\" || label=\"Continue\""
149
+ ```
150
+
151
+ ```sh
152
+ # Before (ref-based action from discovery)
153
+ snapshot -i -c -s "Continue"
154
+ click @e13 "Continue"
155
+
156
+ # After replay -u (upgraded to selector-based action)
157
+ snapshot -i -c -s "Continue"
158
+ click "id=\"auth_continue\" || label=\"Continue\""
159
+ ```
160
+
124
161
  Android fill reliability:
125
162
  - `fill` clears the current value, then enters text.
126
163
  - `type` enters text into the focused field without clearing.
package/dist/src/bin.js CHANGED
@@ -2,7 +2,7 @@ import{node_path as e,fileURLToPath as t,asAppError as r,pathToFileURL as n,runC
2
2
  `)}function d(e){let t=e.details?`
3
3
  ${JSON.stringify(e.details,null,2)}`:"";process.stderr.write(`Error (${e.code}): ${e.message}${t}
4
4
  `)}function u(e,t,r){let n=p(e.type??"Element"),a=function(e,t){var r,n;let a=e.label?.trim(),i=e.value?.trim();if("text-field"===(r=t)||"text-view"===r||"search"===r){if(i)return i;if(a)return a}else if(a)return a;if(i)return i;let s=e.identifier?.trim();return!s||(n=s,/^[\w.]+:id\/[\w.-]+$/i.test(n)&&("group"===t||"image"===t||"list"===t||"collection"===t))?"":s}(e,n),i=" ".repeat(t),s=e.ref?`@${e.ref}`:"",o=[!1===e.enabled?"disabled":null].filter(Boolean).join(", "),l=o?` [${o}]`:"",c=a?` "${a}"`:"";return r?`${i}${s} [${n}]${l}`.trimEnd():`${i}${s} [${n}]${c}${l}`.trimEnd()}function p(e){let t=e.replace(/XCUIElementType/gi,"").toLowerCase(),r=e.includes(".")&&(e.startsWith("android.")||e.startsWith("androidx.")||e.startsWith("com."));switch(t.startsWith("ax")&&(t=t.replace(/^ax/,"")),t.includes(".")&&(t=t.replace(/^android\.widget\./,"").replace(/^android\.view\./,"").replace(/^android\.webkit\./,"").replace(/^androidx\./,"").replace(/^com\.google\.android\./,"").replace(/^com\.android\./,"")),t){case"application":return"application";case"navigationbar":return"navigation-bar";case"tabbar":return"tab-bar";case"button":case"imagebutton":return"button";case"link":return"link";case"cell":return"cell";case"statictext":case"checkedtextview":return"text";case"textfield":case"edittext":return"text-field";case"textview":return 
r?"text":"text-view";case"textarea":return"text-view";case"switch":return"switch";case"slider":return"slider";case"image":case"imageview":return"image";case"webview":return"webview";case"framelayout":case"linearlayout":case"relativelayout":case"constraintlayout":case"viewgroup":case"view":case"group":return"group";case"listview":case"recyclerview":return"list";case"collectionview":return"collection";case"searchfield":return"search";case"segmentedcontrol":return"segmented-control";case"window":return"window";case"checkbox":return"checkbox";case"radio":return"radio";case"menuitem":return"menu-item";case"toolbar":return"toolbar";case"scrollarea":case"scrollview":case"nestedscrollview":return"scroll-area";case"table":return"table";default:return t||"element"}}let f=e.join(s.homedir(),".agent-device"),m=e.join(f,"daemon.json"),h=function(){let e=process.env.AGENT_DEVICE_DAEMON_TIMEOUT_MS;if(!e)return 6e4;let t=Number(e);return Number.isFinite(t)?Math.max(1e3,Math.floor(t)):6e4}();async function w(e){let t=await y(),r={...e,token:t.token};return await x(t,r)}async function y(){let t=g(),r=function(){try{let t=$();return JSON.parse(i.readFileSync(e.join(t,"package.json"),"utf8")).version??"0.0.0"}catch{return"0.0.0"}}();if(t&&t.version===r&&await v(t))return t;t&&(t.version!==r||!await v(t))&&i.existsSync(m)&&i.unlinkSync(m),await b();let n=Date.now();for(;Date.now()-n<5e3;){let e=g();if(e&&await v(e))return e;await new Promise(e=>setTimeout(e,100))}throw new l("COMMAND_FAILED","Failed to start daemon",{infoPath:m,hint:"Run pnpm build, or delete ~/.agent-device/daemon.json if stale."})}function g(){if(!i.existsSync(m))return null;try{let e=JSON.parse(i.readFileSync(m,"utf8"));if(!e.port||!e.token)return null;return e}catch{return null}}async function v(e){return new Promise(t=>{let r=o.createConnection({host:"127.0.0.1",port:e.port},()=>{r.destroy(),t(!0)});r.on("error",()=>{t(!1)})})}async function b(){let 
t=$(),r=e.join(t,"dist","src","daemon.js"),n=e.join(t,"src","daemon.ts"),s=i.existsSync(r);if(!s&&!i.existsSync(n))throw new l("COMMAND_FAILED","Daemon entry not found",{distPath:r,srcPath:n});let o=s?[r]:["--experimental-strip-types",n];a(process.execPath,o)}async function x(e,t){return new Promise((r,n)=>{let a=o.createConnection({host:"127.0.0.1",port:e.port},()=>{a.write(`${JSON.stringify(t)}
5
- `)}),i=setTimeout(()=>{a.destroy(),n(new l("COMMAND_FAILED","Daemon request timed out",{timeoutMs:h}))},h),s="";a.setEncoding("utf8"),a.on("data",e=>{let t=(s+=e).indexOf("\n");if(-1===t)return;let o=s.slice(0,t).trim();if(o)try{let e=JSON.parse(o);a.end(),clearTimeout(i),r(e)}catch(e){clearTimeout(i),n(e)}}),a.on("error",e=>{clearTimeout(i),n(e)})})}function $(){let r=e.dirname(t(import.meta.url)),n=r;for(let t=0;t<6;t+=1){let t=e.join(n,"package.json");if(i.existsSync(t))return n;n=e.dirname(n)}return r}async function S(t){let n=function(e){let t={json:!1,help:!1},r=[];for(let n=0;n<e.length;n+=1){let a=e[n];if("--json"===a){t.json=!0;continue}if("--help"===a||"-h"===a){t.help=!0;continue}if("--verbose"===a||"-v"===a){t.verbose=!0;continue}if("-i"===a){t.snapshotInteractiveOnly=!0;continue}if("-c"===a){t.snapshotCompact=!0;continue}if("--raw"===a){t.snapshotRaw=!0;continue}if("--no-record"===a){t.noRecord=!0;continue}if("--record-json"===a){t.recordJson=!0;continue}if("--user-installed"===a){t.appsFilter="user-installed";continue}if("--all"===a){t.appsFilter="all";continue}if("--metadata"===a){t.appsMetadata=!0;continue}if(a.startsWith("--backend")){let r=a.includes("=")?a.split("=")[1]:e[n+1];if(a.includes("=")||(n+=1),"ax"!==r&&"xctest"!==r)throw new l("INVALID_ARGS",`Invalid backend: ${r}`);t.snapshotBackend=r;continue}if(a.startsWith("--")){let[r,i]=a.split("="),s=i??e[n+1];switch(!i&&(n+=1),r){case"--platform":if("ios"!==s&&"android"!==s)throw new l("INVALID_ARGS",`Invalid platform: ${s}`);t.platform=s;break;case"--depth":{let e=Number(s);if(!Number.isFinite(e)||e<0)throw new l("INVALID_ARGS",`Invalid depth: ${s}`);t.snapshotDepth=Math.floor(e);break}case"--scope":t.snapshotScope=s;break;case"--device":t.device=s;break;case"--udid":t.udid=s;break;case"--serial":t.serial=s;break;case"--out":t.out=s;break;case"--session":t.session=s;break;case"--activity":t.activity=s;break;default:throw new l("INVALID_ARGS",`Unknown flag: ${r}`)}continue}if("-d"===a){let 
r=e[n+1];n+=1;let a=Number(r);if(!Number.isFinite(a)||a<0)throw new l("INVALID_ARGS",`Invalid depth: ${r}`);t.snapshotDepth=Math.floor(a);continue}if("-s"===a){let r=e[n+1];n+=1,t.snapshotScope=r;continue}r.push(a)}return{command:r.shift()??null,positionals:r,flags:t}}(t);(n.flags.help||!n.command)&&(process.stdout.write(`agent-device <command> [args] [--json]
5
+ `)}),i=setTimeout(()=>{a.destroy(),n(new l("COMMAND_FAILED","Daemon request timed out",{timeoutMs:h}))},h),s="";a.setEncoding("utf8"),a.on("data",e=>{let t=(s+=e).indexOf("\n");if(-1===t)return;let o=s.slice(0,t).trim();if(o)try{let e=JSON.parse(o);a.end(),clearTimeout(i),r(e)}catch(e){clearTimeout(i),n(e)}}),a.on("error",e=>{clearTimeout(i),n(e)})})}function $(){let r=e.dirname(t(import.meta.url)),n=r;for(let t=0;t<6;t+=1){let t=e.join(n,"package.json");if(i.existsSync(t))return n;n=e.dirname(n)}return r}async function S(t){let n=function(e){let t={json:!1,help:!1},r=[];for(let n=0;n<e.length;n+=1){let a=e[n];if("--json"===a){t.json=!0;continue}if("--help"===a||"-h"===a){t.help=!0;continue}if("--verbose"===a||"-v"===a){t.verbose=!0;continue}if("-i"===a){t.snapshotInteractiveOnly=!0;continue}if("-c"===a){t.snapshotCompact=!0;continue}if("--raw"===a){t.snapshotRaw=!0;continue}if("--no-record"===a){t.noRecord=!0;continue}if("--save-script"===a){t.saveScript=!0;continue}if("--update"===a||"-u"===a){t.replayUpdate=!0;continue}if("--user-installed"===a){t.appsFilter="user-installed";continue}if("--all"===a){t.appsFilter="all";continue}if("--metadata"===a){t.appsMetadata=!0;continue}if(a.startsWith("--backend")){let r=a.includes("=")?a.split("=")[1]:e[n+1];if(a.includes("=")||(n+=1),"ax"!==r&&"xctest"!==r)throw new l("INVALID_ARGS",`Invalid backend: ${r}`);t.snapshotBackend=r;continue}if(a.startsWith("--")){let[r,i]=a.split("="),s=i??e[n+1];switch(!i&&(n+=1),r){case"--platform":if("ios"!==s&&"android"!==s)throw new l("INVALID_ARGS",`Invalid platform: ${s}`);t.platform=s;break;case"--depth":{let e=Number(s);if(!Number.isFinite(e)||e<0)throw new l("INVALID_ARGS",`Invalid depth: ${s}`);t.snapshotDepth=Math.floor(e);break}case"--scope":t.snapshotScope=s;break;case"--device":t.device=s;break;case"--udid":t.udid=s;break;case"--serial":t.serial=s;break;case"--out":t.out=s;break;case"--session":t.session=s;break;case"--activity":t.activity=s;break;default:throw new 
l("INVALID_ARGS",`Unknown flag: ${r}`)}continue}if("-d"===a){let r=e[n+1];n+=1;let a=Number(r);if(!Number.isFinite(a)||a<0)throw new l("INVALID_ARGS",`Invalid depth: ${r}`);t.snapshotDepth=Math.floor(a);continue}if("-s"===a){let r=e[n+1];n+=1,t.snapshotScope=r;continue}r.push(a)}return{command:r.shift()??null,positionals:r,flags:t}}(t);(n.flags.help||!n.command)&&(process.stdout.write(`agent-device <command> [args] [--json]
6
6
 
7
7
  CLI to control iOS and Android devices for AI agents.
8
8
 
@@ -24,20 +24,22 @@ Commands:
24
24
  back Navigate back (where supported)
25
25
  home Go to home screen (where supported)
26
26
  app-switcher Open app switcher (where supported)
27
- wait <ms>|text <text>|@ref [timeoutMs] Wait for duration or text to appear
27
+ wait <ms>|text <text>|@ref|<selector> [timeoutMs]
28
+ Wait for duration, text, ref, or selector to appear
28
29
  alert [get|accept|dismiss|wait] [timeout] Inspect or handle alert (iOS simulator)
29
- click <@ref> Click element by snapshot ref
30
- get text <@ref> Return element text by ref
31
- get attrs <@ref> Return element attributes by ref
32
- replay <path> Replay a recorded session
30
+ click <@ref|selector> Click element by snapshot ref or selector
31
+ get text <@ref|selector> Return element text by ref or selector
32
+ get attrs <@ref|selector> Return element attributes by ref or selector
33
+ replay <path> [--update|-u] Replay a recorded session
33
34
  press <x> <y> Tap at coordinates
34
35
  long-press <x> <y> [durationMs] Long press (where supported)
35
36
  focus <x> <y> Focus input at coordinates
36
37
  type <text> Type text in focused field
37
- fill <x> <y> <text> | fill <@ref> <text> Tap then type
38
+ fill <x> <y> <text> | fill <@ref|selector> <text>
39
+ Tap then type
38
40
  scroll <direction> [amount] Scroll in direction (0-1 amount)
39
41
  scrollintoview <text> Scroll until text appears (Android only)
40
- screenshot [--out path] Capture screenshot
42
+ screenshot [path] Capture screenshot
41
43
  record start [path] Start screen recording
42
44
  record stop Stop screen recording
43
45
  trace start [path] Start trace log capture
@@ -48,6 +50,7 @@ Commands:
48
50
  find value <value> <action> [value] Find by value
49
51
  find role <role> <action> [value] Find by role/type
50
52
  find id <id> <action> [value] Find by identifier/resource-id
53
+ is <predicate> <selector> [value] Assert UI state (visible|hidden|exists|editable|selected|text)
51
54
  settings <wifi|airplane|location> <on|off> Toggle OS settings (simulators)
52
55
  session list List active sessions
53
56
 
@@ -57,12 +60,12 @@ Flags:
57
60
  --udid <udid> iOS device UDID
58
61
  --serial <serial> Android device serial
59
62
  --activity <component> Android activity to launch (package/Activity)
60
- --out <path> Output path for screenshots
61
63
  --session <name> Named session
62
64
  --verbose Stream daemon/runner logs
63
65
  --json JSON output
66
+ --save-script Save session script (.ad) on close
64
67
  --no-record Do not record this action
65
- --record-json Record JSON session log
68
+ --update, -u Replay: update selectors and rewrite replay file in place
66
69
  --user-installed Apps: list user-installed packages (Android only)
67
70
  --all Apps: list all packages (Android only)
68
71
 
@@ -80,7 +83,8 @@ ${f.join("\n")}
80
83
  `),h&&h();return}}if("find"===a){let t=e.data;if("string"==typeof t?.text){process.stdout.write(`${t.text}
81
84
  `),h&&h();return}if("boolean"==typeof t?.found){process.stdout.write(`Found: ${t.found}
82
85
  `),h&&h();return}if(t?.node){process.stdout.write(`${JSON.stringify(t.node,null,2)}
83
- `),h&&h();return}}if("click"===a){let t=e.data?.ref??"",r=e.data?.x,n=e.data?.y;t&&"number"==typeof r&&"number"==typeof n&&process.stdout.write(`Clicked @${t} (${r}, ${n})
86
+ `),h&&h();return}}if("is"===a){let t=e.data?.predicate??"assertion";process.stdout.write(`Passed: is ${t}
87
+ `),h&&h();return}if("click"===a){let t=e.data?.ref??"",r=e.data?.x,n=e.data?.y;t&&"number"==typeof r&&"number"==typeof n&&process.stdout.write(`Clicked @${t} (${r}, ${n})
84
88
  `),h&&h();return}if(e.data&&"object"==typeof e.data){let t=e.data;if("devices"===a){let e=(Array.isArray(t.devices)?t.devices:[]).map(e=>{let t=e?.name??e?.id??"unknown",r=e?.platform??"unknown",n=e?.kind?` ${e.kind}`:"",a="boolean"==typeof e?.booted?` booted=${e.booted}`:"";return`${t} (${r}${n})${a}`});process.stdout.write(`${e.join("\n")}
85
89
  `),h&&h();return}if("apps"===a){let e=(Array.isArray(t.apps)?t.apps:[]).map(e=>{if("string"==typeof e)return e;if(e&&"object"==typeof e){let t=e.bundleId??e.package,r=e.name??e.label;return r&&t?`${r} (${t})`:t&&"boolean"==typeof e.launchable?`${t} (launchable=${e.launchable})`:t?String(t):JSON.stringify(e)}return String(e)});process.stdout.write(`${e.join("\n")}
86
90
  `),h&&h();return}if("appstate"===a){let e=t?.platform,r=t?.appBundleId,n=t?.appName,a=t?.source,i=t?.package,s=t?.activity;if("ios"===e){process.stdout.write(`Foreground app: ${n??r}