agent-device 0.3.5 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/README.md +45 -14
  2. package/dist/src/bin.js +35 -97
  3. package/dist/src/daemon.js +16 -15
  4. package/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests.swift +24 -0
  5. package/ios-runner/README.md +1 -1
  6. package/package.json +1 -1
  7. package/skills/agent-device/SKILL.md +21 -11
  8. package/skills/agent-device/references/permissions.md +15 -1
  9. package/skills/agent-device/references/session-management.md +1 -0
  10. package/skills/agent-device/references/snapshot-refs.md +2 -0
  11. package/skills/agent-device/references/video-recording.md +2 -0
  12. package/src/cli.ts +7 -3
  13. package/src/core/__tests__/capabilities.test.ts +11 -6
  14. package/src/core/capabilities.ts +26 -20
  15. package/src/core/dispatch.ts +109 -31
  16. package/src/daemon/__tests__/app-state.test.ts +138 -0
  17. package/src/daemon/app-state.ts +37 -38
  18. package/src/daemon/context.ts +12 -0
  19. package/src/daemon/handlers/__tests__/interaction.test.ts +22 -0
  20. package/src/daemon/handlers/__tests__/session.test.ts +8 -5
  21. package/src/daemon/handlers/__tests__/snapshot-handler.test.ts +92 -0
  22. package/src/daemon/handlers/interaction.ts +37 -0
  23. package/src/daemon/handlers/record-trace.ts +1 -1
  24. package/src/daemon/handlers/session.ts +1 -1
  25. package/src/daemon/handlers/snapshot.ts +21 -3
  26. package/src/daemon-client.ts +14 -6
  27. package/src/daemon.ts +1 -1
  28. package/src/platforms/android/__tests__/index.test.ts +46 -1
  29. package/src/platforms/android/index.ts +23 -0
  30. package/src/platforms/ios/__tests__/runner-client.test.ts +113 -0
  31. package/src/platforms/ios/devices.ts +40 -18
  32. package/src/platforms/ios/index.ts +2 -2
  33. package/src/platforms/ios/runner-client.ts +323 -44
  34. package/src/utils/__tests__/args.test.ts +161 -0
  35. package/src/utils/args.ts +174 -218
  36. package/src/utils/command-schema.ts +591 -0
  37. package/src/utils/interactors.ts +11 -1
package/src/utils/args.ts CHANGED
@@ -1,247 +1,203 @@
1
1
  import { AppError } from './errors.ts';
2
+ import {
3
+ buildUsageText,
4
+ getCommandSchema,
5
+ getFlagDefinition,
6
+ GLOBAL_FLAG_KEYS,
7
+ isStrictFlagModeEnabled,
8
+ type CliFlags,
9
+ type FlagDefinition,
10
+ type FlagKey,
11
+ } from './command-schema.ts';
2
12
 
3
13
  export type ParsedArgs = {
4
14
  command: string | null;
5
15
  positionals: string[];
6
- flags: {
7
- json: boolean;
8
- platform?: 'ios' | 'android';
9
- device?: string;
10
- udid?: string;
11
- serial?: string;
12
- out?: string;
13
- session?: string;
14
- verbose?: boolean;
15
- snapshotInteractiveOnly?: boolean;
16
- snapshotCompact?: boolean;
17
- snapshotDepth?: number;
18
- snapshotScope?: string;
19
- snapshotRaw?: boolean;
20
- snapshotBackend?: 'ax' | 'xctest';
21
- appsFilter?: 'launchable' | 'user-installed' | 'all';
22
- appsMetadata?: boolean;
23
- activity?: string;
24
- saveScript?: boolean;
25
- relaunch?: boolean;
26
- noRecord?: boolean;
27
- replayUpdate?: boolean;
28
- help: boolean;
29
- version: boolean;
30
- };
16
+ flags: CliFlags;
17
+ warnings: string[];
31
18
  };
32
19
 
33
- export function parseArgs(argv: string[]): ParsedArgs {
34
- const flags: ParsedArgs['flags'] = { json: false, help: false, version: false };
20
+ type ParseArgsOptions = {
21
+ strictFlags?: boolean;
22
+ };
23
+
24
+ type ParsedFlagRecord = {
25
+ key: FlagKey;
26
+ token: string;
27
+ };
28
+
29
+ export function parseArgs(argv: string[], options?: ParseArgsOptions): ParsedArgs {
30
+ const strictFlags = options?.strictFlags ?? isStrictFlagModeEnabled(process.env.AGENT_DEVICE_STRICT_FLAGS);
31
+ const flags: CliFlags = { json: false, help: false, version: false };
32
+ let command: string | null = null;
35
33
  const positionals: string[] = [];
34
+ const warnings: string[] = [];
35
+ const providedFlags: ParsedFlagRecord[] = [];
36
+ let parseFlags = true;
36
37
 
37
38
  for (let i = 0; i < argv.length; i += 1) {
38
39
  const arg = argv[i];
39
- if (arg === '--json') {
40
- flags.json = true;
41
- continue;
42
- }
43
- if (arg === '--help' || arg === '-h') {
44
- flags.help = true;
45
- continue;
46
- }
47
- if (arg === '--version' || arg === '-V') {
48
- flags.version = true;
49
- continue;
50
- }
51
- if (arg === '--verbose' || arg === '-v') {
52
- flags.verbose = true;
53
- continue;
54
- }
55
- if (arg === '-i') {
56
- flags.snapshotInteractiveOnly = true;
57
- continue;
58
- }
59
- if (arg === '-c') {
60
- flags.snapshotCompact = true;
61
- continue;
62
- }
63
- if (arg === '--raw') {
64
- flags.snapshotRaw = true;
40
+ if (parseFlags && arg === '--') {
41
+ parseFlags = false;
65
42
  continue;
66
43
  }
67
- if (arg === '--no-record') {
68
- flags.noRecord = true;
44
+ if (!parseFlags) {
45
+ if (!command) command = arg;
46
+ else positionals.push(arg);
69
47
  continue;
70
48
  }
71
- if (arg === '--save-script') {
72
- flags.saveScript = true;
49
+ const isLongFlag = arg.startsWith('--');
50
+ const isShortFlag = arg.startsWith('-') && arg.length > 1;
51
+ if (!isLongFlag && !isShortFlag) {
52
+ if (!command) command = arg;
53
+ else positionals.push(arg);
73
54
  continue;
74
55
  }
75
- if (arg === '--relaunch') {
76
- flags.relaunch = true;
77
- continue;
78
- }
79
- if (arg === '--update' || arg === '-u') {
80
- flags.replayUpdate = true;
81
- continue;
82
- }
83
- if (arg === '--user-installed') {
84
- flags.appsFilter = 'user-installed';
85
- continue;
86
- }
87
- if (arg === '--all') {
88
- flags.appsFilter = 'all';
89
- continue;
90
- }
91
- if (arg === '--metadata') {
92
- flags.appsMetadata = true;
93
- continue;
94
- }
95
- if (arg.startsWith('--backend')) {
96
- const value = arg.includes('=')
97
- ? arg.split('=')[1]
98
- : argv[i + 1];
99
- if (!arg.includes('=')) i += 1;
100
- if (value !== 'ax' && value !== 'xctest') {
101
- throw new AppError('INVALID_ARGS', `Invalid backend: ${value}`);
56
+
57
+ const [token, inlineValue] = isLongFlag ? splitLongFlag(arg) : [arg, undefined];
58
+ const definition = getFlagDefinition(token);
59
+ if (!definition) {
60
+ if (shouldTreatUnknownDashTokenAsPositional(command, positionals, arg)) {
61
+ if (!command) command = arg;
62
+ else positionals.push(arg);
63
+ continue;
102
64
  }
103
- flags.snapshotBackend = value;
104
- continue;
65
+ throw new AppError('INVALID_ARGS', `Unknown flag: ${token}`);
105
66
  }
106
- if (arg.startsWith('--')) {
107
- const [key, valueInline] = arg.split('=');
108
- const value = valueInline ?? argv[i + 1];
109
- if (!valueInline) i += 1;
110
-
111
- switch (key) {
112
- case '--platform':
113
- if (value !== 'ios' && value !== 'android') {
114
- throw new AppError('INVALID_ARGS', `Invalid platform: ${value}`);
115
- }
116
- flags.platform = value;
117
- break;
118
- case '--depth': {
119
- const parsed = Number(value);
120
- if (!Number.isFinite(parsed) || parsed < 0) {
121
- throw new AppError('INVALID_ARGS', `Invalid depth: ${value}`);
122
- }
123
- flags.snapshotDepth = Math.floor(parsed);
124
- break;
125
- }
126
- case '--scope':
127
- flags.snapshotScope = value;
128
- break;
129
- case '--device':
130
- flags.device = value;
131
- break;
132
- case '--udid':
133
- flags.udid = value;
134
- break;
135
- case '--serial':
136
- flags.serial = value;
137
- break;
138
- case '--out':
139
- flags.out = value;
140
- break;
141
- case '--session':
142
- flags.session = value;
143
- break;
144
- case '--activity':
145
- flags.activity = value;
146
- break;
147
- default:
148
- throw new AppError('INVALID_ARGS', `Unknown flag: ${key}`);
149
- }
150
- continue;
67
+
68
+ const parsed = parseFlagValue(definition, token, inlineValue, argv[i + 1]);
69
+ if (parsed.consumeNext) i += 1;
70
+ (flags as Record<string, unknown>)[definition.key] = parsed.value;
71
+ providedFlags.push({ key: definition.key, token });
72
+ }
73
+
74
+ const commandSchema = getCommandSchema(command);
75
+ const allowedFlagKeys = new Set<FlagKey>([
76
+ ...GLOBAL_FLAG_KEYS,
77
+ ...(commandSchema?.allowedFlags ?? []),
78
+ ]);
79
+ const disallowed = providedFlags.filter((entry) => !allowedFlagKeys.has(entry.key));
80
+ if (disallowed.length > 0) {
81
+ const unsupported = disallowed.map((entry) => entry.token);
82
+ const message = formatUnsupportedFlagMessage(command, unsupported);
83
+ if (strictFlags) {
84
+ throw new AppError('INVALID_ARGS', message);
85
+ }
86
+ warnings.push(`${message} Enable AGENT_DEVICE_STRICT_FLAGS=1 to fail fast.`);
87
+ for (const entry of disallowed) {
88
+ delete (flags as Record<string, unknown>)[entry.key];
151
89
  }
152
- if (arg === '-d') {
153
- const value = argv[i + 1];
154
- i += 1;
155
- const parsed = Number(value);
156
- if (!Number.isFinite(parsed) || parsed < 0) {
157
- throw new AppError('INVALID_ARGS', `Invalid depth: ${value}`);
90
+ }
91
+ if (commandSchema?.defaults) {
92
+ for (const [key, value] of Object.entries(commandSchema.defaults) as Array<[FlagKey, unknown]>) {
93
+ if ((flags as Record<string, unknown>)[key] === undefined) {
94
+ (flags as Record<string, unknown>)[key] = value;
158
95
  }
159
- flags.snapshotDepth = Math.floor(parsed);
160
- continue;
161
96
  }
162
- if (arg === '-s') {
163
- const value = argv[i + 1];
164
- i += 1;
165
- flags.snapshotScope = value;
166
- continue;
97
+ }
98
+ return { command, positionals, flags, warnings };
99
+ }
100
+
101
+ function splitLongFlag(flag: string): [string, string | undefined] {
102
+ const equals = flag.indexOf('=');
103
+ if (equals === -1) return [flag, undefined];
104
+ return [flag.slice(0, equals), flag.slice(equals + 1)];
105
+ }
106
+
107
+ function parseFlagValue(
108
+ definition: FlagDefinition,
109
+ token: string,
110
+ inlineValue: string | undefined,
111
+ nextArg: string | undefined,
112
+ ): { value: unknown; consumeNext: boolean } {
113
+ if (definition.setValue !== undefined) {
114
+ if (inlineValue !== undefined) {
115
+ throw new AppError('INVALID_ARGS', `Flag ${token} does not take a value.`);
116
+ }
117
+ return { value: definition.setValue, consumeNext: false };
118
+ }
119
+ if (definition.type === 'boolean') {
120
+ if (inlineValue !== undefined) {
121
+ throw new AppError('INVALID_ARGS', `Flag ${token} does not take a value.`);
122
+ }
123
+ return { value: true, consumeNext: false };
124
+ }
125
+
126
+ const value = inlineValue ?? nextArg;
127
+ if (value === undefined) {
128
+ throw new AppError('INVALID_ARGS', `Flag ${token} requires a value.`);
129
+ }
130
+ if (inlineValue === undefined && looksLikeFlagToken(value)) {
131
+ throw new AppError('INVALID_ARGS', `Flag ${token} requires a value.`);
132
+ }
133
+
134
+ if (definition.type === 'string') {
135
+ return { value, consumeNext: inlineValue === undefined };
136
+ }
137
+ if (definition.type === 'enum') {
138
+ if (!definition.enumValues?.includes(value)) {
139
+ throw new AppError('INVALID_ARGS', `Invalid ${labelForFlag(token)}: ${value}`);
167
140
  }
168
- positionals.push(arg);
141
+ return { value, consumeNext: inlineValue === undefined };
142
+ }
143
+ const parsed = Number(value);
144
+ if (!Number.isFinite(parsed)) {
145
+ throw new AppError('INVALID_ARGS', `Invalid ${labelForFlag(token)}: ${value}`);
146
+ }
147
+ if (typeof definition.min === 'number' && parsed < definition.min) {
148
+ throw new AppError('INVALID_ARGS', `Invalid ${labelForFlag(token)}: ${value}`);
149
+ }
150
+ if (typeof definition.max === 'number' && parsed > definition.max) {
151
+ throw new AppError('INVALID_ARGS', `Invalid ${labelForFlag(token)}: ${value}`);
152
+ }
153
+ return { value: Math.floor(parsed), consumeNext: inlineValue === undefined };
154
+ }
155
+
156
+ function labelForFlag(token: string): string {
157
+ return token.replace(/^-+/, '');
158
+ }
159
+
160
+ function looksLikeFlagToken(value: string): boolean {
161
+ if (!value.startsWith('-') || value === '-') return false;
162
+ const [token] = value.startsWith('--') ? splitLongFlag(value) : [value, undefined];
163
+ return getFlagDefinition(token) !== undefined;
164
+ }
165
+
166
+ function shouldTreatUnknownDashTokenAsPositional(
167
+ command: string | null,
168
+ positionals: string[],
169
+ arg: string,
170
+ ): boolean {
171
+ if (!isNegativeNumericToken(arg)) return false;
172
+ if (!command) return false;
173
+ const schema = getCommandSchema(command);
174
+ if (!schema) return true;
175
+ if (schema.allowsExtraPositionals) return true;
176
+ if (schema.positionalArgs.length === 0) return false;
177
+ if (positionals.length < schema.positionalArgs.length) return true;
178
+ return schema.positionalArgs.some((entry) => entry.includes('?'));
179
+ }
180
+
181
+ function isNegativeNumericToken(value: string): boolean {
182
+ return /^-\d+(\.\d+)?$/.test(value);
183
+ }
184
+
185
+ function formatUnsupportedFlagMessage(command: string | null, unsupported: string[]): string {
186
+ if (!command) {
187
+ return unsupported.length === 1
188
+ ? `Flag ${unsupported[0]} requires a command that supports it.`
189
+ : `Flags ${unsupported.join(', ')} require a command that supports them.`;
169
190
  }
191
+ return unsupported.length === 1
192
+ ? `Flag ${unsupported[0]} is not supported for command ${command}.`
193
+ : `Flags ${unsupported.join(', ')} are not supported for command ${command}.`;
194
+ }
170
195
 
171
- const command = positionals.shift() ?? null;
172
- return { command, positionals, flags };
196
+ export function toDaemonFlags(flags: CliFlags): Omit<CliFlags, 'json' | 'help' | 'version'> {
197
+ const { json: _json, help: _help, version: _version, ...daemonFlags } = flags;
198
+ return daemonFlags;
173
199
  }
174
200
 
175
201
  export function usage(): string {
176
- return `agent-device <command> [args] [--json]
177
-
178
- CLI to control iOS and Android devices for AI agents.
179
-
180
- Commands:
181
- boot Ensure target device/simulator is booted and ready
182
- open [app|url] Boot device/simulator; optionally launch app or deep link URL
183
- close [app] Close app or just end session
184
- reinstall <app> <path> Uninstall + install app from binary path
185
- snapshot [-i] [-c] [-d <depth>] [-s <scope>] [--raw] [--backend ax|xctest]
186
- Capture accessibility tree
187
- -i Interactive elements only
188
- -c Compact output (drop empty structure)
189
- -d <depth> Limit snapshot depth
190
- -s <scope> Scope snapshot to label/identifier
191
- --raw Raw node output
192
- --backend ax|xctest xctest: default; XCTest snapshot (slower, no permissions)
193
- ax: macOS Accessibility tree (fast, needs permissions)
194
- devices List available devices
195
- apps [--user-installed|--all|--metadata] List installed apps (Android launchable by default, iOS simulator)
196
- appstate Show foreground app/activity
197
- back Navigate back (where supported)
198
- home Go to home screen (where supported)
199
- app-switcher Open app switcher (where supported)
200
- wait <ms>|text <text>|@ref|<selector> [timeoutMs]
201
- Wait for duration, text, ref, or selector to appear
202
- alert [get|accept|dismiss|wait] [timeout] Inspect or handle alert (iOS simulator)
203
- click <@ref|selector> Click element by snapshot ref or selector
204
- get text <@ref|selector> Return element text by ref or selector
205
- get attrs <@ref|selector> Return element attributes by ref or selector
206
- replay <path> [--update|-u] Replay a recorded session
207
- press <x> <y> Tap at coordinates
208
- long-press <x> <y> [durationMs] Long press (where supported)
209
- focus <x> <y> Focus input at coordinates
210
- type <text> Type text in focused field
211
- fill <x> <y> <text> | fill <@ref|selector> <text>
212
- Tap then type
213
- scroll <direction> [amount] Scroll in direction (0-1 amount)
214
- scrollintoview <text> Scroll until text appears (Android only)
215
- screenshot [path] Capture screenshot
216
- record start [path] Start screen recording
217
- record stop Stop screen recording
218
- trace start [path] Start trace log capture
219
- trace stop [path] Stop trace log capture
220
- find <text> <action> [value] Find by any text (label/value/id)
221
- find text <text> <action> [value] Find by text content
222
- find label <label> <action> [value] Find by label
223
- find value <value> <action> [value] Find by value
224
- find role <role> <action> [value] Find by role/type
225
- find id <id> <action> [value] Find by identifier/resource-id
226
- is <predicate> <selector> [value] Assert UI state (visible|hidden|exists|editable|selected|text)
227
- settings <wifi|airplane|location> <on|off> Toggle OS settings (simulators)
228
- session list List active sessions
229
-
230
- Flags:
231
- --platform ios|android Platform to target
232
- --device <name> Device name to target
233
- --udid <udid> iOS device UDID
234
- --serial <serial> Android device serial
235
- --activity <component> Android app launch activity (package/Activity); not for URL opens
236
- --session <name> Named session
237
- --verbose Stream daemon/runner logs
238
- --json JSON output
239
- --save-script Save session script (.ad) on close
240
- --relaunch open: terminate app process before launching it
241
- --no-record Do not record this action
242
- --update, -u Replay: update selectors and rewrite replay file in place
243
- --user-installed Apps: list user-installed packages (Android only)
244
- --all Apps: list all packages (Android only)
245
- --version, -V Print version and exit
246
- `;
202
+ return buildUsageText();
247
203
  }