agent-device 0.3.4 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/README.md +58 -16
  2. package/dist/src/bin.js +35 -96
  3. package/dist/src/daemon.js +16 -15
  4. package/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests.swift +24 -0
  5. package/ios-runner/README.md +1 -1
  6. package/package.json +1 -1
  7. package/skills/agent-device/SKILL.md +32 -14
  8. package/skills/agent-device/references/permissions.md +15 -1
  9. package/skills/agent-device/references/session-management.md +2 -0
  10. package/skills/agent-device/references/snapshot-refs.md +2 -0
  11. package/skills/agent-device/references/video-recording.md +2 -0
  12. package/src/cli.ts +7 -3
  13. package/src/core/__tests__/capabilities.test.ts +11 -6
  14. package/src/core/__tests__/open-target.test.ts +16 -0
  15. package/src/core/capabilities.ts +26 -20
  16. package/src/core/dispatch.ts +110 -31
  17. package/src/core/open-target.ts +13 -0
  18. package/src/daemon/__tests__/app-state.test.ts +138 -0
  19. package/src/daemon/__tests__/session-store.test.ts +24 -0
  20. package/src/daemon/app-state.ts +37 -38
  21. package/src/daemon/context.ts +12 -0
  22. package/src/daemon/handlers/__tests__/interaction.test.ts +22 -0
  23. package/src/daemon/handlers/__tests__/session.test.ts +226 -5
  24. package/src/daemon/handlers/__tests__/snapshot-handler.test.ts +92 -0
  25. package/src/daemon/handlers/interaction.ts +37 -0
  26. package/src/daemon/handlers/record-trace.ts +1 -1
  27. package/src/daemon/handlers/session.ts +96 -26
  28. package/src/daemon/handlers/snapshot.ts +21 -3
  29. package/src/daemon/session-store.ts +11 -0
  30. package/src/daemon-client.ts +14 -6
  31. package/src/daemon.ts +1 -1
  32. package/src/platforms/android/__tests__/index.test.ts +67 -1
  33. package/src/platforms/android/index.ts +41 -0
  34. package/src/platforms/ios/__tests__/index.test.ts +24 -0
  35. package/src/platforms/ios/__tests__/runner-client.test.ts +113 -0
  36. package/src/platforms/ios/devices.ts +40 -18
  37. package/src/platforms/ios/index.ts +70 -5
  38. package/src/platforms/ios/runner-client.ts +329 -42
  39. package/src/utils/__tests__/args.test.ts +175 -0
  40. package/src/utils/args.ts +174 -212
  41. package/src/utils/command-schema.ts +591 -0
  42. package/src/utils/interactors.ts +13 -3
package/src/utils/args.ts CHANGED
@@ -1,241 +1,203 @@
1
1
  import { AppError } from './errors.ts';
2
+ import {
3
+ buildUsageText,
4
+ getCommandSchema,
5
+ getFlagDefinition,
6
+ GLOBAL_FLAG_KEYS,
7
+ isStrictFlagModeEnabled,
8
+ type CliFlags,
9
+ type FlagDefinition,
10
+ type FlagKey,
11
+ } from './command-schema.ts';
2
12
 
3
13
  export type ParsedArgs = {
4
14
  command: string | null;
5
15
  positionals: string[];
6
- flags: {
7
- json: boolean;
8
- platform?: 'ios' | 'android';
9
- device?: string;
10
- udid?: string;
11
- serial?: string;
12
- out?: string;
13
- session?: string;
14
- verbose?: boolean;
15
- snapshotInteractiveOnly?: boolean;
16
- snapshotCompact?: boolean;
17
- snapshotDepth?: number;
18
- snapshotScope?: string;
19
- snapshotRaw?: boolean;
20
- snapshotBackend?: 'ax' | 'xctest';
21
- appsFilter?: 'launchable' | 'user-installed' | 'all';
22
- appsMetadata?: boolean;
23
- activity?: string;
24
- saveScript?: boolean;
25
- noRecord?: boolean;
26
- replayUpdate?: boolean;
27
- help: boolean;
28
- version: boolean;
29
- };
16
+ flags: CliFlags;
17
+ warnings: string[];
30
18
  };
31
19
 
32
- export function parseArgs(argv: string[]): ParsedArgs {
33
- const flags: ParsedArgs['flags'] = { json: false, help: false, version: false };
20
+ type ParseArgsOptions = {
21
+ strictFlags?: boolean;
22
+ };
23
+
24
+ type ParsedFlagRecord = {
25
+ key: FlagKey;
26
+ token: string;
27
+ };
28
+
29
+ export function parseArgs(argv: string[], options?: ParseArgsOptions): ParsedArgs {
30
+ const strictFlags = options?.strictFlags ?? isStrictFlagModeEnabled(process.env.AGENT_DEVICE_STRICT_FLAGS);
31
+ const flags: CliFlags = { json: false, help: false, version: false };
32
+ let command: string | null = null;
34
33
  const positionals: string[] = [];
34
+ const warnings: string[] = [];
35
+ const providedFlags: ParsedFlagRecord[] = [];
36
+ let parseFlags = true;
35
37
 
36
38
  for (let i = 0; i < argv.length; i += 1) {
37
39
  const arg = argv[i];
38
- if (arg === '--json') {
39
- flags.json = true;
40
- continue;
41
- }
42
- if (arg === '--help' || arg === '-h') {
43
- flags.help = true;
44
- continue;
45
- }
46
- if (arg === '--version' || arg === '-V') {
47
- flags.version = true;
48
- continue;
49
- }
50
- if (arg === '--verbose' || arg === '-v') {
51
- flags.verbose = true;
52
- continue;
53
- }
54
- if (arg === '-i') {
55
- flags.snapshotInteractiveOnly = true;
56
- continue;
57
- }
58
- if (arg === '-c') {
59
- flags.snapshotCompact = true;
60
- continue;
61
- }
62
- if (arg === '--raw') {
63
- flags.snapshotRaw = true;
40
+ if (parseFlags && arg === '--') {
41
+ parseFlags = false;
64
42
  continue;
65
43
  }
66
- if (arg === '--no-record') {
67
- flags.noRecord = true;
44
+ if (!parseFlags) {
45
+ if (!command) command = arg;
46
+ else positionals.push(arg);
68
47
  continue;
69
48
  }
70
- if (arg === '--save-script') {
71
- flags.saveScript = true;
49
+ const isLongFlag = arg.startsWith('--');
50
+ const isShortFlag = arg.startsWith('-') && arg.length > 1;
51
+ if (!isLongFlag && !isShortFlag) {
52
+ if (!command) command = arg;
53
+ else positionals.push(arg);
72
54
  continue;
73
55
  }
74
- if (arg === '--update' || arg === '-u') {
75
- flags.replayUpdate = true;
76
- continue;
77
- }
78
- if (arg === '--user-installed') {
79
- flags.appsFilter = 'user-installed';
80
- continue;
81
- }
82
- if (arg === '--all') {
83
- flags.appsFilter = 'all';
84
- continue;
85
- }
86
- if (arg === '--metadata') {
87
- flags.appsMetadata = true;
88
- continue;
89
- }
90
- if (arg.startsWith('--backend')) {
91
- const value = arg.includes('=')
92
- ? arg.split('=')[1]
93
- : argv[i + 1];
94
- if (!arg.includes('=')) i += 1;
95
- if (value !== 'ax' && value !== 'xctest') {
96
- throw new AppError('INVALID_ARGS', `Invalid backend: ${value}`);
56
+
57
+ const [token, inlineValue] = isLongFlag ? splitLongFlag(arg) : [arg, undefined];
58
+ const definition = getFlagDefinition(token);
59
+ if (!definition) {
60
+ if (shouldTreatUnknownDashTokenAsPositional(command, positionals, arg)) {
61
+ if (!command) command = arg;
62
+ else positionals.push(arg);
63
+ continue;
97
64
  }
98
- flags.snapshotBackend = value;
99
- continue;
65
+ throw new AppError('INVALID_ARGS', `Unknown flag: ${token}`);
100
66
  }
101
- if (arg.startsWith('--')) {
102
- const [key, valueInline] = arg.split('=');
103
- const value = valueInline ?? argv[i + 1];
104
- if (!valueInline) i += 1;
105
-
106
- switch (key) {
107
- case '--platform':
108
- if (value !== 'ios' && value !== 'android') {
109
- throw new AppError('INVALID_ARGS', `Invalid platform: ${value}`);
110
- }
111
- flags.platform = value;
112
- break;
113
- case '--depth': {
114
- const parsed = Number(value);
115
- if (!Number.isFinite(parsed) || parsed < 0) {
116
- throw new AppError('INVALID_ARGS', `Invalid depth: ${value}`);
117
- }
118
- flags.snapshotDepth = Math.floor(parsed);
119
- break;
120
- }
121
- case '--scope':
122
- flags.snapshotScope = value;
123
- break;
124
- case '--device':
125
- flags.device = value;
126
- break;
127
- case '--udid':
128
- flags.udid = value;
129
- break;
130
- case '--serial':
131
- flags.serial = value;
132
- break;
133
- case '--out':
134
- flags.out = value;
135
- break;
136
- case '--session':
137
- flags.session = value;
138
- break;
139
- case '--activity':
140
- flags.activity = value;
141
- break;
142
- default:
143
- throw new AppError('INVALID_ARGS', `Unknown flag: ${key}`);
144
- }
145
- continue;
67
+
68
+ const parsed = parseFlagValue(definition, token, inlineValue, argv[i + 1]);
69
+ if (parsed.consumeNext) i += 1;
70
+ (flags as Record<string, unknown>)[definition.key] = parsed.value;
71
+ providedFlags.push({ key: definition.key, token });
72
+ }
73
+
74
+ const commandSchema = getCommandSchema(command);
75
+ const allowedFlagKeys = new Set<FlagKey>([
76
+ ...GLOBAL_FLAG_KEYS,
77
+ ...(commandSchema?.allowedFlags ?? []),
78
+ ]);
79
+ const disallowed = providedFlags.filter((entry) => !allowedFlagKeys.has(entry.key));
80
+ if (disallowed.length > 0) {
81
+ const unsupported = disallowed.map((entry) => entry.token);
82
+ const message = formatUnsupportedFlagMessage(command, unsupported);
83
+ if (strictFlags) {
84
+ throw new AppError('INVALID_ARGS', message);
85
+ }
86
+ warnings.push(`${message} Enable AGENT_DEVICE_STRICT_FLAGS=1 to fail fast.`);
87
+ for (const entry of disallowed) {
88
+ delete (flags as Record<string, unknown>)[entry.key];
146
89
  }
147
- if (arg === '-d') {
148
- const value = argv[i + 1];
149
- i += 1;
150
- const parsed = Number(value);
151
- if (!Number.isFinite(parsed) || parsed < 0) {
152
- throw new AppError('INVALID_ARGS', `Invalid depth: ${value}`);
90
+ }
91
+ if (commandSchema?.defaults) {
92
+ for (const [key, value] of Object.entries(commandSchema.defaults) as Array<[FlagKey, unknown]>) {
93
+ if ((flags as Record<string, unknown>)[key] === undefined) {
94
+ (flags as Record<string, unknown>)[key] = value;
153
95
  }
154
- flags.snapshotDepth = Math.floor(parsed);
155
- continue;
156
96
  }
157
- if (arg === '-s') {
158
- const value = argv[i + 1];
159
- i += 1;
160
- flags.snapshotScope = value;
161
- continue;
97
+ }
98
+ return { command, positionals, flags, warnings };
99
+ }
100
+
101
+ function splitLongFlag(flag: string): [string, string | undefined] {
102
+ const equals = flag.indexOf('=');
103
+ if (equals === -1) return [flag, undefined];
104
+ return [flag.slice(0, equals), flag.slice(equals + 1)];
105
+ }
106
+
107
+ function parseFlagValue(
108
+ definition: FlagDefinition,
109
+ token: string,
110
+ inlineValue: string | undefined,
111
+ nextArg: string | undefined,
112
+ ): { value: unknown; consumeNext: boolean } {
113
+ if (definition.setValue !== undefined) {
114
+ if (inlineValue !== undefined) {
115
+ throw new AppError('INVALID_ARGS', `Flag ${token} does not take a value.`);
116
+ }
117
+ return { value: definition.setValue, consumeNext: false };
118
+ }
119
+ if (definition.type === 'boolean') {
120
+ if (inlineValue !== undefined) {
121
+ throw new AppError('INVALID_ARGS', `Flag ${token} does not take a value.`);
122
+ }
123
+ return { value: true, consumeNext: false };
124
+ }
125
+
126
+ const value = inlineValue ?? nextArg;
127
+ if (value === undefined) {
128
+ throw new AppError('INVALID_ARGS', `Flag ${token} requires a value.`);
129
+ }
130
+ if (inlineValue === undefined && looksLikeFlagToken(value)) {
131
+ throw new AppError('INVALID_ARGS', `Flag ${token} requires a value.`);
132
+ }
133
+
134
+ if (definition.type === 'string') {
135
+ return { value, consumeNext: inlineValue === undefined };
136
+ }
137
+ if (definition.type === 'enum') {
138
+ if (!definition.enumValues?.includes(value)) {
139
+ throw new AppError('INVALID_ARGS', `Invalid ${labelForFlag(token)}: ${value}`);
162
140
  }
163
- positionals.push(arg);
141
+ return { value, consumeNext: inlineValue === undefined };
164
142
  }
143
+ const parsed = Number(value);
144
+ if (!Number.isFinite(parsed)) {
145
+ throw new AppError('INVALID_ARGS', `Invalid ${labelForFlag(token)}: ${value}`);
146
+ }
147
+ if (typeof definition.min === 'number' && parsed < definition.min) {
148
+ throw new AppError('INVALID_ARGS', `Invalid ${labelForFlag(token)}: ${value}`);
149
+ }
150
+ if (typeof definition.max === 'number' && parsed > definition.max) {
151
+ throw new AppError('INVALID_ARGS', `Invalid ${labelForFlag(token)}: ${value}`);
152
+ }
153
+ return { value: Math.floor(parsed), consumeNext: inlineValue === undefined };
154
+ }
155
+
156
+ function labelForFlag(token: string): string {
157
+ return token.replace(/^-+/, '');
158
+ }
159
+
160
+ function looksLikeFlagToken(value: string): boolean {
161
+ if (!value.startsWith('-') || value === '-') return false;
162
+ const [token] = value.startsWith('--') ? splitLongFlag(value) : [value, undefined];
163
+ return getFlagDefinition(token) !== undefined;
164
+ }
165
+
166
+ function shouldTreatUnknownDashTokenAsPositional(
167
+ command: string | null,
168
+ positionals: string[],
169
+ arg: string,
170
+ ): boolean {
171
+ if (!isNegativeNumericToken(arg)) return false;
172
+ if (!command) return false;
173
+ const schema = getCommandSchema(command);
174
+ if (!schema) return true;
175
+ if (schema.allowsExtraPositionals) return true;
176
+ if (schema.positionalArgs.length === 0) return false;
177
+ if (positionals.length < schema.positionalArgs.length) return true;
178
+ return schema.positionalArgs.some((entry) => entry.includes('?'));
179
+ }
180
+
181
+ function isNegativeNumericToken(value: string): boolean {
182
+ return /^-\d+(\.\d+)?$/.test(value);
183
+ }
184
+
185
+ function formatUnsupportedFlagMessage(command: string | null, unsupported: string[]): string {
186
+ if (!command) {
187
+ return unsupported.length === 1
188
+ ? `Flag ${unsupported[0]} requires a command that supports it.`
189
+ : `Flags ${unsupported.join(', ')} require a command that supports them.`;
190
+ }
191
+ return unsupported.length === 1
192
+ ? `Flag ${unsupported[0]} is not supported for command ${command}.`
193
+ : `Flags ${unsupported.join(', ')} are not supported for command ${command}.`;
194
+ }
165
195
 
166
- const command = positionals.shift() ?? null;
167
- return { command, positionals, flags };
196
+ export function toDaemonFlags(flags: CliFlags): Omit<CliFlags, 'json' | 'help' | 'version'> {
197
+ const { json: _json, help: _help, version: _version, ...daemonFlags } = flags;
198
+ return daemonFlags;
168
199
  }
169
200
 
170
201
  export function usage(): string {
171
- return `agent-device <command> [args] [--json]
172
-
173
- CLI to control iOS and Android devices for AI agents.
174
-
175
- Commands:
176
- boot Ensure target device/simulator is booted and ready
177
- open [app] Boot device/simulator; optionally launch app
178
- close [app] Close app or just end session
179
- reinstall <app> <path> Uninstall + install app from binary path
180
- snapshot [-i] [-c] [-d <depth>] [-s <scope>] [--raw] [--backend ax|xctest]
181
- Capture accessibility tree
182
- -i Interactive elements only
183
- -c Compact output (drop empty structure)
184
- -d <depth> Limit snapshot depth
185
- -s <scope> Scope snapshot to label/identifier
186
- --raw Raw node output
187
- --backend ax|xctest xctest: default; XCTest snapshot (slower, no permissions)
188
- ax: macOS Accessibility tree (fast, needs permissions)
189
- devices List available devices
190
- apps [--user-installed|--all|--metadata] List installed apps (Android launchable by default, iOS simulator)
191
- appstate Show foreground app/activity
192
- back Navigate back (where supported)
193
- home Go to home screen (where supported)
194
- app-switcher Open app switcher (where supported)
195
- wait <ms>|text <text>|@ref|<selector> [timeoutMs]
196
- Wait for duration, text, ref, or selector to appear
197
- alert [get|accept|dismiss|wait] [timeout] Inspect or handle alert (iOS simulator)
198
- click <@ref|selector> Click element by snapshot ref or selector
199
- get text <@ref|selector> Return element text by ref or selector
200
- get attrs <@ref|selector> Return element attributes by ref or selector
201
- replay <path> [--update|-u] Replay a recorded session
202
- press <x> <y> Tap at coordinates
203
- long-press <x> <y> [durationMs] Long press (where supported)
204
- focus <x> <y> Focus input at coordinates
205
- type <text> Type text in focused field
206
- fill <x> <y> <text> | fill <@ref|selector> <text>
207
- Tap then type
208
- scroll <direction> [amount] Scroll in direction (0-1 amount)
209
- scrollintoview <text> Scroll until text appears (Android only)
210
- screenshot [path] Capture screenshot
211
- record start [path] Start screen recording
212
- record stop Stop screen recording
213
- trace start [path] Start trace log capture
214
- trace stop [path] Stop trace log capture
215
- find <text> <action> [value] Find by any text (label/value/id)
216
- find text <text> <action> [value] Find by text content
217
- find label <label> <action> [value] Find by label
218
- find value <value> <action> [value] Find by value
219
- find role <role> <action> [value] Find by role/type
220
- find id <id> <action> [value] Find by identifier/resource-id
221
- is <predicate> <selector> [value] Assert UI state (visible|hidden|exists|editable|selected|text)
222
- settings <wifi|airplane|location> <on|off> Toggle OS settings (simulators)
223
- session list List active sessions
224
-
225
- Flags:
226
- --platform ios|android Platform to target
227
- --device <name> Device name to target
228
- --udid <udid> iOS device UDID
229
- --serial <serial> Android device serial
230
- --activity <component> Android activity to launch (package/Activity)
231
- --session <name> Named session
232
- --verbose Stream daemon/runner logs
233
- --json JSON output
234
- --save-script Save session script (.ad) on close
235
- --no-record Do not record this action
236
- --update, -u Replay: update selectors and rewrite replay file in place
237
- --user-installed Apps: list user-installed packages (Android only)
238
- --all Apps: list all packages (Android only)
239
- --version, -V Print version and exit
240
- `;
202
+ return buildUsageText();
241
203
  }