agent-device 0.1.8 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -45,7 +45,7 @@ agent-device snapshot -c # Compact output
45
45
  agent-device snapshot -d 3 # Limit depth
46
46
  agent-device snapshot -s "Camera" # Scope to label/identifier
47
47
  agent-device snapshot --raw # Raw node output
48
- agent-device snapshot --backend hybrid # Default: best speed vs correctness trade-off (AX fast, XCTest complete)
48
+ agent-device snapshot --backend xctest # Default: full XCTest snapshot (most complete)
49
49
  agent-device snapshot --backend ax # macOS Accessibility tree (fast, needs permissions)
50
50
  agent-device snapshot --backend xctest # XCTest snapshot (slow, no permissions)
51
51
  ```
@@ -53,7 +53,7 @@ agent-device snapshot --backend xctest # XCTest snapshot (slow, no permissions)
53
53
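  The former `hybrid` backend (AX with an XCTest fill for empty containers such as `group` or `tab bar`, scoped to the container label) has been removed; `xctest` is now the default, and agent-device falls back to AX when XCTest returns 0 nodes and AX is available.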
54
54
  XCTest is the default because it is more complete, though slower; AX is fast but can miss UI details.
55
55
  If you want explicit control or AX is unavailable, use `--backend xctest`.
56
- In practice, if AX returns a `Tab Bar` group with no children, hybrid will run a scoped XCTest snapshot for `Tab Bar` and insert those nodes under the group.
56
+ Use `--backend ax` when you need faster snapshots and can tolerate missing details.
57
57
 
58
58
  ### Find (semantic)
59
59
 
@@ -139,7 +139,9 @@ agent-device apps --platform android --user-installed
139
139
 
140
140
  - Always snapshot right before interactions; refs invalidate on UI changes.
141
141
  - Prefer `snapshot -i` to reduce output size.
142
- - On iOS, hybrid is the default and uses AX first, so Accessibility permission is still required.
142
+ - On iOS, `xctest` is the default and does not require Accessibility permission.
143
+ - If XCTest returns 0 nodes (foreground app changed), agent-device falls back to AX when available.
144
+ - `open <app>` can be used within an existing session to switch apps and update the session bundle id.
143
145
  - If AX returns the Simulator window or empty tree, restart Simulator or use `--backend xctest`.
144
146
  - Use `--session <name>` for parallel sessions; avoid device contention.
145
147
 
@@ -46,4 +46,4 @@ agent-device snapshot -i -s @e3 --platform ios
46
46
 
47
47
  - Ref not found: re-snapshot.
48
48
  - AX returns Simulator window: restart Simulator and re-run.
49
- - AX empty: verify Accessibility permission or use `--backend xctest` (hybrid is recommended because AX is fast but can miss UI details, while XCTest is slower but more complete).
49
+ - AX empty: verify Accessibility permission or use `--backend xctest` (XCTest is slower but more complete).
@@ -28,7 +28,7 @@ export type CommandFlags = {
28
28
  snapshotDepth?: number;
29
29
  snapshotScope?: string;
30
30
  snapshotRaw?: boolean;
31
- snapshotBackend?: 'ax' | 'xctest' | 'hybrid';
31
+ snapshotBackend?: 'ax' | 'xctest';
32
32
  noRecord?: boolean;
33
33
  recordJson?: boolean;
34
34
  appsFilter?: 'launchable' | 'user-installed' | 'all';
@@ -83,7 +83,7 @@ export async function dispatchCommand(
83
83
  snapshotDepth?: number;
84
84
  snapshotScope?: string;
85
85
  snapshotRaw?: boolean;
86
- snapshotBackend?: 'ax' | 'xctest' | 'hybrid';
86
+ snapshotBackend?: 'ax' | 'xctest';
87
87
  },
88
88
  ): Promise<Record<string, unknown> | void> {
89
89
  const interactor = getInteractor(device);
@@ -283,7 +283,7 @@ export async function dispatchCommand(
283
283
  return { setting, state };
284
284
  }
285
285
  case 'snapshot': {
286
- const backend = context?.snapshotBackend ?? 'hybrid';
286
+ const backend = context?.snapshotBackend ?? 'xctest';
287
287
  if (device.platform === 'ios') {
288
288
  if (device.kind !== 'simulator') {
289
289
  throw new AppError(
@@ -295,25 +295,6 @@ export async function dispatchCommand(
295
295
  const ax = await snapshotAx(device, { traceLogPath: context?.traceLogPath });
296
296
  return { nodes: ax.nodes ?? [], truncated: false, backend: 'ax' };
297
297
  }
298
- if (backend === 'hybrid') {
299
- const ax = await snapshotAx(device, { traceLogPath: context?.traceLogPath });
300
- const axNodes = ax.nodes ?? [];
301
- const containers = findHybridContainers(axNodes);
302
- if (containers.length === 0) {
303
- return { nodes: axNodes, truncated: false, backend: 'hybrid' };
304
- }
305
- const merged = await fillHybridContainers(device, axNodes, containers, {
306
- appBundleId: context?.appBundleId,
307
- interactiveOnly: context?.snapshotInteractiveOnly,
308
- compact: context?.snapshotCompact,
309
- depth: context?.snapshotDepth,
310
- raw: context?.snapshotRaw,
311
- verbose: context?.verbose,
312
- logPath: context?.logPath,
313
- traceLogPath: context?.traceLogPath,
314
- });
315
- return { nodes: merged.nodes, truncated: merged.truncated, backend: 'hybrid' };
316
- }
317
298
  const result = (await runIosRunnerCommand(
318
299
  device,
319
300
  {
@@ -327,7 +308,16 @@ export async function dispatchCommand(
327
308
  },
328
309
  { verbose: context?.verbose, logPath: context?.logPath, traceLogPath: context?.traceLogPath },
329
310
  )) as { nodes?: RawSnapshotNode[]; truncated?: boolean };
330
- return { nodes: result.nodes ?? [], truncated: result.truncated ?? false, backend: 'xctest' };
311
+ const nodes = result.nodes ?? [];
312
+ if (nodes.length === 0) {
313
+ try {
314
+ const ax = await snapshotAx(device, { traceLogPath: context?.traceLogPath });
315
+ return { nodes: ax.nodes ?? [], truncated: false, backend: 'ax' };
316
+ } catch {
317
+ // keep the empty XCTest snapshot if AX is unavailable
318
+ }
319
+ }
320
+ return { nodes, truncated: result.truncated ?? false, backend: 'xctest' };
331
321
  }
332
322
  const androidResult = await snapshotAndroid(device, {
333
323
  interactiveOnly: context?.snapshotInteractiveOnly,
@@ -343,116 +333,6 @@ export async function dispatchCommand(
343
333
  }
344
334
  }
345
335
 
346
- type HybridContainer = {
347
- index: number;
348
- depth: number;
349
- label?: string;
350
- identifier?: string;
351
- type?: string;
352
- };
353
-
354
- const hybridContainerTypes = new Set(['tabbar', 'toolbar', 'group']);
355
-
356
- function findHybridContainers(nodes: RawSnapshotNode[]): HybridContainer[] {
357
- const containers: HybridContainer[] = [];
358
- for (let i = 0; i < nodes.length; i += 1) {
359
- const node = nodes[i];
360
- const depth = node.depth ?? 0;
361
- const nextDepth = nodes[i + 1]?.depth ?? -1;
362
- if (nextDepth > depth) continue;
363
- const normalized = normalizeSnapshotType(node.type);
364
- if (!hybridContainerTypes.has(normalized)) continue;
365
- containers.push({
366
- index: i,
367
- depth,
368
- label: node.label,
369
- identifier: node.identifier,
370
- type: node.type,
371
- });
372
- }
373
- return containers;
374
- }
375
-
376
- async function fillHybridContainers(
377
- device: DeviceInfo,
378
- axNodes: RawSnapshotNode[],
379
- containers: HybridContainer[],
380
- options: {
381
- appBundleId?: string;
382
- interactiveOnly?: boolean;
383
- compact?: boolean;
384
- depth?: number;
385
- raw?: boolean;
386
- verbose?: boolean;
387
- logPath?: string;
388
- traceLogPath?: string;
389
- },
390
- ): Promise<{ nodes: RawSnapshotNode[]; truncated: boolean }> {
391
- let merged = [...axNodes];
392
- let truncated = false;
393
- let offset = 0;
394
- for (const container of containers) {
395
- const scope = resolveContainerScope(container);
396
- if (!scope) continue;
397
- const result = (await runIosRunnerCommand(
398
- device,
399
- {
400
- command: 'snapshot',
401
- appBundleId: options.appBundleId,
402
- interactiveOnly: options.interactiveOnly,
403
- compact: options.compact,
404
- depth: options.depth,
405
- scope,
406
- raw: options.raw,
407
- },
408
- { verbose: options.verbose, logPath: options.logPath, traceLogPath: options.traceLogPath },
409
- )) as { nodes?: RawSnapshotNode[]; truncated?: boolean };
410
- if (result.truncated) truncated = true;
411
- const filtered = (result.nodes ?? []).filter((node) => {
412
- const normalized = normalizeSnapshotType(node.type);
413
- return normalized !== 'application' && normalized !== 'window';
414
- });
415
- if (filtered.length === 0) continue;
416
- const adjusted = adjustDepths(filtered, container.depth + 1);
417
- merged.splice(container.index + 1 + offset, 0, ...adjusted);
418
- offset += adjusted.length;
419
- }
420
- merged = merged.map((node, index) => ({ ...node, index }));
421
- return { nodes: merged, truncated };
422
- }
423
-
424
- function adjustDepths(nodes: RawSnapshotNode[], baseDepth: number): RawSnapshotNode[] {
425
- let minDepth = Number.POSITIVE_INFINITY;
426
- for (const node of nodes) {
427
- const depth = node.depth ?? 0;
428
- if (depth < minDepth) minDepth = depth;
429
- }
430
- if (!Number.isFinite(minDepth)) minDepth = 0;
431
- return nodes.map((node) => ({
432
- ...node,
433
- depth: baseDepth + (node.depth ?? 0) - minDepth,
434
- }));
435
- }
436
-
437
- function normalizeSnapshotType(type?: string): string {
438
- if (!type) return '';
439
- let value = type.replace(/XCUIElementType/gi, '').toLowerCase();
440
- if (value.startsWith('ax')) {
441
- value = value.replace(/^ax/, '');
442
- }
443
- return value;
444
- }
445
-
446
- function resolveContainerScope(container: HybridContainer): string | null {
447
- const candidates = [container.label, container.identifier];
448
- for (const candidate of candidates) {
449
- if (!candidate) continue;
450
- const value = candidate.trim();
451
- if (value) return value;
452
- }
453
- return null;
454
- }
455
-
456
336
  function invertScrollDirection(direction: 'up' | 'down' | 'left' | 'right'): 'up' | 'down' | 'left' | 'right' {
457
337
  switch (direction) {
458
338
  case 'up':
package/src/daemon.ts CHANGED
@@ -63,7 +63,7 @@ type SessionAction = {
63
63
  snapshotDepth?: number;
64
64
  snapshotScope?: string;
65
65
  snapshotRaw?: boolean;
66
- snapshotBackend?: 'ax' | 'xctest' | 'hybrid';
66
+ snapshotBackend?: 'ax' | 'xctest';
67
67
  noRecord?: boolean;
68
68
  recordJson?: boolean;
69
69
  };
@@ -91,7 +91,7 @@ function contextFromFlags(
91
91
  snapshotCompact?: boolean;
92
92
  snapshotDepth?: number;
93
93
  snapshotScope?: string;
94
- snapshotBackend?: 'ax' | 'xctest' | 'hybrid';
94
+ snapshotBackend?: 'ax' | 'xctest';
95
95
  snapshotRaw?: boolean;
96
96
  } {
97
97
  return {
@@ -244,13 +244,43 @@ async function handleRequest(req: DaemonRequest): Promise<DaemonResponse> {
244
244
 
245
245
  if (command === 'open') {
246
246
  if (sessions.has(sessionName)) {
247
- return {
248
- ok: false,
249
- error: {
250
- code: 'INVALID_ARGS',
251
- message: 'Session already active. Close it first or pass a new --session name.',
252
- },
247
+ const session = sessions.get(sessionName);
248
+ const appName = req.positionals?.[0];
249
+ if (!session || !appName) {
250
+ return {
251
+ ok: false,
252
+ error: {
253
+ code: 'INVALID_ARGS',
254
+ message: 'Session already active. Close it first or pass a new --session name.',
255
+ },
256
+ };
257
+ }
258
+ let appBundleId: string | undefined;
259
+ if (session.device.platform === 'ios') {
260
+ try {
261
+ const { resolveIosApp } = await import('./platforms/ios/index.ts');
262
+ appBundleId = await resolveIosApp(session.device, appName);
263
+ } catch {
264
+ appBundleId = undefined;
265
+ }
266
+ }
267
+ await dispatchCommand(session.device, 'open', req.positionals ?? [], req.flags?.out, {
268
+ ...contextFromFlags(req.flags, appBundleId),
269
+ });
270
+ const nextSession: SessionState = {
271
+ ...session,
272
+ appBundleId,
273
+ appName,
274
+ snapshot: undefined,
253
275
  };
276
+ recordAction(nextSession, {
277
+ command,
278
+ positionals: req.positionals ?? [],
279
+ flags: req.flags ?? {},
280
+ result: { session: sessionName, appName, appBundleId },
281
+ });
282
+ sessions.set(sessionName, nextSession);
283
+ return { ok: true, data: { session: sessionName, appName, appBundleId } };
254
284
  }
255
285
  const device = await resolveTargetDevice(req.flags ?? {});
256
286
  await ensureDeviceReady(device);
@@ -377,7 +407,7 @@ async function handleRequest(req: DaemonRequest): Promise<DaemonResponse> {
377
407
  })) as {
378
408
  nodes?: RawSnapshotNode[];
379
409
  truncated?: boolean;
380
- backend?: 'ax' | 'xctest' | 'hybrid' | 'android';
410
+ backend?: 'ax' | 'xctest' | 'android';
381
411
  };
382
412
  const rawNodes = data?.nodes ?? [];
383
413
  const nodes = attachRefs(req.flags?.snapshotRaw ? rawNodes : pruneGroupNodes(rawNodes));
@@ -767,7 +797,7 @@ async function handleRequest(req: DaemonRequest): Promise<DaemonResponse> {
767
797
  })) as {
768
798
  nodes?: RawSnapshotNode[];
769
799
  truncated?: boolean;
770
- backend?: 'ax' | 'xctest' | 'hybrid' | 'android';
800
+ backend?: 'ax' | 'xctest' | 'android';
771
801
  };
772
802
  const rawNodes = data?.nodes ?? [];
773
803
  const nodes = attachRefs(req.flags?.snapshotRaw ? rawNodes : pruneGroupNodes(rawNodes));
@@ -965,26 +995,6 @@ async function handleRequest(req: DaemonRequest): Promise<DaemonResponse> {
965
995
  return { ok: false, error: { code: 'COMMAND_FAILED', message: `Ref ${refInput} not found or has no bounds` } };
966
996
  }
967
997
  const refLabel = resolveRefLabel(node, session.snapshot.nodes);
968
- const label = node.label?.trim();
969
- if (
970
- session.device.platform === 'ios' &&
971
- session.device.kind === 'simulator' &&
972
- label &&
973
- isLabelUnique(session.snapshot.nodes, label)
974
- ) {
975
- await runIosRunnerCommand(
976
- session.device,
977
- { command: 'tap', text: label, appBundleId: session.appBundleId },
978
- { verbose: req.flags?.verbose, logPath, traceLogPath: session?.trace?.outPath },
979
- );
980
- recordAction(session, {
981
- command,
982
- positionals: req.positionals ?? [],
983
- flags: req.flags ?? {},
984
- result: { ref, refLabel: label, mode: 'text' },
985
- });
986
- return { ok: true, data: { ref, mode: 'text' } };
987
- }
988
998
  const { x, y } = centerOfRect(node.rect);
989
999
  await dispatchCommand(session.device, 'press', [String(x), String(y)], req.flags?.out, {
990
1000
  ...contextFromFlags(req.flags, session.appBundleId, session.trace?.outPath),
@@ -1022,45 +1032,29 @@ async function handleRequest(req: DaemonRequest): Promise<DaemonResponse> {
1022
1032
  }
1023
1033
  const refLabel = resolveRefLabel(node, session.snapshot.nodes);
1024
1034
  const label = node.label?.trim();
1025
- if (session.device.platform === 'ios' && session.device.kind === 'simulator') {
1026
- if (refLabel && isTextInputType(node.type)) {
1027
- await runIosRunnerCommand(
1028
- session.device,
1029
- { command: 'tap', text: refLabel, appBundleId: session.appBundleId },
1030
- { verbose: req.flags?.verbose, logPath, traceLogPath: session?.trace?.outPath },
1031
- );
1032
- await runIosRunnerCommand(
1033
- session.device,
1034
- { command: 'type', text, appBundleId: session.appBundleId },
1035
- { verbose: req.flags?.verbose, logPath, traceLogPath: session?.trace?.outPath },
1036
- );
1037
- recordAction(session, {
1038
- command,
1039
- positionals: req.positionals ?? [],
1040
- flags: req.flags ?? {},
1041
- result: { ref, refLabel, mode: 'text' },
1042
- });
1043
- return { ok: true, data: { ref, mode: 'text' } };
1044
- }
1045
- if (label && isLabelUnique(session.snapshot.nodes, label)) {
1046
- await runIosRunnerCommand(
1047
- session.device,
1048
- { command: 'tap', text: label, appBundleId: session.appBundleId },
1049
- { verbose: req.flags?.verbose, logPath, traceLogPath: session?.trace?.outPath },
1050
- );
1051
- await runIosRunnerCommand(
1052
- session.device,
1053
- { command: 'type', text, appBundleId: session.appBundleId },
1054
- { verbose: req.flags?.verbose, logPath, traceLogPath: session?.trace?.outPath },
1055
- );
1056
- recordAction(session, {
1057
- command,
1058
- positionals: req.positionals ?? [],
1059
- flags: req.flags ?? {},
1060
- result: { ref, refLabel: label, mode: 'text' },
1061
- });
1062
- return { ok: true, data: { ref, mode: 'text' } };
1035
+ if (session.device.platform === 'ios' && session.device.kind === 'simulator' && isTextInputType(node.type)) {
1036
+ const coords = node.rect ? centerOfRect(node.rect) : null;
1037
+ if (!coords) {
1038
+ return {
1039
+ ok: false,
1040
+ error: { code: 'COMMAND_FAILED', message: `Ref ${req.positionals[0]} not found or has no bounds` },
1041
+ };
1063
1042
  }
1043
+ await dispatchCommand(session.device, 'focus', [String(coords.x), String(coords.y)], req.flags?.out, {
1044
+ ...contextFromFlags(req.flags, session.appBundleId, session.trace?.outPath),
1045
+ });
1046
+ await runIosRunnerCommand(
1047
+ session.device,
1048
+ { command: 'type', text, appBundleId: session.appBundleId },
1049
+ { verbose: req.flags?.verbose, logPath, traceLogPath: session?.trace?.outPath },
1050
+ );
1051
+ recordAction(session, {
1052
+ command,
1053
+ positionals: req.positionals ?? [],
1054
+ flags: req.flags ?? {},
1055
+ result: { ref, refLabel: refLabel ?? label, action: 'fill', text },
1056
+ });
1057
+ return { ok: true, data: { ref } };
1064
1058
  }
1065
1059
  const { x, y } = centerOfRect(node.rect);
1066
1060
  const data = await dispatchCommand(
package/src/utils/args.ts CHANGED
@@ -17,7 +17,7 @@ export type ParsedArgs = {
17
17
  snapshotDepth?: number;
18
18
  snapshotScope?: string;
19
19
  snapshotRaw?: boolean;
20
- snapshotBackend?: 'ax' | 'xctest' | 'hybrid';
20
+ snapshotBackend?: 'ax' | 'xctest';
21
21
  appsFilter?: 'launchable' | 'user-installed' | 'all';
22
22
  appsMetadata?: boolean;
23
23
  noRecord?: boolean;
@@ -81,7 +81,7 @@ export function parseArgs(argv: string[]): ParsedArgs {
81
81
  ? arg.split('=')[1]
82
82
  : argv[i + 1];
83
83
  if (!arg.includes('=')) i += 1;
84
- if (value !== 'ax' && value !== 'xctest' && value !== 'hybrid') {
84
+ if (value !== 'ax' && value !== 'xctest') {
85
85
  throw new AppError('INVALID_ARGS', `Invalid backend: ${value}`);
86
86
  }
87
87
  flags.snapshotBackend = value;
@@ -161,16 +161,15 @@ CLI to control iOS and Android devices for AI agents.
161
161
  Commands:
162
162
  open [app] Boot device/simulator; optionally launch app
163
163
  close [app] Close app or just end session
164
- snapshot [-i] [-c] [-d <depth>] [-s <scope>] [--raw] [--backend ax|xctest|hybrid]
164
+ snapshot [-i] [-c] [-d <depth>] [-s <scope>] [--raw] [--backend ax|xctest]
165
165
  Capture accessibility tree
166
166
  -i Interactive elements only
167
167
  -c Compact output (drop empty structure)
168
168
  -d <depth> Limit snapshot depth
169
169
  -s <scope> Scope snapshot to label/identifier
170
170
  --raw Raw node output
171
- --backend ax|xctest|hybrid hybrid: default; AX snapshot with XCTest fill for empty containers
171
+ --backend ax|xctest xctest: default; XCTest snapshot (slower, no permissions)
172
172
  ax: macOS Accessibility tree (fast, needs permissions)
173
- xctest: XCTest snapshot (slower, no permissions)
174
173
  devices List available devices
175
174
  apps [--user-installed|--all|--metadata] List installed apps (Android launchable by default, iOS simulator)
176
175
  appstate Show foreground app/activity
@@ -34,7 +34,7 @@ export type SnapshotState = {
34
34
  nodes: SnapshotNode[];
35
35
  createdAt: number;
36
36
  truncated?: boolean;
37
- backend?: 'ax' | 'xctest' | 'hybrid' | 'android';
37
+ backend?: 'ax' | 'xctest' | 'android';
38
38
  };
39
39
 
40
40
  export function attachRefs(nodes: RawSnapshotNode[]): SnapshotNode[] {