agent-device 0.2.6 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,74 @@
1
+ import test from 'node:test';
2
+ import assert from 'node:assert/strict';
3
+ import { findBounds, parseUiHierarchy } from '../ui-hierarchy.ts';
4
+
5
+ test('parseUiHierarchy reads double-quoted Android node attributes', () => {
6
+ const xml =
7
+ '<hierarchy><node class="android.widget.TextView" text="Hello" content-desc="Greeting" resource-id="com.demo:id/title" bounds="[10,20][110,60]" clickable="true" enabled="true"/></hierarchy>';
8
+
9
+ const result = parseUiHierarchy(xml, 800, { raw: true });
10
+ assert.equal(result.nodes.length, 1);
11
+ assert.equal(result.nodes[0].value, 'Hello');
12
+ assert.equal(result.nodes[0].label, 'Hello');
13
+ assert.equal(result.nodes[0].identifier, 'com.demo:id/title');
14
+ assert.deepEqual(result.nodes[0].rect, { x: 10, y: 20, width: 100, height: 40 });
15
+ assert.equal(result.nodes[0].hittable, true);
16
+ assert.equal(result.nodes[0].enabled, true);
17
+ });
18
+
19
+ test('parseUiHierarchy reads single-quoted Android node attributes', () => {
20
+ const xml =
21
+ "<hierarchy><node class='android.widget.TextView' text='Hello' content-desc='Greeting' resource-id='com.demo:id/title' bounds='[10,20][110,60]' clickable='true' enabled='true'/></hierarchy>";
22
+
23
+ const result = parseUiHierarchy(xml, 800, { raw: true });
24
+ assert.equal(result.nodes.length, 1);
25
+ assert.equal(result.nodes[0].value, 'Hello');
26
+ assert.equal(result.nodes[0].label, 'Hello');
27
+ assert.equal(result.nodes[0].identifier, 'com.demo:id/title');
28
+ assert.deepEqual(result.nodes[0].rect, { x: 10, y: 20, width: 100, height: 40 });
29
+ assert.equal(result.nodes[0].hittable, true);
30
+ assert.equal(result.nodes[0].enabled, true);
31
+ });
32
+
33
+ test('parseUiHierarchy supports mixed quote styles in one node', () => {
34
+ const xml =
35
+ '<hierarchy><node class="android.widget.TextView" text=\'Hello\' content-desc="Greeting" resource-id=\'com.demo:id/title\' bounds="[10,20][110,60]"/></hierarchy>';
36
+
37
+ const result = parseUiHierarchy(xml, 800, { raw: true });
38
+ assert.equal(result.nodes.length, 1);
39
+ assert.equal(result.nodes[0].value, 'Hello');
40
+ assert.equal(result.nodes[0].label, 'Hello');
41
+ assert.equal(result.nodes[0].identifier, 'com.demo:id/title');
42
+ });
43
+
44
+ test('findBounds supports single and double quoted attributes', () => {
45
+ const xml = [
46
+ '<hierarchy>',
47
+ '<node text="Nothing" content-desc="Irrelevant" bounds="[0,0][10,10]"/>',
48
+ "<node text='Target from single quote' content-desc='Alt single' bounds='[100,200][300,500]'/>",
49
+ '<node text="Target from double quote" content-desc="Alt double" bounds="[50,50][150,250]"/>',
50
+ '</hierarchy>',
51
+ ].join('');
52
+
53
+ assert.deepEqual(findBounds(xml, 'single quote'), { x: 200, y: 350 });
54
+ assert.deepEqual(findBounds(xml, 'alt double'), { x: 100, y: 150 });
55
+ });
56
+
57
+ test('parseUiHierarchy ignores attribute-name prefix spoofing', () => {
58
+ const xml =
59
+ "<hierarchy><node class='android.widget.TextView' hint-text='Spoofed' text='Actual' bounds='[10,20][110,60]'/></hierarchy>";
60
+
61
+ const result = parseUiHierarchy(xml, 800, { raw: true });
62
+ assert.equal(result.nodes.length, 1);
63
+ assert.equal(result.nodes[0].value, 'Actual');
64
+ });
65
+
66
+ test('findBounds ignores bounds-like fragments inside other attribute values', () => {
67
+ const xml = [
68
+ '<hierarchy>',
69
+ "<node text='Target' content-desc=\"metadata bounds='[900,900][1000,1000]'\" bounds='[100,200][300,500]'/>",
70
+ '</hierarchy>',
71
+ ].join('');
72
+
73
+ assert.deepEqual(findBounds(xml, 'target'), { x: 200, y: 350 });
74
+ });
@@ -3,8 +3,9 @@ import { runCmd, whichCmd } from '../../utils/exec.ts';
3
3
  import { withRetry } from '../../utils/retry.ts';
4
4
  import { AppError } from '../../utils/errors.ts';
5
5
  import type { DeviceInfo } from '../../utils/device.ts';
6
- import type { RawSnapshotNode, Rect, SnapshotOptions } from '../../utils/snapshot.ts';
6
+ import type { RawSnapshotNode, SnapshotOptions } from '../../utils/snapshot.ts';
7
7
  import { waitForAndroidBoot } from './devices.ts';
8
+ import { findBounds, parseBounds, parseUiHierarchy, readNodeAttributes } from './ui-hierarchy.ts';
8
9
 
9
10
  const ALIASES: Record<string, { type: 'intent' | 'package'; value: string }> = {
10
11
  settings: { type: 'intent', value: 'android.settings.SETTINGS' },
@@ -449,12 +450,52 @@ async function dumpUiHierarchy(device: DeviceInfo): Promise<string> {
449
450
  }
450
451
 
451
452
  async function dumpUiHierarchyOnce(device: DeviceInfo): Promise<string> {
452
- await runCmd(
453
+ // Preferred: stream XML directly to stdout, avoiding file I/O race conditions.
454
+ const streamed = await runCmd(
453
455
  'adb',
454
- adbArgs(device, ['shell', 'uiautomator', 'dump', '/sdcard/window_dump.xml']),
456
+ adbArgs(device, ['exec-out', 'uiautomator', 'dump', '/dev/tty']),
457
+ { allowFailure: true },
455
458
  );
456
- const result = await runCmd('adb', adbArgs(device, ['shell', 'cat', '/sdcard/window_dump.xml']));
457
- return result.stdout;
459
+ if (streamed.exitCode === 0) {
460
+ const fromStream = extractUiDumpXml(streamed.stdout, streamed.stderr);
461
+ if (fromStream) return fromStream;
462
+ }
463
+
464
+ // Fallback: dump to file and read back.
465
+ // If `cat` fails with "no such file", the outer withRetry (via isRetryableAdbError) handles it.
466
+ const dumpPath = '/sdcard/window_dump.xml';
467
+ const dumpResult = await runCmd(
468
+ 'adb',
469
+ adbArgs(device, ['shell', 'uiautomator', 'dump', dumpPath]),
470
+ );
471
+ const actualPath = resolveDumpPath(dumpPath, dumpResult.stdout, dumpResult.stderr);
472
+
473
+ const result = await runCmd('adb', adbArgs(device, ['shell', 'cat', actualPath]));
474
+ const xml = extractUiDumpXml(result.stdout, result.stderr);
475
+ if (!xml) {
476
+ throw new AppError('COMMAND_FAILED', 'uiautomator dump did not return XML', {
477
+ stdout: result.stdout,
478
+ stderr: result.stderr,
479
+ });
480
+ }
481
+ return xml;
482
+ }
483
+
484
+ function resolveDumpPath(defaultPath: string, stdout: string, stderr: string): string {
485
+ const text = `${stdout}\n${stderr}`;
486
+ const match = /dumped to:\s*(\S+)/i.exec(text);
487
+ return match?.[1] ?? defaultPath;
488
+ }
489
+
490
+ function extractUiDumpXml(stdout: string, stderr: string): string | null {
491
+ const text = `${stdout}\n${stderr}`;
492
+ const start = text.indexOf('<?xml');
493
+ const hierarchyStart = start >= 0 ? start : text.indexOf('<hierarchy');
494
+ if (hierarchyStart < 0) return null;
495
+ const end = text.lastIndexOf('</hierarchy>');
496
+ if (end < 0 || end < hierarchyStart) return null;
497
+ const xml = text.slice(hierarchyStart, end + '</hierarchy>'.length).trim();
498
+ return xml.length > 0 ? xml : null;
458
499
  }
459
500
 
460
501
  function isRetryableAdbError(err: unknown): boolean {
@@ -467,6 +508,7 @@ function isRetryableAdbError(err: unknown): boolean {
467
508
  if (stderr.includes('connection reset')) return true;
468
509
  if (stderr.includes('broken pipe')) return true;
469
510
  if (stderr.includes('timed out')) return true;
511
+ if (stderr.includes('no such file or directory')) return true;
470
512
  return false;
471
513
  }
472
514
 
@@ -582,291 +624,3 @@ async function sleep(ms: number): Promise<void> {
582
624
  function clampCount(value: number, min: number, max: number): number {
583
625
  return Math.max(min, Math.min(max, value));
584
626
  }
585
-
586
- function findBounds(xml: string, query: string): { x: number; y: number } | null {
587
- const q = query.toLowerCase();
588
- const nodeRegex = /<node[^>]+>/g;
589
- let match = nodeRegex.exec(xml);
590
- while (match) {
591
- const node = match[0];
592
- const textMatch = /text="([^"]*)"/.exec(node);
593
- const descMatch = /content-desc="([^"]*)"/.exec(node);
594
- const textVal = (textMatch?.[1] ?? '').toLowerCase();
595
- const descVal = (descMatch?.[1] ?? '').toLowerCase();
596
- if (textVal.includes(q) || descVal.includes(q)) {
597
- const boundsMatch = /bounds="\[(\d+),(\d+)\]\[(\d+),(\d+)\]"/.exec(node);
598
- if (boundsMatch) {
599
- const x1 = Number(boundsMatch[1]);
600
- const y1 = Number(boundsMatch[2]);
601
- const x2 = Number(boundsMatch[3]);
602
- const y2 = Number(boundsMatch[4]);
603
- return { x: Math.floor((x1 + x2) / 2), y: Math.floor((y1 + y2) / 2) };
604
- }
605
- return { x: 0, y: 0 };
606
- }
607
- match = nodeRegex.exec(xml);
608
- }
609
- return null;
610
- }
611
-
612
- function parseUiHierarchy(
613
- xml: string,
614
- maxNodes: number,
615
- options: SnapshotOptions,
616
- ): { nodes: RawSnapshotNode[]; truncated?: boolean } {
617
- const tree = parseUiHierarchyTree(xml);
618
- const nodes: RawSnapshotNode[] = [];
619
- let truncated = false;
620
- const maxDepth = options.depth ?? Number.POSITIVE_INFINITY;
621
- const scopedRoot = options.scope ? findScopeNode(tree, options.scope) : null;
622
- const roots = scopedRoot ? [scopedRoot] : tree.children;
623
-
624
- const interactiveDescendantMemo = new Map<AndroidNode, boolean>();
625
- const hasInteractiveDescendant = (node: AndroidNode): boolean => {
626
- const cached = interactiveDescendantMemo.get(node);
627
- if (cached !== undefined) return cached;
628
- for (const child of node.children) {
629
- if (child.hittable || hasInteractiveDescendant(child)) {
630
- interactiveDescendantMemo.set(node, true);
631
- return true;
632
- }
633
- }
634
- interactiveDescendantMemo.set(node, false);
635
- return false;
636
- };
637
-
638
- const walk = (
639
- node: AndroidNode,
640
- depth: number,
641
- parentIndex?: number,
642
- ancestorHittable: boolean = false,
643
- ancestorCollection: boolean = false,
644
- ) => {
645
- if (nodes.length >= maxNodes) {
646
- truncated = true;
647
- return;
648
- }
649
- if (depth > maxDepth) return;
650
-
651
- const include = options.raw
652
- ? true
653
- : shouldIncludeAndroidNode(
654
- node,
655
- options,
656
- ancestorHittable,
657
- hasInteractiveDescendant(node),
658
- ancestorCollection,
659
- );
660
- let currentIndex = parentIndex;
661
- if (include) {
662
- currentIndex = nodes.length;
663
- nodes.push({
664
- index: currentIndex,
665
- type: node.type ?? undefined,
666
- label: node.label ?? undefined,
667
- value: node.value ?? undefined,
668
- identifier: node.identifier ?? undefined,
669
- rect: node.rect,
670
- enabled: node.enabled,
671
- hittable: node.hittable,
672
- depth,
673
- parentIndex,
674
- });
675
- }
676
- const nextAncestorHittable = ancestorHittable || Boolean(node.hittable);
677
- const nextAncestorCollection = ancestorCollection || isCollectionContainerType(node.type);
678
- for (const child of node.children) {
679
- walk(child, depth + 1, currentIndex, nextAncestorHittable, nextAncestorCollection);
680
- if (truncated) return;
681
- }
682
- };
683
-
684
- for (const root of roots) {
685
- walk(root, 0, undefined, false, false);
686
- if (truncated) break;
687
- }
688
-
689
- return truncated ? { nodes, truncated } : { nodes };
690
- }
691
-
692
- function readNodeAttributes(node: string): {
693
- text: string | null;
694
- desc: string | null;
695
- resourceId: string | null;
696
- className: string | null;
697
- bounds: string | null;
698
- clickable?: boolean;
699
- enabled?: boolean;
700
- focusable?: boolean;
701
- focused?: boolean;
702
- } {
703
- const getAttr = (name: string): string | null => {
704
- const regex = new RegExp(`${name}="([^"]*)"`);
705
- const match = regex.exec(node);
706
- return match ? match[1] : null;
707
- };
708
- const boolAttr = (name: string): boolean | undefined => {
709
- const raw = getAttr(name);
710
- if (raw === null) return undefined;
711
- return raw === 'true';
712
- };
713
- return {
714
- text: getAttr('text'),
715
- desc: getAttr('content-desc'),
716
- resourceId: getAttr('resource-id'),
717
- className: getAttr('class'),
718
- bounds: getAttr('bounds'),
719
- clickable: boolAttr('clickable'),
720
- enabled: boolAttr('enabled'),
721
- focusable: boolAttr('focusable'),
722
- focused: boolAttr('focused'),
723
- };
724
- }
725
-
726
- function parseBounds(bounds: string | null): Rect | undefined {
727
- if (!bounds) return undefined;
728
- const match = /\[(\d+),(\d+)\]\[(\d+),(\d+)\]/.exec(bounds);
729
- if (!match) return undefined;
730
- const x1 = Number(match[1]);
731
- const y1 = Number(match[2]);
732
- const x2 = Number(match[3]);
733
- const y2 = Number(match[4]);
734
- return { x: x1, y: y1, width: Math.max(0, x2 - x1), height: Math.max(0, y2 - y1) };
735
- }
736
-
737
- type AndroidNode = {
738
- type: string | null;
739
- label: string | null;
740
- value: string | null;
741
- identifier: string | null;
742
- rect?: Rect;
743
- enabled?: boolean;
744
- hittable?: boolean;
745
- depth: number;
746
- parentIndex?: number;
747
- children: AndroidNode[];
748
- };
749
-
750
- function parseUiHierarchyTree(xml: string): AndroidNode {
751
- const root: AndroidNode = {
752
- type: null,
753
- label: null,
754
- value: null,
755
- identifier: null,
756
- depth: -1,
757
- children: [],
758
- };
759
- const stack: AndroidNode[] = [root];
760
- const tokenRegex = /<node\b[^>]*>|<\/node>/g;
761
- let match = tokenRegex.exec(xml);
762
- while (match) {
763
- const token = match[0];
764
- if (token.startsWith('</node')) {
765
- if (stack.length > 1) stack.pop();
766
- match = tokenRegex.exec(xml);
767
- continue;
768
- }
769
- const attrs = readNodeAttributes(token);
770
- const rect = parseBounds(attrs.bounds);
771
- const parent = stack[stack.length - 1];
772
- const node: AndroidNode = {
773
- type: attrs.className,
774
- label: attrs.text || attrs.desc,
775
- value: attrs.text,
776
- identifier: attrs.resourceId,
777
- rect,
778
- enabled: attrs.enabled,
779
- hittable: attrs.clickable ?? attrs.focusable,
780
- depth: parent.depth + 1,
781
- parentIndex: undefined,
782
- children: [],
783
- };
784
- parent.children.push(node);
785
- if (!token.endsWith('/>')) {
786
- stack.push(node);
787
- }
788
- match = tokenRegex.exec(xml);
789
- }
790
- return root;
791
- }
792
-
793
- function shouldIncludeAndroidNode(
794
- node: AndroidNode,
795
- options: SnapshotOptions,
796
- ancestorHittable: boolean,
797
- descendantHittable: boolean,
798
- ancestorCollection: boolean,
799
- ): boolean {
800
- const type = normalizeAndroidType(node.type);
801
- const hasText = Boolean(node.label && node.label.trim().length > 0);
802
- const hasId = Boolean(node.identifier && node.identifier.trim().length > 0);
803
- const hasMeaningfulText = hasText && !isGenericAndroidId(node.label ?? '');
804
- const hasMeaningfulId = hasId && !isGenericAndroidId(node.identifier ?? '');
805
- const isStructural = isStructuralAndroidType(type);
806
- const isVisual = type === 'imageview' || type === 'imagebutton';
807
- if (options.interactiveOnly) {
808
- if (node.hittable) return true;
809
- // Keep text proxies for tappable rows while dropping structural noise.
810
- const proxyCandidate = hasMeaningfulText || hasMeaningfulId;
811
- if (!proxyCandidate) return false;
812
- if (isVisual) return false;
813
- if (isStructural && !ancestorCollection) return false;
814
- return ancestorHittable || descendantHittable || ancestorCollection;
815
- }
816
- if (options.compact) {
817
- return hasMeaningfulText || hasMeaningfulId || Boolean(node.hittable);
818
- }
819
- if (isStructural || isVisual) {
820
- if (node.hittable) return true;
821
- if (hasMeaningfulText) return true;
822
- if (hasMeaningfulId && descendantHittable) return true;
823
- return descendantHittable;
824
- }
825
- return true;
826
- }
827
-
828
- function isCollectionContainerType(type: string | null): boolean {
829
- if (!type) return false;
830
- const normalized = normalizeAndroidType(type);
831
- return (
832
- normalized.includes('recyclerview') ||
833
- normalized.includes('listview') ||
834
- normalized.includes('gridview')
835
- );
836
- }
837
-
838
- function normalizeAndroidType(type: string | null): string {
839
- if (!type) return '';
840
- return type.toLowerCase();
841
- }
842
-
843
- function isStructuralAndroidType(type: string): boolean {
844
- const short = type.split('.').pop() ?? type;
845
- return (
846
- short.includes('layout') ||
847
- short === 'viewgroup' ||
848
- short === 'view'
849
- );
850
- }
851
-
852
- function isGenericAndroidId(value: string): boolean {
853
- const trimmed = value.trim();
854
- if (!trimmed) return false;
855
- return /^[\w.]+:id\/[\w.-]+$/i.test(trimmed);
856
- }
857
-
858
- function findScopeNode(root: AndroidNode, scope: string): AndroidNode | null {
859
- const query = scope.toLowerCase();
860
- const stack: AndroidNode[] = [...root.children];
861
- while (stack.length > 0) {
862
- const node = stack.shift() as AndroidNode;
863
- const label = node.label?.toLowerCase() ?? '';
864
- const value = node.value?.toLowerCase() ?? '';
865
- const identifier = node.identifier?.toLowerCase() ?? '';
866
- if (label.includes(query) || value.includes(query) || identifier.includes(query)) {
867
- return node;
868
- }
869
- stack.push(...node.children);
870
- }
871
- return null;
872
- }