mobile-debug-mcp 0.15.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  import { execAdb, getAndroidDeviceMetadata, getDeviceInfo } from "../utils/android/utils.js";
2
2
  import { AndroidObserve } from "../observe/index.js";
3
- import { scrollToElementShared } from "../interact/shared/scroll_to_element.js";
3
+ import { scrollToElementShared } from "../utils/ui/index.js";
4
4
  export class AndroidInteract {
5
5
  observe = new AndroidObserve();
6
6
  async waitForElement(text, timeout, deviceId) {
@@ -2,6 +2,7 @@ import { AndroidInteract } from './android.js';
2
2
  import { iOSInteract } from './ios.js';
3
3
  export { AndroidInteract, iOSInteract };
4
4
  import { resolveTargetDevice } from '../utils/resolve-device.js';
5
+ import { ToolsObserve } from '../observe/index.js';
5
6
  export class ToolsInteract {
6
7
  static async getInteractionService(platform, deviceId) {
7
8
  const effectivePlatform = platform || 'android';
@@ -34,4 +35,42 @@ export class ToolsInteract {
34
35
  const { interact, resolved } = await ToolsInteract.getInteractionService(platform, deviceId);
35
36
  return await interact.scrollToElement(selector, direction, maxScrolls, scrollAmount, resolved.id);
36
37
  }
38
+ static async waitForScreenChangeHandler({ platform, previousFingerprint, timeoutMs = 5000, pollIntervalMs = 300, deviceId }) {
39
+ const start = Date.now();
40
+ let lastFingerprint = null;
41
+ while (Date.now() - start < timeoutMs) {
42
+ try {
43
+ const res = await ToolsObserve.getScreenFingerprintHandler({ platform, deviceId });
44
+ const fp = res?.fingerprint ?? null;
45
+ if (fp === null || fp === undefined) {
46
+ lastFingerprint = null;
47
+ await new Promise(resolve => setTimeout(resolve, pollIntervalMs));
48
+ continue;
49
+ }
50
+ lastFingerprint = fp;
51
+ if (fp !== previousFingerprint) {
52
+ // Stability confirmation
53
+ await new Promise(resolve => setTimeout(resolve, pollIntervalMs));
54
+ try {
55
+ const confirmRes = await ToolsObserve.getScreenFingerprintHandler({ platform, deviceId });
56
+ const confirmFp = confirmRes?.fingerprint ?? null;
57
+ if (confirmFp === fp) {
58
+ return { success: true, newFingerprint: fp, elapsedMs: Date.now() - start };
59
+ }
60
+ lastFingerprint = confirmFp;
61
+ continue;
62
+ }
63
+ catch {
64
+ // ignore and continue polling
65
+ continue;
66
+ }
67
+ }
68
+ }
69
+ catch {
70
+ // ignore transient errors
71
+ }
72
+ await new Promise(resolve => setTimeout(resolve, pollIntervalMs));
73
+ }
74
+ return { success: false, reason: 'timeout', lastFingerprint, elapsedMs: Date.now() - start };
75
+ }
37
76
  }
@@ -1,7 +1,7 @@
1
1
  import { spawn } from "child_process";
2
2
  import { getIOSDeviceMetadata, getIdbCmd, isIDBInstalled } from "../utils/ios/utils.js";
3
3
  import { iOSObserve } from "../observe/index.js";
4
- import { scrollToElementShared } from "../interact/shared/scroll_to_element.js";
4
+ import { scrollToElementShared } from "../utils/ui/index.js";
5
5
  export class iOSInteract {
6
6
  observe = new iOSObserve();
7
7
  async waitForElement(text, timeout, deviceId = "booted") {
@@ -1,72 +1 @@
1
- import crypto from 'crypto';
2
- const ANDROID_STRUCTURAL_TYPES = ['Window', 'Application', 'View', 'ViewGroup', 'LinearLayout', 'FrameLayout', 'RelativeLayout', 'ScrollView', 'RecyclerView', 'TextView', 'ImageView'];
3
- const IOS_STRUCTURAL_TYPES = ['Window', 'Application', 'View', 'ViewController', 'UITableView', 'UICollectionView', 'UILabel', 'UIImageView', 'UIView', 'UIWindow', 'UIStackView', 'UITextView', 'UITableViewCell'];
4
- function isDynamicText(t) {
5
- if (!t)
6
- return false;
7
- const txt = t.trim();
8
- if (!txt)
9
- return false;
10
- if (/\b\d{1,2}:\d{2}\b/.test(txt))
11
- return true;
12
- if (/\b\d{4}-\d{2}-\d{2}\b/.test(txt))
13
- return true;
14
- if (/^\d+(?:\.\d+)?%$/.test(txt))
15
- return true;
16
- if (/^\d+$/.test(txt))
17
- return true;
18
- if (/^[\d,]{1,10}$/.test(txt))
19
- return true;
20
- return false;
21
- }
22
- function normalizeElement(e) {
23
- return {
24
- type: (e.type || '').toString(),
25
- resourceId: (e.resourceId || '').toString(),
26
- text: typeof e.text === 'string' ? (isDynamicText(e.text) ? '' : e.text.trim().toLowerCase()) : '',
27
- contentDesc: (e.contentDescription || '').toString(),
28
- bounds: Array.isArray(e.bounds) ? e.bounds.slice(0, 4).map((n) => Number(n) || 0) : [0, 0, 0, 0]
29
- };
30
- }
31
- export function computeScreenFingerprint(tree, current, platform, limit = 50) {
32
- try {
33
- if (!tree || tree.error)
34
- return { fingerprint: null, error: tree.error };
35
- const activity = current && (current.activity || current.shortActivity) ? (current.activity || current.shortActivity) : '';
36
- const candidates = (tree.elements || []).filter(e => {
37
- if (!e)
38
- return false;
39
- if (!e.visible)
40
- return false;
41
- const hasStableText = typeof e.text === 'string' && e.text.trim().length > 0;
42
- const hasResource = !!e.resourceId;
43
- const interactable = !!e.clickable || !!e.enabled;
44
- const structuralList = platform === 'android' ? ANDROID_STRUCTURAL_TYPES : IOS_STRUCTURAL_TYPES;
45
- const structurallySignificant = hasStableText || hasResource || structuralList.includes(e.type || '');
46
- return interactable || structurallySignificant;
47
- });
48
- const normalized = candidates.map(normalizeElement);
49
- const filteredNormalized = normalized.filter(e => (e.text && e.text.length > 0) || (e.resourceId && e.resourceId.length > 0) || (e.contentDesc && e.contentDesc.length > 0));
50
- filteredNormalized.sort((a, b) => {
51
- const ay = (a.bounds && a.bounds[1]) || 0;
52
- const by = (b.bounds && b.bounds[1]) || 0;
53
- if (ay !== by)
54
- return ay - by;
55
- const ax = (a.bounds && a.bounds[0]) || 0;
56
- const bx = (b.bounds && b.bounds[0]) || 0;
57
- return ax - bx;
58
- });
59
- const limited = filteredNormalized.slice(0, Math.max(0, limit));
60
- const payload = {
61
- activity: platform === 'android' ? (activity || '') : '',
62
- resolution: tree.resolution || { width: 0, height: 0 },
63
- elements: limited.map(e => ({ type: e.type, resourceId: e.resourceId, text: e.text, contentDesc: e.contentDesc }))
64
- };
65
- const combined = JSON.stringify(payload);
66
- const hash = crypto.createHash('sha256').update(combined).digest('hex');
67
- return { fingerprint: hash, activity: activity };
68
- }
69
- catch (e) {
70
- return { fingerprint: null, error: e instanceof Error ? e.message : String(e) };
71
- }
72
- }
1
+ export { computeScreenFingerprint } from '../../utils/ui/index.js';
@@ -1,98 +1 @@
1
- export async function scrollToElementShared(opts) {
2
- const { selector, direction = 'down', maxScrolls = 10, scrollAmount = 0.7, deviceId, fetchTree, swipe, stabilizationDelayMs = 350 } = opts;
3
- const matchElement = (el) => {
4
- if (!el)
5
- return false;
6
- if (selector.text !== undefined && selector.text !== el.text)
7
- return false;
8
- if (selector.resourceId !== undefined && selector.resourceId !== el.resourceId)
9
- return false;
10
- if (selector.contentDesc !== undefined && selector.contentDesc !== el.contentDescription)
11
- return false;
12
- if (selector.className !== undefined && selector.className !== el.type)
13
- return false;
14
- return true;
15
- };
16
- const isVisible = (el, resolution) => {
17
- if (!el)
18
- return false;
19
- if (el.visible === false)
20
- return false;
21
- if (!el.bounds || !resolution || !resolution.width || !resolution.height)
22
- return (el.visible === undefined ? true : !!el.visible);
23
- const [left, top, right, bottom] = el.bounds;
24
- const withinY = bottom > 0 && top < resolution.height;
25
- const withinX = right > 0 && left < resolution.width;
26
- return withinX && withinY;
27
- };
28
- const findVisibleMatch = (elements, resolution) => {
29
- if (!Array.isArray(elements))
30
- return null;
31
- for (const e of elements) {
32
- if (matchElement(e) && isVisible(e, resolution))
33
- return e;
34
- }
35
- return null;
36
- };
37
- // Initial check
38
- let tree = await fetchTree();
39
- if (tree.error)
40
- return { success: false, reason: tree.error, scrollsPerformed: 0 };
41
- let found = findVisibleMatch(tree.elements, tree.resolution);
42
- if (found) {
43
- return { success: true, element: { text: found.text, resourceId: found.resourceId, bounds: found.bounds }, scrollsPerformed: 0 };
44
- }
45
- const fingerprintOf = (t) => {
46
- try {
47
- return JSON.stringify((t.elements || []).map((e) => ({ text: e.text, resourceId: e.resourceId, bounds: e.bounds })));
48
- }
49
- catch {
50
- return '';
51
- }
52
- };
53
- let prevFingerprint = fingerprintOf(tree);
54
- const width = (tree.resolution && tree.resolution.width) ? tree.resolution.width : 0;
55
- const height = (tree.resolution && tree.resolution.height) ? tree.resolution.height : 0;
56
- const centerX = Math.round(width / 2) || 50;
57
- const clampPct = (v) => Math.max(0.05, Math.min(0.95, v));
58
- const computeCoords = () => {
59
- const defaultStart = direction === 'down' ? 0.8 : 0.2;
60
- const startPct = clampPct(defaultStart);
61
- const endPct = clampPct(defaultStart + (direction === 'down' ? -scrollAmount : scrollAmount));
62
- const x1 = centerX;
63
- const x2 = centerX;
64
- const y1 = Math.round((height || 100) * startPct);
65
- const y2 = Math.round((height || 100) * endPct);
66
- return { x1, y1, x2, y2 };
67
- };
68
- const duration = 300;
69
- let scrollsPerformed = 0;
70
- for (let i = 0; i < maxScrolls; i++) {
71
- const { x1, y1, x2, y2 } = computeCoords();
72
- try {
73
- await swipe(x1, y1, x2, y2, duration, deviceId);
74
- }
75
- catch (e) {
76
- // Log swipe failures to aid debugging but don't fail the overall flow
77
- try {
78
- console.warn(`scrollToElement swipe failed: ${e instanceof Error ? e.message : String(e)}`);
79
- }
80
- catch { }
81
- }
82
- scrollsPerformed++;
83
- await new Promise(resolve => setTimeout(resolve, stabilizationDelayMs));
84
- tree = await fetchTree();
85
- if (tree.error)
86
- return { success: false, reason: tree.error, scrollsPerformed: scrollsPerformed };
87
- found = findVisibleMatch(tree.elements, tree.resolution);
88
- if (found) {
89
- return { success: true, element: { text: found.text, resourceId: found.resourceId, bounds: found.bounds }, scrollsPerformed };
90
- }
91
- const fp = fingerprintOf(tree);
92
- if (fp === prevFingerprint) {
93
- return { success: false, reason: 'UI unchanged after scroll; likely end of list', scrollsPerformed: scrollsPerformed };
94
- }
95
- prevFingerprint = fp;
96
- }
97
- return { success: false, reason: 'Element not found after scrolling', scrollsPerformed: scrollsPerformed };
98
- }
1
+ export { scrollToElementShared } from '../../utils/ui/index.js';
@@ -4,7 +4,7 @@ import { getAdbCmd, execAdb, getAndroidDeviceMetadata, getDeviceInfo, delay, get
4
4
  import { createWriteStream } from "fs";
5
5
  import { promises as fsPromises } from "fs";
6
6
  import path from "path";
7
- import { computeScreenFingerprint } from "../interact/shared/fingerprint.js";
7
+ import { computeScreenFingerprint } from "../utils/ui/index.js";
8
8
  const activeLogStreams = new Map();
9
9
  export class AndroidObserve {
10
10
  async getDeviceMetadata(appId, deviceId) {
@@ -4,7 +4,7 @@ import { execCommand, getIOSDeviceMetadata, validateBundleId, getIdbCmd, getXcru
4
4
  import { createWriteStream, promises as fsPromises } from 'fs';
5
5
  import path from 'path';
6
6
  import { parseLogLine } from '../utils/android/utils.js';
7
- import { computeScreenFingerprint } from '../interact/shared/fingerprint.js';
7
+ import { computeScreenFingerprint } from '../utils/ui/index.js';
8
8
  const delay = (ms) => new Promise(resolve => setTimeout(resolve, ms));
9
9
  function parseIDBFrame(frame) {
10
10
  if (!frame)
package/dist/server.js CHANGED
@@ -273,6 +273,21 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
273
273
  }
274
274
  }
275
275
  },
276
+ {
277
+ name: "wait_for_screen_change",
278
+ description: "Wait until the current screen fingerprint differs from a provided previousFingerprint. Useful to wait for navigation/animation completion.",
279
+ inputSchema: {
280
+ type: "object",
281
+ properties: {
282
+ platform: { type: "string", enum: ["android", "ios"], description: "Optional platform override (android|ios)" },
283
+ previousFingerprint: { type: "string", description: "The fingerprint to compare against (required)" },
284
+ timeoutMs: { type: "number", description: "Timeout in ms to wait for change (default 5000)", default: 5000 },
285
+ pollIntervalMs: { type: "number", description: "Polling interval in ms (default 300)", default: 300 },
286
+ deviceId: { type: "string", description: "Optional device id/udid to target" }
287
+ },
288
+ required: ["previousFingerprint"]
289
+ }
290
+ },
276
291
  {
277
292
  name: "wait_for_element",
278
293
  description: "Wait until a UI element with matching text appears on screen or timeout is reached.",
@@ -556,6 +571,11 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
556
571
  const res = await ToolsObserve.getScreenFingerprintHandler({ platform, deviceId });
557
572
  return wrapResponse(res);
558
573
  }
574
+ if (name === "wait_for_screen_change") {
575
+ const { platform, previousFingerprint, timeoutMs, pollIntervalMs, deviceId } = (args || {});
576
+ const res = await ToolsInteract.waitForScreenChangeHandler({ platform, previousFingerprint, timeoutMs, pollIntervalMs, deviceId });
577
+ return wrapResponse(res);
578
+ }
559
579
  if (name === "wait_for_element") {
560
580
  const { platform, text, timeout, deviceId } = (args || {});
561
581
  const res = await ToolsInteract.waitForElementHandler({ platform, text, timeout, deviceId });
@@ -1,7 +1,7 @@
1
- import { spawn } from 'child_process';
2
1
  import { promises as fsPromises, existsSync } from 'fs';
3
2
  import path from 'path';
4
3
  import { detectJavaHome } from '../java.js';
4
+ import { execCmd } from '../exec.js';
5
5
  export function getAdbCmd() { return process.env.ADB_PATH || 'adb'; }
6
6
  /**
7
7
  * Prepare Gradle execution options for building an Android project.
@@ -76,76 +76,20 @@ function getAdbTimeout(args, customTimeout) {
76
76
  return 20000;
77
77
  return 120000;
78
78
  }
79
/**
 * Run an adb command and resolve with its stdout.
 *
 * @param args     adb arguments (array of strings).
 * @param deviceId Optional device serial, passed to getAdbArgs.
 * @param options  Optional `timeout`, `env`, `cwd` and `shell` settings.
 * @returns The command's stdout as returned by execCmd.
 * @throws Error when the command exits non-zero (message is stderr when
 *         available) or when it produced no exit code, i.e. it was killed on
 *         timeout or by a signal.
 */
export async function execAdb(args, deviceId, options = {}) {
    const adbArgs = getAdbArgs(args, deviceId);
    const timeoutMs = getAdbTimeout(args, options.timeout);
    const res = await execCmd(getAdbCmd(), adbArgs, { timeout: timeoutMs, env: options.env, cwd: typeof options.cwd === 'string' ? options.cwd : undefined, shell: !!options.shell });
    if (res.exitCode === null) {
        // A null exit code means the child never exited normally: execCmd killed
        // it when the timeout fired, or it died from a signal. Report that
        // explicitly instead of "Command failed with code null".
        const detail = res.stderr ? ` (${res.stderr})` : '';
        throw new Error(`ADB command timed out after ${timeoutMs}ms or was terminated by a signal: ${args.join(' ')}${detail}`);
    }
    if (res.exitCode !== 0)
        throw new Error(res.stderr || `Command failed with code ${res.exitCode}`);
    return res.stdout;
}
120
87
  // Spawn adb but return full streams and exit code so callers can implement fallbacks or stream output
121
/**
 * Run an adb command and report everything it produced instead of throwing
 * on a non-zero exit.
 *
 * @param args     adb arguments.
 * @param deviceId Optional device serial, passed to getAdbArgs.
 * @param options  Optional `timeout`, `env`, `cwd` and `shell` settings.
 * @returns `{ stdout, stderr, code }`, where `code` is the exit code reported
 *          by execCmd.
 */
export async function spawnAdb(args, deviceId, options = {}) {
    const fullArgs = getAdbArgs(args, deviceId);
    const limitMs = getAdbTimeout(args, options.timeout);
    const workingDir = typeof options.cwd === 'string' ? options.cwd : undefined;
    const { stdout, stderr, exitCode } = await execCmd(getAdbCmd(), fullArgs, {
        timeout: limitMs,
        env: options.env,
        cwd: workingDir,
        shell: Boolean(options.shell),
    });
    return { stdout, stderr, code: exitCode };
}
150
94
  export function getDeviceInfo(deviceId, metadata = {}) {
151
95
  return {
@@ -0,0 +1,34 @@
1
+ import { spawn } from 'child_process';
2
/**
 * Spawn a command, collect its output and resolve once it exits or times out.
 *
 * @param cmd  Executable to run.
 * @param args Argument list passed to the executable.
 * @param opts Optional settings:
 *   - `timeout`: milliseconds before the child is killed (0 or unset = no limit)
 *   - `env`: extra environment variables, merged over `process.env`
 *   - `cwd`: working directory
 *   - `shell`: forwarded to `spawn`
 * @returns `{ exitCode, stdout, stderr, timedOut }` with both streams trimmed.
 *          `exitCode` is `null` when the child was killed on timeout or ended
 *          because of a signal; `timedOut` tells the two apart.
 * @throws Rejects with the spawn error when the process cannot be started.
 */
export async function execCmd(cmd, args, opts = {}) {
    const { timeout = 0, env, cwd, shell } = opts;
    return new Promise((resolve, reject) => {
        const child = spawn(cmd, args, { env: { ...process.env, ...(env || {}) }, cwd, shell });
        let stdout = '';
        let stderr = '';
        if (child.stdout) {
            // Decode on the stream so a multi-byte UTF-8 character split across
            // two chunks is reassembled instead of becoming replacement characters.
            child.stdout.setEncoding('utf8');
            child.stdout.on('data', (chunk) => { stdout += chunk; });
        }
        if (child.stderr) {
            child.stderr.setEncoding('utf8');
            child.stderr.on('data', (chunk) => { stderr += chunk; });
        }
        let timedOut = false;
        const timer = timeout > 0 ? setTimeout(() => {
            timedOut = true;
            try {
                child.kill();
            }
            catch { }
            // Resolve right away with whatever was captured; the later 'close'
            // event is ignored through the timedOut flag.
            resolve({ exitCode: null, stdout: stdout.trim(), stderr: stderr.trim(), timedOut: true });
        }, timeout) : null;
        child.on('close', (code) => {
            if (timer)
                clearTimeout(timer);
            if (timedOut)
                return;
            resolve({ exitCode: code, stdout: stdout.trim(), stderr: stderr.trim(), timedOut: false });
        });
        child.on('error', (err) => {
            if (timer)
                clearTimeout(timer);
            reject(err);
        });
    });
}
@@ -0,0 +1,169 @@
1
+ import crypto from 'crypto';
2
+ const ANDROID_STRUCTURAL_TYPES = ['Window', 'Application', 'View', 'ViewGroup', 'LinearLayout', 'FrameLayout', 'RelativeLayout', 'ScrollView', 'RecyclerView', 'TextView', 'ImageView'];
3
+ const IOS_STRUCTURAL_TYPES = ['Window', 'Application', 'View', 'ViewController', 'UITableView', 'UICollectionView', 'UILabel', 'UIImageView', 'UIView', 'UIWindow', 'UIStackView', 'UITextView', 'UITableViewCell'];
4
/**
 * Decide whether a text value looks volatile (clock time, ISO date,
 * percentage, plain number or comma-grouped number), so it can be left out of
 * a screen fingerprint.
 *
 * @param t Text to inspect; empty, blank or missing values are not dynamic.
 * @returns true when the trimmed text matches one of the volatile patterns.
 */
function isDynamicText(t) {
    const trimmed = t ? t.trim() : '';
    if (trimmed === '')
        return false;
    const volatilePatterns = [
        /\b\d{1,2}:\d{2}\b/, // clock time anywhere in the text, e.g. 9:05
        /\b\d{4}-\d{2}-\d{2}\b/, // ISO-style date anywhere in the text
        /^\d+(?:\.\d+)?%$/, // percentage
        /^\d+$/, // digits only
        /^[\d,]{1,10}$/ // short run of digits and commas
    ];
    return volatilePatterns.some((pattern) => pattern.test(trimmed));
}
22
/**
 * Reduce a UI element to the fields used for fingerprinting.
 *
 * @param e Raw element from a UI tree.
 * @returns `{ type, resourceId, text, contentDesc, bounds }`: string fields
 *          default to '', text is trimmed and lower-cased (or '' when it is
 *          not a string or looks dynamic), and bounds are at most four numbers
 *          with non-numeric entries replaced by 0.
 */
function normalizeElement(e) {
    const asString = (value) => (value || '').toString();
    let text = '';
    if (typeof e.text === 'string' && !isDynamicText(e.text)) {
        text = e.text.trim().toLowerCase();
    }
    const bounds = Array.isArray(e.bounds)
        ? e.bounds.slice(0, 4).map((n) => Number(n) || 0)
        : [0, 0, 0, 0];
    return {
        type: asString(e.type),
        resourceId: asString(e.resourceId),
        text,
        contentDesc: asString(e.contentDescription),
        bounds
    };
}
31
/**
 * Compute a SHA-256 fingerprint of the visible, significant part of a UI tree.
 *
 * @param tree     UI tree with `elements` and `resolution`, or an object with `error`.
 * @param current  Optional current-activity info (`activity` / `shortActivity`).
 * @param platform 'android' selects the Android structural types and includes the
 *                 activity in the hash; any other value uses the iOS list and an
 *                 empty activity.
 * @param limit    Maximum number of elements hashed (default 50).
 * @returns `{ fingerprint, activity }` on success, or `{ fingerprint: null, error }`
 *          when the tree is missing, carries an error, or hashing throws.
 */
export function computeScreenFingerprint(tree, current, platform, limit = 50) {
    try {
        // A missing tree is reported explicitly rather than by dereferencing it
        // and surfacing an accidental TypeError message.
        if (!tree)
            return { fingerprint: null, error: 'No UI tree provided' };
        if (tree.error)
            return { fingerprint: null, error: tree.error };
        const activity = (current && (current.activity || current.shortActivity)) || '';
        const structuralList = platform === 'android' ? ANDROID_STRUCTURAL_TYPES : IOS_STRUCTURAL_TYPES;
        // Keep visible elements that are interactable or structurally significant.
        const candidates = (tree.elements || []).filter(e => {
            if (!e)
                return false;
            if (!e.visible)
                return false;
            const hasStableText = typeof e.text === 'string' && e.text.trim().length > 0;
            const hasResource = !!e.resourceId;
            const interactable = !!e.clickable || !!e.enabled;
            const structurallySignificant = hasStableText || hasResource || structuralList.includes(e.type || '');
            return interactable || structurallySignificant;
        });
        const normalized = candidates.map(normalizeElement);
        // After normalisation, drop elements that carry no identifying content.
        const filteredNormalized = normalized.filter(e => (e.text && e.text.length > 0) || (e.resourceId && e.resourceId.length > 0) || (e.contentDesc && e.contentDesc.length > 0));
        // Order top-to-bottom, then left-to-right, so the hash does not depend on
        // the order in which the tree listed the elements.
        filteredNormalized.sort((a, b) => {
            const ay = (a.bounds && a.bounds[1]) || 0;
            const by = (b.bounds && b.bounds[1]) || 0;
            if (ay !== by)
                return ay - by;
            const ax = (a.bounds && a.bounds[0]) || 0;
            const bx = (b.bounds && b.bounds[0]) || 0;
            return ax - bx;
        });
        const limited = filteredNormalized.slice(0, Math.max(0, limit));
        // Bounds are used for ordering only; they are not part of the hashed payload.
        const payload = {
            activity: platform === 'android' ? (activity || '') : '',
            resolution: tree.resolution || { width: 0, height: 0 },
            elements: limited.map(e => ({ type: e.type, resourceId: e.resourceId, text: e.text, contentDesc: e.contentDesc }))
        };
        const combined = JSON.stringify(payload);
        const hash = crypto.createHash('sha256').update(combined).digest('hex');
        return { fingerprint: hash, activity: activity };
    }
    catch (e) {
        return { fingerprint: null, error: e instanceof Error ? e.message : String(e) };
    }
}
73
/**
 * Swipe the screen repeatedly until an element matching `selector` is visible.
 *
 * @param opts Options:
 *   - `selector`: any of `text`, `resourceId`, `contentDesc`, `className`; every
 *     field that is set must equal the element's corresponding field
 *   - `direction`: 'down' (default); any other value swipes the opposite way
 *   - `maxScrolls`: maximum number of swipes (default 10)
 *   - `scrollAmount`: swipe length as a fraction of screen height (default 0.7)
 *   - `deviceId`: forwarded to `swipe`
 *   - `fetchTree`: async function returning the current UI tree
 *   - `swipe`: async function `(x1, y1, x2, y2, durationMs, deviceId)`
 *   - `stabilizationDelayMs`: wait after each swipe (default 350)
 * @returns `{ success: true, element, scrollsPerformed }` when the element is
 *          found, otherwise `{ success: false, reason, scrollsPerformed }`.
 */
export async function scrollToElementShared(opts) {
    const { selector, direction = 'down', maxScrolls = 10, scrollAmount = 0.7, deviceId, fetchTree, swipe, stabilizationDelayMs = 350 } = opts;
    const SWIPE_DURATION_MS = 300;
    // Pairs of [selector field, element field] that must agree when the selector field is set.
    const criteria = [
        ['text', 'text'],
        ['resourceId', 'resourceId'],
        ['contentDesc', 'contentDescription'],
        ['className', 'type']
    ];
    const matchesSelector = (el) => Boolean(el) && criteria.every(([wanted, actual]) => selector[wanted] === undefined || selector[wanted] === el[actual]);
    const onScreen = (el, resolution) => {
        if (!el || el.visible === false)
            return false;
        const haveGeometry = el.bounds && resolution && resolution.width && resolution.height;
        if (!haveGeometry) {
            // Without bounds or a resolution, fall back to the element's own flag.
            return el.visible === undefined ? true : !!el.visible;
        }
        const [left, top, right, bottom] = el.bounds;
        return right > 0 && left < resolution.width && bottom > 0 && top < resolution.height;
    };
    const locate = (elements, resolution) => {
        if (!Array.isArray(elements))
            return null;
        const hit = elements.find((el) => matchesSelector(el) && onScreen(el, resolution));
        return hit === undefined ? null : hit;
    };
    const foundResult = (el, scrollsPerformed) => ({
        success: true,
        element: { text: el.text, resourceId: el.resourceId, bounds: el.bounds },
        scrollsPerformed
    });
    // Cheap snapshot of the tree, used to notice that a swipe changed nothing.
    const snapshotOf = (t) => {
        try {
            return JSON.stringify((t.elements || []).map((e) => ({ text: e.text, resourceId: e.resourceId, bounds: e.bounds })));
        }
        catch {
            return '';
        }
    };
    // Check the screen as it is before swiping at all.
    let tree = await fetchTree();
    if (tree.error)
        return { success: false, reason: tree.error, scrollsPerformed: 0 };
    let match = locate(tree.elements, tree.resolution);
    if (match)
        return foundResult(match, 0);
    let previousSnapshot = snapshotOf(tree);
    // Swipe geometry is derived once from the initial tree's resolution.
    const screenWidth = (tree.resolution && tree.resolution.width) ? tree.resolution.width : 0;
    const screenHeight = (tree.resolution && tree.resolution.height) ? tree.resolution.height : 0;
    const clampFraction = (v) => Math.max(0.05, Math.min(0.95, v));
    const scrollingDown = direction === 'down';
    const startFraction = scrollingDown ? 0.8 : 0.2;
    const endFraction = startFraction + (scrollingDown ? -scrollAmount : scrollAmount);
    const x = Math.round(screenWidth / 2) || 50;
    const span = screenHeight || 100;
    const yStart = Math.round(span * clampFraction(startFraction));
    const yEnd = Math.round(span * clampFraction(endFraction));
    let scrollsPerformed = 0;
    while (scrollsPerformed < maxScrolls) {
        try {
            await swipe(x, yStart, x, yEnd, SWIPE_DURATION_MS, deviceId);
        }
        catch (e) {
            // A failed swipe is logged but still counts as an attempt.
            try {
                console.warn(`scrollToElement swipe failed: ${e instanceof Error ? e.message : String(e)}`);
            }
            catch { }
        }
        scrollsPerformed += 1;
        await new Promise(resolve => setTimeout(resolve, stabilizationDelayMs));
        tree = await fetchTree();
        if (tree.error)
            return { success: false, reason: tree.error, scrollsPerformed: scrollsPerformed };
        match = locate(tree.elements, tree.resolution);
        if (match)
            return foundResult(match, scrollsPerformed);
        const currentSnapshot = snapshotOf(tree);
        if (currentSnapshot === previousSnapshot) {
            return { success: false, reason: 'UI unchanged after scroll; likely end of list', scrollsPerformed: scrollsPerformed };
        }
        previousSnapshot = currentSnapshot;
    }
    return { success: false, reason: 'Element not found after scrolling', scrollsPerformed: scrollsPerformed };
}
package/docs/CHANGELOG.md CHANGED
@@ -2,6 +2,10 @@
2
2
 
3
3
  All notable changes to the **Mobile Debug MCP** project will be documented in this file.
4
4
 
5
+ ## [0.16.0]
6
+ - Added `wait_for_screen_change` interact tool: polls the platform-specific `get_screen_fingerprint` until it differs from a provided `previousFingerprint`, with configurable `timeoutMs` and `pollIntervalMs`, and performs a stability confirmation poll before reporting success to avoid reacting to transient UI flickers. Implemented at the interact layer; fingerprinting is delegated to the observe implementations (Android/iOS).
7
+ - Added unit tests covering immediate change, transient null fingerprints, stability confirmation and timeout behavior: `test/interact/unit/wait_for_screen_change.test.ts`.
8
+
5
9
  ## [0.15.0]
6
10
  - Reorganised repository for cohesion: merged tool handlers into feature entrypoints (src/observe, src/interact, src/manage) and moved platform helpers and CLI tooling into src/utils/{android,ios,cli}.
7
11
  - Added computeScreenFingerprint utility used by observe/interact to normalise UI element significance across platforms (fingerprint shared between Android and iOS implementations).
@@ -72,3 +72,32 @@ Notes:
72
72
  - Android swipe uses `adb shell input swipe` with screen percentage coordinates. iOS swipe uses `idb ui swipe` command; note `idb` swipe does not accept a duration argument.
73
73
  - Unit tests are located at `test/unit/observe/scroll_to_element.test.ts` and device runners at `test/device/observe/`.
74
74
 
75
+ ---
76
+
77
+ ## wait_for_screen_change
78
+
79
+ Description:
80
+ - Waits until the current screen fingerprint differs from the provided `previousFingerprint`. Useful after taps, navigation, or other interactions that should change the visible UI.
81
+
82
+ Input example:
83
+ ```
84
+ { "platform": "android", "previousFingerprint": "<hex-fingerprint>", "timeoutMs": 5000, "pollIntervalMs": 300, "deviceId": "emulator-5554" }
85
+ ```
86
+
87
+ Success response example:
88
+ ```
89
+ { "success": true, "newFingerprint": "<hex-fingerprint>", "elapsedMs": 420 }
90
+ ```
91
+
92
+ Failure (timeout) example:
93
+ ```
94
+ { "success": false, "reason": "timeout", "lastFingerprint": "<hex-fingerprint>", "elapsedMs": 5000 }
95
+ ```
96
+
97
+ Notes:
98
+ - Always compares to the original `previousFingerprint` (baseline is not updated during polling).
99
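+ - Treats `null` fingerprints as transient; continues polling rather than returning success. If the most recent poll returned no fingerprint when the timeout is reached, `lastFingerprint` in the failure response is `null`.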
100
+ - Includes a stability confirmation: after detecting a different fingerprint it waits one additional poll interval and confirms the fingerprint is stable before returning success to avoid reacting to transient flickers or animation frames.
101
+ - Default `timeoutMs` is 5000ms and default `pollIntervalMs` is 300ms; callers may override these.
102
+ - Implemented as an interact-level tool and delegates platform-specific fingerprint calculation to the observe layer (`get_screen_fingerprint`).
103
+
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mobile-debug-mcp",
3
- "version": "0.15.0",
3
+ "version": "0.16.0",
4
4
  "description": "MCP server for mobile app debugging (Android + iOS), with focus on security and reliability",
5
5
  "type": "module",
6
6
  "bin": {
@@ -1,7 +1,7 @@
1
1
  import { WaitForElementResponse, TapResponse, SwipeResponse, TypeTextResponse, PressBackResponse } from "../types.js"
2
2
  import { execAdb, getAndroidDeviceMetadata, getDeviceInfo } from "../utils/android/utils.js"
3
3
  import { AndroidObserve } from "../observe/index.js"
4
- import { scrollToElementShared } from "../interact/shared/scroll_to_element.js"
4
+ import { scrollToElementShared } from "../utils/ui/index.js"
5
5
 
6
6
 
7
7
  export class AndroidInteract {
@@ -3,6 +3,7 @@ import { iOSInteract } from './ios.js';
3
3
  export { AndroidInteract, iOSInteract };
4
4
 
5
5
  import { resolveTargetDevice } from '../utils/resolve-device.js'
6
+ import { ToolsObserve } from '../observe/index.js'
6
7
 
7
8
  export class ToolsInteract {
8
9
 
@@ -44,4 +45,46 @@ export class ToolsInteract {
44
45
  return await interact.scrollToElement(selector, direction, maxScrolls, scrollAmount, resolved.id)
45
46
  }
46
47
 
48
/**
 * Poll the screen fingerprint until it differs from `previousFingerprint`
 * and is still the same one poll later, or until `timeoutMs` has elapsed.
 *
 * @param platform Optional platform, forwarded to the fingerprint handler.
 * @param previousFingerprint Baseline fingerprint; it is never updated while polling.
 * @param timeoutMs Overall time budget in milliseconds (default 5000).
 * @param pollIntervalMs Delay between polls in milliseconds (default 300).
 * @param deviceId Optional device id, forwarded to the fingerprint handler.
 * @returns `{ success: true, newFingerprint, elapsedMs }` once a changed fingerprint
 * has been confirmed, otherwise `{ success: false, reason: 'timeout', lastFingerprint, elapsedMs }`.
 * The timeout result also carries `lastError` when a poll threw, so a persistent
 * failure is not reported as a plain timeout.
 */
static async waitForScreenChangeHandler({ platform, previousFingerprint, timeoutMs = 5000, pollIntervalMs = 300, deviceId }: { platform?: 'android' | 'ios', previousFingerprint: string, timeoutMs?: number, pollIntervalMs?: number, deviceId?: string }) {
  const start = Date.now()
  const sleep = (ms: number) => new Promise<void>(resolve => setTimeout(resolve, ms))
  // Idle waits are capped at the time left in the budget so the call cannot
  // overshoot timeoutMs by a whole poll interval.
  const idle = () => sleep(Math.max(0, Math.min(pollIntervalMs, timeoutMs - (Date.now() - start))))
  const errorMessage = (err: unknown) => (err instanceof Error ? err.message : String(err))

  let lastFingerprint: string | null = null
  let lastError: string | undefined

  while (Date.now() - start < timeoutMs) {
    try {
      const res = await ToolsObserve.getScreenFingerprintHandler({ platform, deviceId })
      // The handler's return type is not visible here, hence the loose cast.
      const fp = (res as any)?.fingerprint ?? null
      if (fp === null) {
        // A missing fingerprint is treated as transient: keep polling.
        lastFingerprint = null
        await idle()
        continue
      }

      lastFingerprint = fp

      if (fp !== previousFingerprint) {
        // Stability confirmation: wait one full interval (deliberately not
        // capped) and require the same fingerprint again before succeeding.
        await sleep(pollIntervalMs)
        try {
          const confirmRes = await ToolsObserve.getScreenFingerprintHandler({ platform, deviceId })
          const confirmFp = (confirmRes as any)?.fingerprint ?? null
          if (confirmFp === fp) {
            return { success: true, newFingerprint: fp, elapsedMs: Date.now() - start }
          }
          lastFingerprint = confirmFp
        } catch (err: unknown) {
          // Remember the failure and keep polling.
          lastError = errorMessage(err)
        }
        // The confirmation wait already served as this round's delay.
        continue
      }
    } catch (err: unknown) {
      // Errors are treated as transient, but the latest one is kept for the
      // timeout report.
      lastError = errorMessage(err)
    }

    await idle()
  }

  const timedOut = { success: false, reason: 'timeout', lastFingerprint, elapsedMs: Date.now() - start }
  return lastError === undefined ? timedOut : { ...timedOut, lastError }
}
89
+
47
90
  }
@@ -2,7 +2,7 @@ import { spawn } from "child_process"
2
2
  import { WaitForElementResponse, TapResponse, SwipeResponse } from "../types.js"
3
3
  import { getIOSDeviceMetadata, getIdbCmd, isIDBInstalled } from "../utils/ios/utils.js"
4
4
  import { iOSObserve } from "../observe/index.js"
5
- import { scrollToElementShared } from "../interact/shared/scroll_to_element.js"
5
+ import { scrollToElementShared } from "../utils/ui/index.js"
6
6
 
7
7
  export class iOSInteract {
8
8
  private observe = new iOSObserve();
@@ -5,7 +5,7 @@ import { getAdbCmd, execAdb, getAndroidDeviceMetadata, getDeviceInfo, delay, get
5
5
  import { createWriteStream } from "fs"
6
6
  import { promises as fsPromises } from "fs"
7
7
  import path from "path"
8
- import { computeScreenFingerprint } from "../interact/shared/fingerprint.js"
8
+ import { computeScreenFingerprint } from "../utils/ui/index.js"
9
9
 
10
10
  const activeLogStreams: Map<string, { proc: any, file: string }> = new Map()
11
11
 
@@ -5,7 +5,7 @@ import { execCommand, getIOSDeviceMetadata, validateBundleId, getIdbCmd, getXcru
5
5
  import { createWriteStream, promises as fsPromises } from 'fs'
6
6
  import path from 'path'
7
7
  import { parseLogLine } from '../utils/android/utils.js'
8
- import { computeScreenFingerprint } from '../interact/shared/fingerprint.js'
8
+ import { computeScreenFingerprint } from '../utils/ui/index.js'
9
9
 
10
10
  const delay = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));
11
11
 
package/src/server.ts CHANGED
@@ -294,6 +294,21 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
294
294
  }
295
295
  }
296
296
  },
297
+ {
298
+ name: "wait_for_screen_change",
299
+ description: "Wait until the current screen fingerprint differs from a provided previousFingerprint. Useful to wait for navigation/animation completion.",
300
+ inputSchema: {
301
+ type: "object",
302
+ properties: {
303
+ platform: { type: "string", enum: ["android", "ios"], description: "Optional platform override (android|ios)" },
304
+ previousFingerprint: { type: "string", description: "The fingerprint to compare against (required)" },
305
+ timeoutMs: { type: "number", description: "Timeout in ms to wait for change (default 5000)", default: 5000 },
306
+ pollIntervalMs: { type: "number", description: "Polling interval in ms (default 300)", default: 300 },
307
+ deviceId: { type: "string", description: "Optional device id/udid to target" }
308
+ },
309
+ required: ["previousFingerprint"]
310
+ }
311
+ },
297
312
  {
298
313
  name: "wait_for_element",
299
314
  description: "Wait until a UI element with matching text appears on screen or timeout is reached.",
@@ -322,6 +337,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
322
337
  required: ["platform", "text"]
323
338
  }
324
339
  },
340
+
325
341
  {
326
342
  name: "tap",
327
343
  description: "Simulate a finger tap on the device screen at specific coordinates.",
@@ -596,6 +612,12 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
596
612
  return wrapResponse(res)
597
613
  }
598
614
 
615
+ if (name === "wait_for_screen_change") {
616
+ const { platform, previousFingerprint, timeoutMs, pollIntervalMs, deviceId } = (args || {}) as any
617
+ const res = await ToolsInteract.waitForScreenChangeHandler({ platform, previousFingerprint, timeoutMs, pollIntervalMs, deviceId })
618
+ return wrapResponse(res)
619
+ }
620
+
599
621
  if (name === "wait_for_element") {
600
622
  const { platform, text, timeout, deviceId } = (args || {}) as any
601
623
  const res = await ToolsInteract.waitForElementHandler({ platform, text, timeout, deviceId })
package/src/types.ts CHANGED
@@ -143,3 +143,4 @@ export interface InstallAppResponse {
143
143
  error?: string;
144
144
  diagnostics?: any;
145
145
  }
146
+
@@ -1,8 +1,8 @@
1
- import { spawn } from 'child_process'
2
1
  import { DeviceInfo, UIElement } from "../../types.js"
3
2
  import { promises as fsPromises, existsSync } from 'fs'
4
3
  import path from 'path'
5
4
  import { detectJavaHome } from '../java.js'
5
+ import { execCmd } from '../exec.js'
6
6
 
7
7
  export function getAdbCmd() { return process.env.ADB_PATH || 'adb' }
8
8
 
@@ -83,87 +83,20 @@ import type { SpawnOptions } from 'child_process'
83
83
 
84
84
  export type SpawnOptionsWithTimeout = SpawnOptions & { timeout?: number }
85
85
 
86
/**
 * Runs an adb command and resolves with its stdout.
 *
 * @param args adb arguments (without the device selector).
 * @param deviceId Optional device serial, passed to `getAdbArgs`.
 * @param options Spawn-style options; only `timeout`, `env`, `cwd` (when a string)
 *   and `shell` (coerced to boolean) are forwarded to `execCmd`.
 * @returns The stdout reported by `execCmd`.
 * @throws Error when the command exits with a non-zero code (message is stderr when
 *   available), or when no exit code is reported at all.
 */
export async function execAdb(args: string[], deviceId?: string, options: SpawnOptionsWithTimeout = {}): Promise<string> {
  const adbArgs = getAdbArgs(args, deviceId)
  const timeoutMs = getAdbTimeout(args, options.timeout)
  const res = await execCmd(getAdbCmd(), adbArgs, { timeout: timeoutMs, env: options.env as any, cwd: typeof options.cwd === 'string' ? options.cwd : undefined, shell: !!options.shell })

  // execCmd resolves with a null exit code when its timeout fires (and Node reports
  // null when a child is ended by a signal). Surface that explicitly instead of the
  // misleading "Command failed with code null".
  if (res.exitCode === null) {
    const detail = res.stderr ? ` (${res.stderr})` : ''
    throw new Error(`ADB command timed out after ${timeoutMs}ms or was terminated by a signal: ${args.join(' ')}${detail}`)
  }

  if (res.exitCode !== 0) throw new Error(res.stderr || `Command failed with code ${res.exitCode}`)
  return res.stdout
}
135
93
 
136
94
  // Spawn adb but return full streams and exit code so callers can implement fallbacks or stream output
137
/**
 * Runs an adb command and resolves with its captured output and exit code,
 * leaving the interpretation of non-zero (or null) codes to the caller.
 *
 * @param args adb arguments (without the device selector).
 * @param deviceId Optional device serial, passed to `getAdbArgs`.
 * @param options Spawn-style options; only `timeout`, `env`, `cwd` (when a string)
 *   and `shell` (coerced to boolean) are forwarded to `execCmd`.
 * @returns `{ stdout, stderr, code }`, where `code` is the exit code reported by `execCmd`.
 */
export async function spawnAdb(args: string[], deviceId?: string, options: SpawnOptionsWithTimeout = {}): Promise<{ stdout: string, stderr: string, code: number | null }> {
  const { timeout, env, cwd, shell } = options
  const adbArgs = getAdbArgs(args, deviceId)
  const timeoutMs = getAdbTimeout(args, timeout)

  const { stdout, stderr, exitCode } = await execCmd(getAdbCmd(), adbArgs, {
    timeout: timeoutMs,
    env: env as any,
    // Only plain string working directories are passed through.
    cwd: typeof cwd === 'string' ? cwd : undefined,
    shell: !!shell
  })

  return { stdout, stderr, code: exitCode }
}
168
101
 
169
102
  export function getDeviceInfo(deviceId: string, metadata: Partial<DeviceInfo> = {}): DeviceInfo {
@@ -0,0 +1,33 @@
1
+ import { spawn } from 'child_process'
2
+
3
+ export type ExecOptions = { timeout?: number; env?: NodeJS.ProcessEnv; cwd?: string; shell?: boolean }
4
+
5
/**
 * Spawns `cmd` with `args`, captures stdout and stderr, and resolves when the
 * process closes or the timeout fires, whichever comes first.
 *
 * Output is accumulated as raw bytes and decoded once, so a multi-byte UTF-8
 * character that arrives split across two `data` events is not corrupted.
 *
 * @param cmd Executable to run.
 * @param args Arguments passed to the executable.
 * @param opts `timeout` in ms (0 or omitted disables it), extra `env` entries merged
 *   over `process.env`, working directory `cwd`, and the `shell` flag for `spawn`.
 * @returns Trimmed stdout/stderr plus the exit code. `exitCode` is `null` when the
 *   timeout fired (the child is killed and the output captured so far is returned).
 * @throws Rejects with the error emitted by the child process `'error'` event.
 */
export async function execCmd(cmd: string, args: string[], opts: ExecOptions = {}): Promise<{ exitCode: number | null, stdout: string, stderr: string }> {
  const { timeout = 0, env, cwd, shell } = opts
  return new Promise((resolve, reject) => {
    const child = spawn(cmd, args, { env: { ...process.env, ...(env || {}) }, cwd, shell })

    const stdoutChunks: Buffer[] = []
    const stderrChunks: Buffer[] = []
    const collectInto = (sink: Buffer[]) => (chunk: Buffer | string) => {
      sink.push(typeof chunk === 'string' ? Buffer.from(chunk) : chunk)
    }
    if (child.stdout) child.stdout.on('data', collectInto(stdoutChunks))
    if (child.stderr) child.stderr.on('data', collectInto(stderrChunks))

    // Decode each stream in one go; per-chunk decoding breaks split code points.
    const result = (exitCode: number | null) => ({
      exitCode,
      stdout: Buffer.concat(stdoutChunks).toString().trim(),
      stderr: Buffer.concat(stderrChunks).toString().trim()
    })

    let timedOut = false
    const timer: ReturnType<typeof setTimeout> | null = timeout > 0 ? setTimeout(() => {
      timedOut = true
      try { child.kill() } catch { /* child may already be gone */ }
      // Resolve right away with whatever was captured; do not wait for 'close'.
      resolve(result(null))
    }, timeout) : null

    child.on('close', (code) => {
      if (timer) clearTimeout(timer)
      if (timedOut) return
      resolve(result(code))
    })

    child.on('error', (err) => {
      if (timer) clearTimeout(timer)
      reject(err)
    })
  })
}
33
+
@@ -1,4 +1,76 @@
1
- import { UIElement, GetUITreeResponse, SwipeResponse } from '../../types.js'
1
+ import crypto from 'crypto'
2
+ import { GetUITreeResponse, GetCurrentScreenResponse, UIElement, SwipeResponse } from '../../types.js'
3
+
4
+ const ANDROID_STRUCTURAL_TYPES = ['Window','Application','View','ViewGroup','LinearLayout','FrameLayout','RelativeLayout','ScrollView','RecyclerView','TextView','ImageView']
5
+ const IOS_STRUCTURAL_TYPES = ['Window','Application','View','ViewController','UITableView','UICollectionView','UILabel','UIImageView','UIView','UIWindow','UIStackView','UITextView','UITableViewCell']
6
+
7
/**
 * Reports whether a piece of UI text matches one of the volatile-value patterns
 * below (clock times, ISO dates, percentages, bare numbers, digit/comma groups).
 *
 * @param t Text to classify; undefined, empty or whitespace-only text is not dynamic.
 * @returns `true` when the trimmed text matches any of the patterns.
 */
function isDynamicText(t?: string): boolean {
  const txt = (t || '').trim()
  if (!txt) return false

  const dynamicPatterns = [
    /\b\d{1,2}:\d{2}\b/,       // clock-like value anywhere in the text, e.g. 9:05
    /\b\d{4}-\d{2}-\d{2}\b/,   // ISO-style date anywhere in the text
    /^\d+(?:\.\d+)?%$/,        // whole text is a percentage
    /^\d+$/,                   // whole text is digits
    /^[\d,]{1,10}$/            // whole text is up to ten digits/commas
  ]
  return dynamicPatterns.some(pattern => pattern.test(txt))
}
18
+
19
/**
 * Reduces a UI element to the fields used for fingerprinting.
 *
 * - `type`, `resourceId`, `contentDesc`: stringified, empty string when missing.
 * - `text`: trimmed and lower-cased; empty when the text is not a string or
 *   `isDynamicText` classifies it as dynamic.
 * - `bounds`: the first four entries coerced to numbers (non-numeric entries
 *   become 0), or `[0,0,0,0]` when `bounds` is not an array.
 */
function normalizeElement(e: UIElement) {
  const rawText = e.text
  let text = ''
  if (typeof rawText === 'string' && !isDynamicText(rawText)) {
    text = rawText.trim().toLowerCase()
  }

  let bounds = [0, 0, 0, 0]
  if (Array.isArray(e.bounds)) {
    bounds = e.bounds.slice(0, 4).map((n: any) => Number(n) || 0)
  }

  const type = (e.type || '').toString()
  const resourceId = (e.resourceId || '').toString()
  const contentDesc = (e.contentDescription || '').toString()

  return { type, resourceId, text, contentDesc, bounds }
}
28
+
29
/**
 * Computes a SHA-256 fingerprint of the current screen from its UI tree.
 *
 * Visible elements that are interactable (clickable/enabled) or structurally
 * significant (have text, a resource id, or a listed structural type) are
 * normalised, reduced to those that still carry text, a resource id or a content
 * description, ordered top-to-bottom then left-to-right by their bounds, capped
 * at `limit`, and hashed together with the activity (Android only) and the
 * tree resolution.
 *
 * @param tree UI tree response; a missing tree or one carrying `error` yields a null fingerprint.
 * @param current Current-screen info used for the activity name, or null.
 * @param platform Selects the structural type list and whether the activity is hashed.
 * @param limit Maximum number of elements included in the hash (default 50).
 * @returns `{ fingerprint, activity }` on success, `{ fingerprint: null, error }` otherwise.
 */
export function computeScreenFingerprint(tree: GetUITreeResponse, current: GetCurrentScreenResponse | null, platform: 'android' | 'ios', limit: number = 50): { fingerprint: string | null; activity?: string; error?: string } {
  try {
    // Report a missing tree explicitly rather than dereferencing it and leaking a TypeError message.
    if (!tree) return { fingerprint: null, error: 'UI tree is missing' }
    const treeError = (tree as any).error
    if (treeError) return { fingerprint: null, error: treeError }

    const activity = (current && (current.activity || (current as any).shortActivity)) || ''

    // The list depends only on the platform, so pick it once rather than per element.
    const structuralTypes = platform === 'android' ? ANDROID_STRUCTURAL_TYPES : IOS_STRUCTURAL_TYPES

    const candidates: UIElement[] = (tree.elements || []).filter(e => {
      if (!e) return false
      if (!e.visible) return false
      const hasText = typeof e.text === 'string' && e.text.trim().length > 0
      const hasResource = !!e.resourceId
      const interactable = !!e.clickable || !!e.enabled
      const structurallySignificant = hasText || hasResource || structuralTypes.includes(e.type || '')
      return interactable || structurallySignificant
    }) as UIElement[]

    // Drop elements left with no identifying content after normalisation.
    const identifiable = candidates
      .map(normalizeElement)
      .filter(e => (e.text && e.text.length > 0) || (e.resourceId && e.resourceId.length > 0) || (e.contentDesc && e.contentDesc.length > 0))

    // Order by top edge, then left edge.
    identifiable.sort((a, b) => {
      const ay = (a.bounds && a.bounds[1]) || 0
      const by = (b.bounds && b.bounds[1]) || 0
      if (ay !== by) return ay - by
      const ax = (a.bounds && a.bounds[0]) || 0
      const bx = (b.bounds && b.bounds[0]) || 0
      return ax - bx
    })

    const limited = identifiable.slice(0, Math.max(0, limit))

    // Key order is part of the hash input (JSON.stringify), so keep it fixed.
    // Bounds are used for ordering only and are not hashed.
    const payload = {
      activity: platform === 'android' ? (activity || '') : '',
      resolution: (tree as any).resolution || { width: 0, height: 0 },
      elements: limited.map(e => ({ type: e.type, resourceId: e.resourceId, text: e.text, contentDesc: e.contentDesc }))
    }

    const combined = JSON.stringify(payload)
    const hash = crypto.createHash('sha256').update(combined).digest('hex')
    return { fingerprint: hash, activity: activity }
  } catch (e) {
    return { fingerprint: null, error: e instanceof Error ? e.message : String(e) }
  }
}
2
74
 
3
75
  export interface ScrollSelector { text?: string; resourceId?: string; contentDesc?: string; className?: string }
4
76
 
@@ -84,7 +156,6 @@ export async function scrollToElementShared(opts: {
84
156
  try {
85
157
  await swipe(x1, y1, x2, y2, duration, deviceId)
86
158
  } catch (e) {
87
- // Log swipe failures to aid debugging but don't fail the overall flow
88
159
  try { console.warn(`scrollToElement swipe failed: ${e instanceof Error ? e.message : String(e)}`) } catch {}
89
160
  }
90
161
 
@@ -0,0 +1,32 @@
1
+ import { ToolsInteract } from '../../../src/interact/index.js'
2
+ import * as Observe from '../../../src/observe/index.js'
3
+
4
+ const original = (Observe as any).ToolsObserve.getScreenFingerprintHandler
5
+
6
/**
 * Exercises ToolsInteract.waitForScreenChangeHandler against a stubbed
 * ToolsObserve.getScreenFingerprintHandler.
 *
 * Each case logs PASS or FAIL. A FAIL also sets a non-zero process exit code so
 * the run does not report success, and the stub is always replaced by the
 * original handler, even when a case throws.
 */
async function runTests() {
  console.log('Starting tests for wait_for_screen_change...')

  // Logs the verdict and marks the process as failed on any FAIL.
  const report = (label: string, passed: boolean, ...extra: unknown[]) => {
    if (!passed) process.exitCode = 1
    console.log(label, passed ? 'PASS' : 'FAIL', ...extra)
  }

  try {
    // Test 1: Immediate change
    let seq1: Array<string | null> = ['B','B']
    ;(Observe as any).ToolsObserve.getScreenFingerprintHandler = async () => ({ fingerprint: seq1.length ? seq1.shift() : null })
    const start1 = Date.now()
    const res1 = await ToolsInteract.waitForScreenChangeHandler({ platform: 'android', previousFingerprint: 'A', timeoutMs: 2000, pollIntervalMs: 50 })
    const elapsed1 = Date.now() - start1
    report('Test 1: Immediate change ->', !!res1 && (res1 as any).success === true && (res1 as any).newFingerprint === 'B', 'Elapsed:', elapsed1, 'ms')

    // Test 2: Transient nulls then stable change
    let seq2: Array<string | null> = [null, null, 'B', 'B']
    ;(Observe as any).ToolsObserve.getScreenFingerprintHandler = async () => ({ fingerprint: seq2.length ? seq2.shift() : 'B' })
    const res2 = await ToolsInteract.waitForScreenChangeHandler({ platform: 'android', previousFingerprint: 'A', timeoutMs: 3000, pollIntervalMs: 50 })
    report('Test 2: Transient nulls ->', !!res2 && (res2 as any).success === true && (res2 as any).newFingerprint === 'B')

    // Test 3: Timeout
    ;(Observe as any).ToolsObserve.getScreenFingerprintHandler = async () => ({ fingerprint: 'A' })
    const res3 = await ToolsInteract.waitForScreenChangeHandler({ platform: 'android', previousFingerprint: 'A', timeoutMs: 300, pollIntervalMs: 50 })
    report('Test 3: Timeout ->', !!res3 && (res3 as any).success === false && (res3 as any).reason === 'timeout')
  } finally {
    // Restore original so later test files never see the stub.
    ;(Observe as any).ToolsObserve.getScreenFingerprintHandler = original
  }
}
31
+
32
+ runTests().catch(console.error)
@@ -10,5 +10,6 @@ import '../manage/unit/build_and_install.test.ts'
10
10
  import '../manage/unit/diagnostics.test.ts'
11
11
  import '../manage/unit/detection.test.ts'
12
12
  import '../manage/unit/mcp_disable_autodetect.test.ts'
13
+ import '../interact/unit/wait_for_screen_change.test.ts'
13
14
 
14
15
  console.log('Unit tests loaded.')
@@ -1,73 +0,0 @@
1
- import crypto from 'crypto'
2
- import { GetUITreeResponse, GetCurrentScreenResponse, UIElement } from '../../types.js'
3
-
4
- const ANDROID_STRUCTURAL_TYPES = ['Window','Application','View','ViewGroup','LinearLayout','FrameLayout','RelativeLayout','ScrollView','RecyclerView','TextView','ImageView']
5
- const IOS_STRUCTURAL_TYPES = ['Window','Application','View','ViewController','UITableView','UICollectionView','UILabel','UIImageView','UIView','UIWindow','UIStackView','UITextView','UITableViewCell']
6
-
7
- function isDynamicText(t?: string): boolean {
8
- if (!t) return false
9
- const txt = t.trim()
10
- if (!txt) return false
11
- if (/\b\d{1,2}:\d{2}\b/.test(txt)) return true
12
- if (/\b\d{4}-\d{2}-\d{2}\b/.test(txt)) return true
13
- if (/^\d+(?:\.\d+)?%$/.test(txt)) return true
14
- if (/^\d+$/.test(txt)) return true
15
- if (/^[\d,]{1,10}$/.test(txt)) return true
16
- return false
17
- }
18
-
19
- function normalizeElement(e: UIElement) {
20
- return {
21
- type: (e.type || '').toString(),
22
- resourceId: (e.resourceId || '').toString(),
23
- text: typeof e.text === 'string' ? (isDynamicText(e.text) ? '' : e.text.trim().toLowerCase()) : '',
24
- contentDesc: (e.contentDescription || '').toString(),
25
- bounds: Array.isArray(e.bounds) ? e.bounds.slice(0,4).map((n:any)=>Number(n)||0) : [0,0,0,0]
26
- }
27
- }
28
-
29
- export function computeScreenFingerprint(tree: GetUITreeResponse, current: GetCurrentScreenResponse | null, platform: 'android' | 'ios', limit: number = 50): { fingerprint: string | null; activity?: string; error?: string } {
30
- try {
31
- if (!tree || (tree as any).error) return { fingerprint: null, error: (tree as any).error }
32
-
33
- const activity = current && (current.activity || (current as any).shortActivity) ? (current.activity || (current as any).shortActivity) : ''
34
-
35
- const candidates: UIElement[] = (tree.elements || []).filter(e => {
36
- if (!e) return false
37
- if (!e.visible) return false
38
- const hasStableText = typeof e.text === 'string' && e.text.trim().length > 0
39
- const hasResource = !!e.resourceId
40
- const interactable = !!e.clickable || !!e.enabled
41
- const structuralList = platform === 'android' ? ANDROID_STRUCTURAL_TYPES : IOS_STRUCTURAL_TYPES
42
- const structurallySignificant = hasStableText || hasResource || structuralList.includes(e.type || '')
43
- return interactable || structurallySignificant
44
- }) as UIElement[]
45
-
46
- const normalized = candidates.map(normalizeElement)
47
-
48
- const filteredNormalized = normalized.filter(e => (e.text && e.text.length > 0) || (e.resourceId && e.resourceId.length > 0) || (e.contentDesc && e.contentDesc.length > 0))
49
-
50
- filteredNormalized.sort((a,b) => {
51
- const ay = (a.bounds && a.bounds[1]) || 0
52
- const by = (b.bounds && b.bounds[1]) || 0
53
- if (ay !== by) return ay - by
54
- const ax = (a.bounds && a.bounds[0]) || 0
55
- const bx = (b.bounds && b.bounds[0]) || 0
56
- return ax - bx
57
- })
58
-
59
- const limited = filteredNormalized.slice(0, Math.max(0, limit))
60
-
61
- const payload = {
62
- activity: platform === 'android' ? (activity || '') : '',
63
- resolution: (tree as any).resolution || { width: 0, height: 0 },
64
- elements: limited.map(e => ({ type: e.type, resourceId: e.resourceId, text: e.text, contentDesc: e.contentDesc }))
65
- }
66
-
67
- const combined = JSON.stringify(payload)
68
- const hash = crypto.createHash('sha256').update(combined).digest('hex')
69
- return { fingerprint: hash, activity: activity }
70
- } catch (e) {
71
- return { fingerprint: null, error: e instanceof Error ? e.message : String(e) }
72
- }
73
- }