mobile-debug-mcp 0.17.0 → 0.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -24,6 +24,8 @@ A minimal, secure MCP server for AI-assisted mobile development. Build, install,
24
24
  }
25
25
  }
26
26
  ```
27
+ You will need to add `ADB_PATH` for Android, and `XCRUN_PATH` and `IDB_PATH` for iOS.
28
+
27
29
  ## Usage
28
30
 
29
31
  Example:
@@ -3,6 +3,7 @@ import { iOSInteract } from './ios.js';
3
3
  export { AndroidInteract, iOSInteract };
4
4
  import { resolveTargetDevice } from '../utils/resolve-device.js';
5
5
  import { ToolsObserve } from '../observe/index.js';
6
+ const STABLE_IDLE_MS = 1000;
6
7
  export class ToolsInteract {
7
8
  static async getInteractionService(platform, deviceId) {
8
9
  const effectivePlatform = platform || 'android';
@@ -35,6 +36,191 @@ export class ToolsInteract {
35
36
  const { interact, resolved } = await ToolsInteract.getInteractionService(platform, deviceId);
36
37
  return await interact.scrollToElement(selector, direction, maxScrolls, scrollAmount, resolved.id);
37
38
  }
39
+ static async findElementHandler({ query, exact = false, timeoutMs = 3000, platform, deviceId }) {
40
+ // Try to use observe layer to fetch the current UI tree and perform a fast semantic search
41
+ const start = Date.now();
42
+ const deadline = start + timeoutMs;
43
+ const normalize = (s) => (s === null || s === undefined) ? '' : String(s).toLowerCase().trim();
44
+ const q = normalize(query);
45
+ if (!q)
46
+ return { found: false, error: 'Empty query' };
47
+ let best = null;
48
+ let bestScore = 0;
49
+ const scoreElement = (el) => {
50
+ if (!el || !el.visible)
51
+ return 0;
52
+ const bounds = el.bounds || [0, 0, 0, 0];
53
+ if (!Array.isArray(bounds) || bounds.length < 4)
54
+ return 0;
55
+ const [l, t, r, b] = bounds;
56
+ if (r <= l || b <= t)
57
+ return 0;
58
+ // Do not early-return on non-interactable elements — score them so we can locate their clickable ancestor later
59
+ const interactable = !!(el.clickable || el.enabled || el.focusable);
60
+ const text = normalize(el.text ?? el.label ?? el.value ?? '');
61
+ const content = normalize(el.contentDescription ?? el.contentDesc ?? el.accessibilityLabel ?? '');
62
+ const resourceId = normalize(el.resourceId ?? el.resourceID ?? el.id ?? '');
63
+ const className = normalize(el.type ?? el.class ?? '');
64
+ let score = 0;
65
+ if (exact) {
66
+ if (text && text === q)
67
+ score = 1.0;
68
+ else if (content && content === q)
69
+ score = 0.95;
70
+ }
71
+ else {
72
+ if (text && text === q)
73
+ score = 1.0;
74
+ else if (content && content === q)
75
+ score = 0.95;
76
+ else if (text && text.includes(q))
77
+ score = 0.6;
78
+ else if (content && content.includes(q))
79
+ score = 0.55;
80
+ else if (resourceId && resourceId.includes(q))
81
+ score = 0.7;
82
+ else if (className && className.includes(q))
83
+ score = 0.3;
84
+ }
85
+ if (score > 0 && interactable)
86
+ score += 0.05;
87
+ return score;
88
+ };
89
+ while (Date.now() <= deadline) {
90
+ try {
91
+ const tree = await ToolsObserve.getUITreeHandler({ platform, deviceId });
92
+ if (tree && Array.isArray(tree.elements)) {
93
+ const elements = tree.elements;
94
+ for (let i = 0; i < elements.length; i++) {
95
+ const el = elements[i];
96
+ try {
97
+ const s = scoreElement(el);
98
+ const interactable = !!(el.clickable || el.enabled || el.focusable);
99
+ if (s > bestScore) {
100
+ bestScore = s;
101
+ best = el;
102
+ if (best) {
103
+ best._index = i;
104
+ best._interactable = interactable;
105
+ }
106
+ }
107
+ if (bestScore >= 0.95)
108
+ break;
109
+ }
110
+ catch (e) {
111
+ console.error('Error scoring element:', e);
112
+ }
113
+ }
114
+ if (bestScore >= 0.95)
115
+ break;
116
+ }
117
+ }
118
+ catch (e) {
119
+ console.error('Error fetching UI tree:', e);
120
+ }
121
+ if (Date.now() > deadline)
122
+ break;
123
+ await new Promise(r => setTimeout(r, 100));
124
+ }
125
+ if (!best)
126
+ return { found: false, error: 'Element not found' };
127
+ // If the best match is not interactable, try to resolve an actionable ancestor.
128
+ try {
129
+ const tree = await ToolsObserve.getUITreeHandler({ platform, deviceId });
130
+ const elements = (tree && Array.isArray(tree.elements)) ? tree.elements : [];
131
+ let chosen = best;
132
+ const childBounds = Array.isArray(chosen?.bounds) ? chosen.bounds : null;
133
+ // Strategy 1: if parentId references an index, climb that chain
134
+ let resolvedAncestor = null;
135
+ if (childBounds && (chosen.parentId !== undefined && chosen.parentId !== null)) {
136
+ let cur = chosen;
137
+ let safety = 0;
138
+ while (cur && safety < 20 && !(cur.clickable || cur.focusable) && (cur.parentId !== undefined && cur.parentId !== null)) {
139
+ let pid = cur.parentId;
140
+ let idx = null;
141
+ if (typeof pid === 'number')
142
+ idx = pid;
143
+ else if (typeof pid === 'string' && /^\d+$/.test(pid))
144
+ idx = Number(pid);
145
+ // If parentId is not an index, try to find by matching resourceId or id field
146
+ if (idx !== null && elements[idx]) {
147
+ cur = elements[idx];
148
+ if (cur && (cur.clickable || cur.enabled || cur.focusable)) {
149
+ resolvedAncestor = cur;
150
+ break;
151
+ }
152
+ }
153
+ else if (typeof pid === 'string') {
154
+ // fallback: search elements for matching resourceId or id
155
+ const found = elements.find((el) => (el.resourceId === pid || el.id === pid));
156
+ if (found) {
157
+ cur = found;
158
+ if (cur && (cur.clickable || cur.enabled || cur.focusable)) {
159
+ resolvedAncestor = cur;
160
+ break;
161
+ }
162
+ // otherwise continue climbing if this found element has its own parentId
163
+ }
164
+ else {
165
+ break;
166
+ }
167
+ }
168
+ else {
169
+ break;
170
+ }
171
+ safety++;
172
+ }
173
+ }
174
+ // Strategy 2: fallback - find a clickable element whose bounds fully contain the child's bounds
175
+ if (!resolvedAncestor && childBounds) {
176
+ const [cl, ct, cr, cb] = childBounds;
177
+ // find candidates that are clickable and contain the child bounds
178
+ const candidates = elements.filter((el) => el && (el.clickable || el.focusable) && Array.isArray(el.bounds) && el.bounds.length >= 4).map((el) => ({ el, bounds: el.bounds }));
179
+ let bestCandidate = null;
180
+ let bestCandidateArea = Infinity;
181
+ for (const c of candidates) {
182
+ const [pl, pt, pr, pb] = c.bounds;
183
+ if (pl <= cl && pt <= ct && pr >= cr && pb >= cb) {
184
+ const area = (pr - pl) * (pb - pt);
185
+ if (area < bestCandidateArea) {
186
+ bestCandidateArea = area;
187
+ bestCandidate = c.el;
188
+ }
189
+ }
190
+ }
191
+ if (bestCandidate)
192
+ resolvedAncestor = bestCandidate;
193
+ }
194
+ if (resolvedAncestor) {
195
+ best = resolvedAncestor;
196
+ // small score bump to reflect actionability
197
+ bestScore = Math.min(1, bestScore + 0.02);
198
+ }
199
+ }
200
+ catch (e) {
201
+ console.error('Error resolving ancestor:', e);
202
+ }
203
+ if (!best)
204
+ return { found: false, error: 'Element not found' };
205
+ const boundsObj = Array.isArray(best.bounds) ? { left: best.bounds[0], top: best.bounds[1], right: best.bounds[2], bottom: best.bounds[3] } : null;
206
+ const tapCoordinates = boundsObj ? { x: Math.floor((boundsObj.left + boundsObj.right) / 2), y: Math.floor((boundsObj.top + boundsObj.bottom) / 2) } : null;
207
+ const outEl = {
208
+ text: best.text ?? null,
209
+ resourceId: best.resourceId ?? null,
210
+ contentDesc: best.contentDescription ?? best.contentDesc ?? null,
211
+ class: best.type ?? best.class ?? null,
212
+ bounds: boundsObj,
213
+ clickable: !!best.clickable,
214
+ enabled: !!best.enabled,
215
+ tapCoordinates,
216
+ telemetry: {
217
+ matchedIndex: best?._index ?? null,
218
+ matchedInteractable: !!best?._interactable
219
+ }
220
+ };
221
+ const scoreVal = Math.min(1, Number(bestScore.toFixed(3)));
222
+ return { found: true, element: outEl, score: scoreVal, confidence: scoreVal };
223
+ }
38
224
  static async waitForScreenChangeHandler({ platform, previousFingerprint, timeoutMs = 5000, pollIntervalMs = 300, deviceId }) {
39
225
  const start = Date.now();
40
226
  let lastFingerprint = null;
@@ -60,17 +246,180 @@ export class ToolsInteract {
60
246
  lastFingerprint = confirmFp;
61
247
  continue;
62
248
  }
63
- catch {
64
- // ignore and continue polling
249
+ catch (e) {
250
+ console.error('Error confirming fingerprint:', e);
65
251
  continue;
66
252
  }
67
253
  }
68
254
  }
69
- catch {
70
- // ignore transient errors
255
+ catch (e) {
256
+ console.error('Error getting screen fingerprint:', e);
71
257
  }
72
258
  await new Promise(resolve => setTimeout(resolve, pollIntervalMs));
73
259
  }
74
260
  return { success: false, reason: 'timeout', lastFingerprint, elapsedMs: Date.now() - start };
75
261
  }
262
+ static async observeUntilHandler({ type, query, timeoutMs = 5000, pollIntervalMs = 200, platform, deviceId }) {
263
+ const start = Date.now();
264
+ const deadline = start + (timeoutMs || 0);
265
+ const q = (query === null || query === undefined) ? '' : String(query);
266
+ // Baseline state
267
+ let initialFingerprint = null;
268
+ try {
269
+ const fpRes = await ToolsObserve.getScreenFingerprintHandler({ platform, deviceId });
270
+ initialFingerprint = fpRes?.fingerprint ?? null;
271
+ }
272
+ catch (err) {
273
+ console.error('observeUntil: error getting initial fingerprint', err);
274
+ initialFingerprint = null;
275
+ }
276
+ // For logs, capture a baseline snapshot (count or last line) to avoid matching historical lines
277
+ let baselineLastLine = null;
278
+ try {
279
+ const gl = await ToolsObserve.getLogsHandler({ platform, deviceId, lines: 200 });
280
+ const logsArr = Array.isArray(gl.logs) ? gl.logs : [];
281
+ baselineLastLine = logsArr.length ? logsArr[logsArr.length - 1] : null;
282
+ }
283
+ catch (err) {
284
+ // non-fatal but surface warning to aid debugging
285
+ try {
286
+ console.warn('observeUntil: failed to get baseline logs (non-fatal):', err instanceof Error ? err.message : String(err));
287
+ }
288
+ catch { }
289
+ }
290
+ let lastChangeAt = Date.now();
291
+ let prevFingerprint = initialFingerprint;
292
+ const sleep = (ms) => new Promise(r => setTimeout(r, ms));
293
+ // Telemetry
294
+ let pollCount = 0;
295
+ let timeToMatch = null;
296
+ let matchSource = null;
297
+ while (Date.now() <= deadline) {
298
+ pollCount++;
299
+ try {
300
+ if (type === 'ui') {
301
+ // fast findElement with short timeout to avoid blocking
302
+ try {
303
+ const found = await ToolsInteract.findElementHandler({ query: q, exact: false, timeoutMs: Math.min(500, timeoutMs || 500), platform, deviceId });
304
+ if (found && found.found) {
305
+ timeToMatch = Date.now() - start;
306
+ // determine matchSource heuristics
307
+ const el = found.element || {};
308
+ if (el && el.resourceId && String(el.resourceId).toLowerCase().includes(q.toLowerCase()))
309
+ matchSource = 'ui-resourceId';
310
+ else if (el && el.text && String(el.text).toLowerCase() === q.toLowerCase())
311
+ matchSource = 'ui-exact';
312
+ else
313
+ matchSource = 'ui-partial';
314
+ return { success: true, type: 'ui', matched: true, details: `UI element matched '${q}'`, timestamp: Date.now(), element: found.element, telemetry: { pollCount, timeToMatch, elapsedMs: Date.now() - start, matchSource } };
315
+ }
316
+ }
317
+ catch (err) {
318
+ console.error('observeUntil(ui) find error:', err);
319
+ }
320
+ }
321
+ else if (type === 'log') {
322
+ try {
323
+ // Try reading from active stream first
324
+ const stream = await ToolsObserve.readLogStreamHandler({ platform, sessionId: 'default', limit: 200 });
325
+ const entries = (stream && Array.isArray(stream.entries)) ? stream.entries : [];
326
+ for (const ent of entries) {
327
+ const msg = ent && (ent.message || ent.msg || ent) ? (ent.message || ent.msg || ent) : '';
328
+ if (q && String(msg).includes(q)) {
329
+ timeToMatch = Date.now() - start;
330
+ matchSource = 'log-stream';
331
+ return { success: true, type: 'log', matched: true, details: `Log matched '${q}'`, timestamp: Date.now(), log: { message: msg, raw: ent }, telemetry: { pollCount, timeToMatch, elapsedMs: Date.now() - start, matchSource } };
332
+ }
333
+ }
334
+ // Fallback to snapshot logs
335
+ const gl = await ToolsObserve.getLogsHandler({ platform, deviceId, lines: 200 });
336
+ const logsArr = Array.isArray(gl && gl.logs) ? gl.logs : [];
337
+ // Only consider new lines after baselineLastLine when possible
338
+ let startIndex = 0;
339
+ if (baselineLastLine) {
340
+ const idx = logsArr.lastIndexOf(baselineLastLine);
341
+ startIndex = idx >= 0 ? idx + 1 : 0;
342
+ }
343
+ for (let i = startIndex; i < logsArr.length; i++) {
344
+ const line = logsArr[i];
345
+ if (q && String(line).includes(q)) {
346
+ timeToMatch = Date.now() - start;
347
+ matchSource = 'log-snapshot';
348
+ return { success: true, type: 'log', matched: true, details: `Log matched '${q}'`, timestamp: Date.now(), log: { message: line }, telemetry: { pollCount, timeToMatch, elapsedMs: Date.now() - start, matchSource } };
349
+ }
350
+ }
351
+ }
352
+ catch (err) {
353
+ console.error('observeUntil(log) error:', err);
354
+ }
355
+ }
356
+ else if (type === 'screen') {
357
+ try {
358
+ const fpRes = await ToolsObserve.getScreenFingerprintHandler({ platform, deviceId });
359
+ const fp = fpRes?.fingerprint ?? null;
360
+ if (fp !== null && fp !== undefined && fp !== initialFingerprint) {
361
+ if (q) {
362
+ // optionally validate query against new screen context
363
+ try {
364
+ const found = await ToolsInteract.findElementHandler({ query: q, exact: false, timeoutMs: Math.min(500, timeoutMs || 500), platform, deviceId });
365
+ if (found && found.found) {
366
+ timeToMatch = Date.now() - start;
367
+ matchSource = 'screen-validated-ui';
368
+ return { success: true, type: 'screen', matched: true, details: `Screen changed and query matched on new screen`, timestamp: Date.now(), newFingerprint: fp, element: found.element, telemetry: { pollCount, timeToMatch, elapsedMs: Date.now() - start, matchSource } };
369
+ }
370
+ }
371
+ catch (err) {
372
+ console.error('observeUntil(screen) find error:', err);
373
+ }
374
+ // If query provided but not matched yet, continue polling until timeout
375
+ }
376
+ else {
377
+ timeToMatch = Date.now() - start;
378
+ matchSource = 'screen-fingerprint';
379
+ return { success: true, type: 'screen', matched: true, details: 'Screen fingerprint changed', timestamp: Date.now(), newFingerprint: fp, telemetry: { pollCount, timeToMatch, elapsedMs: Date.now() - start, matchSource } };
380
+ }
381
+ }
382
+ }
383
+ catch (err) {
384
+ console.error('observeUntil(screen) error:', err);
385
+ }
386
+ }
387
+ else if (type === 'idle') {
388
+ try {
389
+ const fpRes = await ToolsObserve.getScreenFingerprintHandler({ platform, deviceId });
390
+ const fp = fpRes?.fingerprint ?? null;
391
+ if (fp !== prevFingerprint) {
392
+ prevFingerprint = fp;
393
+ lastChangeAt = Date.now();
394
+ }
395
+ else {
396
+ if (Date.now() - lastChangeAt >= STABLE_IDLE_MS) {
397
+ timeToMatch = Date.now() - start;
398
+ matchSource = 'idle-stable';
399
+ return { success: true, type: 'idle', matched: true, details: `UI stable for ${STABLE_IDLE_MS}ms`, timestamp: Date.now(), fingerprint: fp, telemetry: { pollCount, timeToMatch, elapsedMs: Date.now() - start, matchSource } };
400
+ }
401
+ }
402
+ }
403
+ catch (err) {
404
+ console.error('observeUntil(idle) error:', err);
405
+ }
406
+ }
407
+ }
408
+ catch (err) {
409
+ console.error('observeUntil: unexpected error', err);
410
+ }
411
+ // Respect poll interval and avoid tight loop
412
+ await sleep(pollIntervalMs || 200);
413
+ }
414
+ // On timeout, capture a failure snapshot to aid debugging (best-effort)
415
+ let snapshot = null;
416
+ try {
417
+ snapshot = await ToolsObserve.captureDebugSnapshotHandler({ reason: `observe_until timeout for ${type}`, includeLogs: true, platform, deviceId });
418
+ }
419
+ catch (err) {
420
+ snapshot = { error: err instanceof Error ? err.message : String(err) };
421
+ }
422
+ const elapsed = Date.now() - start;
423
+ return { success: false, error: 'Timeout waiting for condition', type, timeoutMs, telemetry: { pollCount, elapsedMs: elapsed, matchSource: null }, snapshot };
424
+ }
76
425
  }
package/dist/server.js CHANGED
@@ -332,6 +332,21 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
332
332
  required: ["platform", "text"]
333
333
  }
334
334
  },
335
+ {
336
+ name: "find_element",
337
+ description: "Find a UI element by semantic query (text, content-desc, resource-id, class). Returns best match.",
338
+ inputSchema: {
339
+ type: "object",
340
+ properties: {
341
+ query: { type: "string", description: "Search query (text or label)" },
342
+ exact: { type: "boolean", description: "Require exact match (true/false)", default: false },
343
+ timeoutMs: { type: "number", description: "Timeout in ms to keep searching", default: 3000 },
344
+ platform: { type: "string", enum: ["android", "ios"], description: "Optional platform override" },
345
+ deviceId: { type: "string", description: "Optional device serial/udid" }
346
+ },
347
+ required: ["query"]
348
+ }
349
+ },
335
350
  {
336
351
  name: "tap",
337
352
  description: "Simulate a finger tap on the device screen at specific coordinates.",
@@ -602,6 +617,11 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
602
617
  const res = await ToolsInteract.waitForElementHandler({ platform, text, timeout, deviceId });
603
618
  return wrapResponse(res);
604
619
  }
620
+ if (name === "find_element") {
621
+ const { query, exact = false, timeoutMs = 3000, platform, deviceId } = (args || {});
622
+ const res = await ToolsInteract.findElementHandler({ query, exact, timeoutMs, platform, deviceId });
623
+ return wrapResponse(res);
624
+ }
605
625
  if (name === "tap") {
606
626
  const { platform, x, y, deviceId } = (args || {});
607
627
  const res = await ToolsInteract.tapHandler({ platform, x, y, deviceId });
package/docs/CHANGELOG.md CHANGED
@@ -2,6 +2,15 @@
2
2
 
3
3
  All notable changes to the **Mobile Debug MCP** project will be documented in this file.
4
4
 
5
+ ## [0.19.0]
6
+
7
+ - Added `observe_until` interact tool: waits for a UI, log, screen-fingerprint, or idle condition with configurable polling and timeout. Returns rich details on match (element info, log line, new fingerprint).
8
+
9
+
10
+ ## [0.18.0]
11
+ - Added `find_element` interact tool: semantic UI element search with actionable tap coordinates and lightweight telemetry. The tool searches the UI tree for the best match by text, content description, resource-id, and class, scores candidates (exact, partial, resource-id), and returns the most relevant visible element. When a matching node is non-interactable (e.g., Compose Text child), the tool locates a clickable ancestor (parent or containing element) and returns actionable tapCoordinates (x,y). The handler also returns a `confidence` value and `telemetry` metadata (matchedIndex, matchedInteractable) to aid agent decision-making and logging. Implemented as `ToolsInteract.findElementHandler` and covered by unit tests.
12
+
13
+
5
14
  ## [0.17.0]
6
15
  - Added `capture_debug_snapshot` observe tool: captures a full debugging snapshot including screenshot (base64), UI tree, current activity (Android), screen fingerprint, and recent logs (prefers active log stream, falls back to snapshot logs). Returns a single structured JSON object and includes per-part error fields for partial failures. Implemented as `ToolsObserve.captureDebugSnapshotHandler` and registered in the server.
7
16
 
@@ -101,3 +101,105 @@ Notes:
101
101
  - Default `timeoutMs` is 5000ms and default `pollIntervalMs` is 300ms; callers may override these.
102
102
  - Implemented as an interact-level tool and delegates platform-specific fingerprint calculation to the observe layer (`get_screen_fingerprint`).
103
103
 
104
+ ---
105
+
106
+ ## find_element
107
+
108
+ Purpose:
109
+
110
+ Locate a UI element on the current screen using semantic matching and return an actionable element descriptor (including tap coordinates) and confidence telemetry.
111
+
112
+ Input:
113
+
114
+ ```json
115
+ { "query": "string", "exact": false, "timeoutMs": 3000, "platform": "android|ios", "deviceId": "optional device id" }
116
+ ```
117
+
118
+ Behaviour:
119
+
120
+ - Fetches the current UI tree (get_ui_tree) and scores visible elements using: text, content description, resource-id, and class name.
121
+ - Normalises strings (lowercase, trimmed). If `exact=true`, requires an exact match on text or content description; otherwise also allows partial (contains) matches and resource-id/class matches.
122
+ - Considers element bounds and visibility; scores non-interactable children as matches and attempts to resolve a clickable ancestor (parent index or containing clickable element) to produce an actionable element.
123
+ - Retries until timeoutMs; stops early for high-confidence matches.
124
+ - Does not block on long operations and returns partial results where appropriate.
125
+
126
+ Output:
127
+
128
+ ```json
129
+ {
130
+ "found": true,
131
+ "element": {
132
+ "text": "Login",
133
+ "resourceId": "com.example:id/login",
134
+ "contentDesc": null,
135
+ "class": "android.widget.Button",
136
+ "bounds": { "left":0, "top":0, "right":100, "bottom":50 },
137
+ "clickable": true,
138
+ "enabled": true,
139
+ "tapCoordinates": { "x":50, "y":25 },
140
+ "telemetry": { "matchedIndex": 3, "matchedInteractable": true }
141
+ },
142
+ "score": 1.0,
143
+ "confidence": 1.0
144
+ }
145
+ ```
146
+
147
+ Notes:
148
+
149
+ - `tapCoordinates` are the recommended center point to use for `tap` calls.
150
+ - `confidence` mirrors the internal scoring (0..1) and is suitable for telemetry or logging to decide whether to proceed with an automated action.
151
+ - The tool favours actionable (clickable/focusable) targets; when a matching node is not directly actionable, it finds the smallest containing clickable ancestor.
152
+ - Unit tests for edge cases (parent-clickable child-text, resource-id matches, fuzzy matching) are under `test/observe/unit/find_element.test.ts`.
153
+
154
+ ---
155
+
156
+ ## observe_until
157
+
158
+ Purpose:
159
+ - Wait for a condition to occur on the device: UI element appearance, a log line, a screen fingerprint change, or an idle/stable screen state.
160
+
161
+ Supported types and behavior:
162
+ - ui: Delegates to `find_element` to perform a semantic search of the UI tree. Returns the matched element descriptor (including tapCoordinates) when found.
163
+ - log: Reads the active log stream (via `start_log_stream`/`readLogStreamHandler`) and falls back to a snapshot of recent logs (`getLogsHandler`). Matches when the query substring appears in a stream entry, or in a snapshot log line that comes after the baseline captured at the start of the call (when that baseline line can still be located).
164
+ - screen: Compares screen fingerprints (visual checks) against an initial baseline and returns when the fingerprint changes. If `query` is provided, it also runs `find_element` on the new screen to validate the expected content, and keeps polling until the query matches or the timeout elapses.
165
+ - idle: Waits until the screen fingerprint has remained unchanged for a fixed stability window of 1000ms.
166
+
167
+ Input (ToolsInteract.observeUntilHandler):
168
+ ```
169
+ { "type": "ui|log|screen|idle", "query": "optional string", "timeoutMs": 5000, "pollIntervalMs": 200, "platform": "android|ios", "deviceId": "optional device id" }
170
+ ```
171
+
172
+ Success response highlights:
173
+ - success: true
174
+ - type: requested type
175
+ - matched: true
176
+ - details: human-friendly explanation
177
+ - timestamp: epoch ms
178
+ - element: (for ui/screen when matched) actionable element metadata with tapCoordinates
179
+ - log: (for log) matched log message and raw entry
180
+ - newFingerprint: (for screen) new fingerprint value
181
+
182
+ Failure/timeout response:
183
+ - success: false
184
+ - error: explanation (`Timeout waiting for condition` on timeout)
185
+ - type: requested type
186
+ - timeoutMs: value used
187
+
188
+ Notes & tips:
189
+ - Defaults (timeoutMs=5000, pollIntervalMs=200) balance responsiveness with device query overhead; adjust in tests or scripts as needed.
190
+ - For UI-sensitive flows prefer type='ui' rather than relying solely on visual fingerprint changes, as some UI updates don't alter the fingerprint.
191
+
192
+ Tests:
193
+ - Unit: `test/interact/unit/observe_until.test.ts`
194
+ - Device runner: `test/interact/device/observe_until_device.ts` (requires devices/emulators and adb/xcrun in PATH)
195
+
196
+ Example:
197
+ ```
198
+ // Wait up to 5s for a button labeled "Generate Session" on Android
199
+ ToolsInteract.observeUntilHandler({ type: 'ui', query: 'Generate Session', timeoutMs: 5000, platform: 'android' })
200
+ ```
201
+
202
+ Troubleshooting:
203
+ - If observe_until(log) never matches, ensure that log streaming has been started for the target package and that the baseline logs were captured correctly.
204
+ - If observe_until(screen) times out despite visible UI change, try type='ui' to validate content-level changes.
205
+
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mobile-debug-mcp",
3
- "version": "0.17.0",
3
+ "version": "0.19.0",
4
4
  "description": "MCP server for mobile app debugging (Android + iOS), with focus on security and reliability",
5
5
  "type": "module",
6
6
  "bin": {
@@ -7,6 +7,30 @@ import { ToolsObserve } from '../observe/index.js'
7
7
 
8
8
  interface ScreenFingerprintResponse { fingerprint: string | null }
9
9
 
10
+ interface UiElement {
11
+ text?: string | null
12
+ label?: string | null
13
+ value?: string | null
14
+ contentDescription?: string | null
15
+ contentDesc?: string | null
16
+ accessibilityLabel?: string | null
17
+ resourceId?: string | null
18
+ resourceID?: string | null
19
+ id?: string | null
20
+ type?: string | null
21
+ class?: string | null
22
+ bounds?: number[] | null
23
+ clickable?: boolean
24
+ enabled?: boolean
25
+ focusable?: boolean
26
+ visible?: boolean
27
+ parentId?: number | string | null
28
+ _index?: number
29
+ _interactable?: boolean
30
+ }
31
+
32
+ const STABLE_IDLE_MS = 1000
33
+
10
34
  export class ToolsInteract {
11
35
 
12
36
  private static async getInteractionService(platform?: 'android' | 'ios', deviceId?: string) {
@@ -47,6 +71,160 @@ export class ToolsInteract {
47
71
  return await interact.scrollToElement(selector, direction, maxScrolls, scrollAmount, resolved.id)
48
72
  }
49
73
 
74
+ static async findElementHandler({ query, exact = false, timeoutMs = 3000, platform, deviceId }: { query: string, exact?: boolean, timeoutMs?: number, platform?: 'android' | 'ios', deviceId?: string }) {
75
+ // Try to use observe layer to fetch the current UI tree and perform a fast semantic search
76
+ const start = Date.now()
77
+ const deadline = start + timeoutMs
78
+ const normalize = (s: any) => (s === null || s === undefined) ? '' : String(s).toLowerCase().trim()
79
+
80
+ const q = normalize(query)
81
+ if (!q) return { found: false, error: 'Empty query' }
82
+
83
+ let best: UiElement | null = null
84
+ let bestScore = 0
85
+
86
+ const scoreElement = (el: UiElement | null) => {
87
+ if (!el || !el.visible) return 0
88
+ const bounds = el.bounds || [0,0,0,0]
89
+ if (!Array.isArray(bounds) || bounds.length < 4) return 0
90
+ const [l,t,r,b] = bounds
91
+ if (r <= l || b <= t) return 0
92
+ // Do not early-return on non-interactable elements — score them so we can locate their clickable ancestor later
93
+ const interactable = !!(el.clickable || el.enabled || el.focusable)
94
+
95
+ const text = normalize(el.text ?? el.label ?? el.value ?? '')
96
+ const content = normalize(el.contentDescription ?? el.contentDesc ?? el.accessibilityLabel ?? '')
97
+ const resourceId = normalize(el.resourceId ?? el.resourceID ?? el.id ?? '')
98
+ const className = normalize(el.type ?? el.class ?? '')
99
+
100
+ let score = 0
101
+ if (exact) {
102
+ if (text && text === q) score = 1.0
103
+ else if (content && content === q) score = 0.95
104
+ } else {
105
+ if (text && text === q) score = 1.0
106
+ else if (content && content === q) score = 0.95
107
+ else if (text && text.includes(q)) score = 0.6
108
+ else if (content && content.includes(q)) score = 0.55
109
+ else if (resourceId && resourceId.includes(q)) score = 0.7
110
+ else if (className && className.includes(q)) score = 0.3
111
+ }
112
+ if (score > 0 && interactable) score += 0.05
113
+ return score
114
+ }
115
+
116
+ while (Date.now() <= deadline) {
117
+ try {
118
+ const tree = await ToolsObserve.getUITreeHandler({ platform, deviceId })
119
+ if (tree && Array.isArray((tree as any).elements)) {
120
+ const elements = ((tree as any).elements as UiElement[])
121
+ for (let i = 0; i < elements.length; i++) {
122
+ const el = elements[i]
123
+ try {
124
+ const s = scoreElement(el)
125
+ const interactable = !!(el.clickable || el.enabled || (el as any).focusable)
126
+ if (s > bestScore) {
127
+ bestScore = s
128
+ best = el as UiElement
129
+ if (best) { best._index = i; best._interactable = interactable }
130
+ }
131
+ if (bestScore >= 0.95) break
132
+ } catch (e) { console.error('Error scoring element:', e) }
133
+ }
134
+ if (bestScore >= 0.95) break
135
+ }
136
+ } catch (e) { console.error('Error fetching UI tree:', e) }
137
+ if (Date.now() > deadline) break
138
+ await new Promise(r => setTimeout(r, 100))
139
+ }
140
+
141
+ if (!best) return { found: false, error: 'Element not found' }
142
+
143
+ // If the best match is not interactable, try to resolve an actionable ancestor.
144
+ try {
145
+ const tree = await ToolsObserve.getUITreeHandler({ platform, deviceId }) as any
146
+ const elements = (tree && Array.isArray(tree.elements)) ? (tree.elements as UiElement[]) : []
147
+ let chosen = best as any
148
+ const childBounds = Array.isArray(chosen?.bounds) ? chosen.bounds : null
149
+
150
+ // Strategy 1: if parentId references an index, climb that chain
151
+ let resolvedAncestor: any = null
152
+ if (childBounds && (chosen.parentId !== undefined && chosen.parentId !== null)) {
153
+ let cur = chosen
154
+ let safety = 0
155
+ while (cur && safety < 20 && !(cur.clickable || cur.focusable) && (cur.parentId !== undefined && cur.parentId !== null)) {
156
+ let pid = cur.parentId
157
+ let idx: number | null = null
158
+ if (typeof pid === 'number') idx = pid
159
+ else if (typeof pid === 'string' && /^\d+$/.test(pid)) idx = Number(pid)
160
+ // If parentId is not an index, try to find by matching resourceId or id field
161
+ if (idx !== null && elements[idx]) {
162
+ cur = elements[idx]
163
+ if (cur && (cur.clickable || cur.enabled || cur.focusable)) { resolvedAncestor = cur; break }
164
+ } else if (typeof pid === 'string') {
165
+ // fallback: search elements for matching resourceId or id
166
+ const found = elements.find((el: UiElement)=> (el.resourceId === pid || el.id === pid))
167
+ if (found) {
168
+ cur = found
169
+ if (cur && (cur.clickable || cur.enabled || cur.focusable)) { resolvedAncestor = cur; break }
170
+ // otherwise continue climbing if this found element has its own parentId
171
+ } else {
172
+ break
173
+ }
174
+ } else {
175
+ break
176
+ }
177
+ safety++
178
+ }
179
+ }
180
+
181
+ // Strategy 2: fallback - find a clickable element whose bounds fully contain the child's bounds
182
+ if (!resolvedAncestor && childBounds) {
183
+ const [cl,ct,cr,cb] = childBounds
184
+ // find candidates that are clickable and contain the child bounds
185
+ const candidates = elements.filter((el: UiElement)=> el && (el.clickable || el.focusable) && Array.isArray(el.bounds) && el.bounds!.length>=4).map((el: UiElement)=>({el, bounds: el.bounds! as number[]}))
186
+ let bestCandidate: any = null
187
+ let bestCandidateArea = Infinity
188
+ for (const c of candidates) {
189
+ const [pl,pt,pr,pb] = c.bounds
190
+ if (pl <= cl && pt <= ct && pr >= cr && pb >= cb) {
191
+ const area = (pr-pl) * (pb-pt)
192
+ if (area < bestCandidateArea) { bestCandidateArea = area; bestCandidate = c.el }
193
+ }
194
+ }
195
+ if (bestCandidate) resolvedAncestor = bestCandidate
196
+ }
197
+
198
+ if (resolvedAncestor) {
199
+ best = resolvedAncestor
200
+ // small score bump to reflect actionability
201
+ bestScore = Math.min(1, bestScore + 0.02)
202
+ }
203
+ } catch (e) { console.error('Error resolving ancestor:', e) }
204
+
205
+ if (!best) return { found: false, error: 'Element not found' }
206
+
207
+ const boundsObj = Array.isArray(best.bounds) ? { left: best.bounds[0], top: best.bounds[1], right: best.bounds[2], bottom: best.bounds[3] } : null
208
+ const tapCoordinates = boundsObj ? { x: Math.floor((boundsObj.left + boundsObj.right) / 2), y: Math.floor((boundsObj.top + boundsObj.bottom) / 2) } : null
209
+
210
+ const outEl = {
211
+ text: best.text ?? null,
212
+ resourceId: best.resourceId ?? null,
213
+ contentDesc: best.contentDescription ?? best.contentDesc ?? null,
214
+ class: best.type ?? best.class ?? null,
215
+ bounds: boundsObj,
216
+ clickable: !!best.clickable,
217
+ enabled: !!best.enabled,
218
+ tapCoordinates,
219
+ telemetry: {
220
+ matchedIndex: best?._index ?? null,
221
+ matchedInteractable: !!best?._interactable
222
+ }
223
+ }
224
+ const scoreVal = Math.min(1, Number(bestScore.toFixed(3)))
225
+ return { found: true, element: outEl, score: scoreVal, confidence: scoreVal }
226
+ }
227
+
50
228
  static async waitForScreenChangeHandler({ platform, previousFingerprint, timeoutMs = 5000, pollIntervalMs = 300, deviceId }: { platform?: 'android' | 'ios', previousFingerprint: string, timeoutMs?: number, pollIntervalMs?: number, deviceId?: string }) {
51
229
  const start = Date.now()
52
230
  let lastFingerprint: string | null = null
@@ -74,14 +252,9 @@ export class ToolsInteract {
74
252
  }
75
253
  lastFingerprint = confirmFp
76
254
  continue
77
- } catch {
78
- // ignore and continue polling
79
- continue
80
- }
255
+ } catch (e) { console.error('Error confirming fingerprint:', e); continue }
81
256
  }
82
- } catch {
83
- // ignore transient errors
84
- }
257
+ } catch (e) { console.error('Error getting screen fingerprint:', e) }
85
258
 
86
259
  await new Promise(resolve => setTimeout(resolve, pollIntervalMs))
87
260
  }
@@ -89,4 +262,147 @@ export class ToolsInteract {
89
262
  return { success: false, reason: 'timeout', lastFingerprint, elapsedMs: Date.now() - start }
90
263
  }
91
264
 
265
/**
 * Polls the device until a condition is observed or `timeoutMs` elapses.
 *
 * Condition types:
 * - `ui`     – `findElementHandler` reports a match for `query`.
 * - `log`    – a log-stream entry, or a snapshot log line that appeared after
 *              the baseline snapshot, contains `query` (case-sensitive substring).
 * - `screen` – the screen fingerprint differs from the baseline fingerprint;
 *              when `query` is given it must additionally be found on the new screen.
 * - `idle`   – the screen fingerprint has not changed for `STABLE_IDLE_MS`.
 *
 * Errors raised by the observe helpers while polling are logged and polling
 * continues. On timeout a debug snapshot is captured on a best-effort basis.
 *
 * @param type           Condition to wait for.
 * @param query          Text to look for; required for `ui` and `log`, optional for `screen`.
 * @param timeoutMs      Overall time budget in milliseconds (default 5000).
 * @param pollIntervalMs Delay between polls in milliseconds (default 200).
 * @param platform       Target platform, forwarded to the observe/interact helpers.
 * @param deviceId       Target device, forwarded to the observe/interact helpers.
 * @returns On match: `{ success: true, type, matched: true, details, timestamp, telemetry, ... }`.
 *          Otherwise: `{ success: false, error, type, timeoutMs, telemetry, snapshot }`.
 */
static async observeUntilHandler({ type, query, timeoutMs = 5000, pollIntervalMs = 200, platform, deviceId }: { type: 'ui' | 'log' | 'screen' | 'idle', query?: string, timeoutMs?: number, pollIntervalMs?: number, platform?: 'android' | 'ios', deviceId?: string }) {
  const start = Date.now()
  const deadline = start + (timeoutMs || 0)
  const q = (query === null || query === undefined) ? '' : String(query)
  const qLower = q.toLowerCase()

  // 'ui' and 'log' can never match without a query (findElementHandler rejects
  // an empty query; the log matcher needs a substring), so fail fast instead of
  // polling for the whole timeout.
  if ((type === 'ui' && !q.trim()) || (type === 'log' && !q)) {
    return {
      success: false,
      error: `A non-empty query is required for type '${type}'`,
      type,
      timeoutMs,
      telemetry: { pollCount: 0, elapsedMs: Date.now() - start, matchSource: null },
      snapshot: null
    }
  }

  const readFingerprint = async (): Promise<string | null> => {
    const fpRes = await ToolsObserve.getScreenFingerprintHandler({ platform, deviceId }) as ScreenFingerprintResponse | null
    return fpRes?.fingerprint ?? null
  }

  // Converts a log entry (string, or object with `message` / `msg`) to searchable text.
  // Objects without a usable message are serialised so the match never runs against "[object Object]".
  const entryText = (entry: any): string => {
    if (!entry) return ''
    const value = typeof entry === 'object' ? (entry.message || entry.msg || entry) : entry
    if (typeof value === 'string') return value
    if (typeof value === 'object') {
      try { return JSON.stringify(value) ?? '' } catch { return '' }
    }
    return String(value)
  }

  // Baseline fingerprint: only the 'screen' and 'idle' conditions use it.
  let baselineFingerprint: string | null = null
  if (type === 'screen' || type === 'idle') {
    try {
      baselineFingerprint = await readFingerprint()
    } catch (err) {
      console.error('observeUntil: error getting initial fingerprint', err)
    }
  }

  // Baseline log line: lets the snapshot fallback skip lines that existed before the call.
  let baselineLastLine: string | null = null
  if (type === 'log') {
    try {
      const gl = await ToolsObserve.getLogsHandler({ platform, deviceId, lines: 200 }) as any
      const logsArr = Array.isArray(gl?.logs) ? gl.logs : []
      baselineLastLine = logsArr.length ? logsArr[logsArr.length - 1] : null
    } catch (err) {
      // non-fatal but surface warning to aid debugging
      console.warn('observeUntil: failed to get baseline logs (non-fatal):', err instanceof Error ? err.message : String(err))
    }
  }

  let prevFingerprint = baselineFingerprint
  let lastChangeAt = Date.now()
  let pollCount = 0

  const sleep = (ms: number) => new Promise<void>(resolve => setTimeout(resolve, ms))

  // Short inner timeout so a single element lookup cannot block the poll loop for long.
  const findTimeoutMs = Math.min(500, timeoutMs || 500)
  const findByQuery = async () => {
    const found = await ToolsInteract.findElementHandler({ query: q, exact: false, timeoutMs: findTimeoutMs, platform, deviceId }) as any
    return found && found.found ? found : null
  }

  const matchTelemetry = (matchSource: string) => {
    const elapsedMs = Date.now() - start
    return { pollCount, timeToMatch: elapsedMs, elapsedMs, matchSource }
  }

  while (Date.now() <= deadline) {
    pollCount++

    if (type === 'ui') {
      try {
        const found = await findByQuery()
        if (found) {
          // Heuristic label describing which attribute most likely produced the match.
          const el = found.element || {}
          let matchSource = 'ui-partial'
          if (el.resourceId && String(el.resourceId).toLowerCase().includes(qLower)) matchSource = 'ui-resourceId'
          else if (el.text && String(el.text).toLowerCase() === qLower) matchSource = 'ui-exact'

          return { success: true, type: 'ui', matched: true, details: `UI element matched '${q}'`, timestamp: Date.now(), element: found.element, telemetry: matchTelemetry(matchSource) }
        }
      } catch (err) { console.error('observeUntil(ui) find error:', err) }
    } else if (type === 'log') {
      // Try the active log stream first. It has its own try/catch so that a
      // failing stream read does not prevent the snapshot fallback below.
      // NOTE(review): stream entries are not filtered against the baseline, so
      // entries buffered before this call may match — confirm against readLogStreamHandler.
      try {
        const stream = await ToolsObserve.readLogStreamHandler({ platform, sessionId: 'default', limit: 200 }) as any
        const entries = Array.isArray(stream?.entries) ? stream.entries : []
        for (const ent of entries) {
          const msg = entryText(ent)
          if (msg.includes(q)) {
            return { success: true, type: 'log', matched: true, details: `Log matched '${q}'`, timestamp: Date.now(), log: { message: msg, raw: ent }, telemetry: matchTelemetry('log-stream') }
          }
        }
      } catch (err) { console.error('observeUntil(log) stream error:', err) }

      // Fallback to snapshot logs
      try {
        const gl = await ToolsObserve.getLogsHandler({ platform, deviceId, lines: 200 }) as any
        const logsArr = Array.isArray(gl?.logs) ? gl.logs : []
        // Only consider new lines after baselineLastLine when possible
        let startIndex = 0
        if (baselineLastLine) {
          const idx = logsArr.lastIndexOf(baselineLastLine)
          startIndex = idx >= 0 ? idx + 1 : 0
        }
        for (let i = startIndex; i < logsArr.length; i++) {
          const line = logsArr[i]
          if (String(line).includes(q)) {
            return { success: true, type: 'log', matched: true, details: `Log matched '${q}'`, timestamp: Date.now(), log: { message: line }, telemetry: matchTelemetry('log-snapshot') }
          }
        }
      } catch (err) { console.error('observeUntil(log) error:', err) }
    } else if (type === 'screen') {
      try {
        const fp = await readFingerprint()
        if (fp !== null) {
          if (baselineFingerprint === null) {
            // The baseline read failed; adopt the first successful read as the
            // baseline rather than reporting it as a screen change.
            baselineFingerprint = fp
          } else if (fp !== baselineFingerprint) {
            if (!q) {
              return { success: true, type: 'screen', matched: true, details: 'Screen fingerprint changed', timestamp: Date.now(), newFingerprint: fp, telemetry: matchTelemetry('screen-fingerprint') }
            }
            // optionally validate query against new screen context
            try {
              const found = await findByQuery()
              if (found) {
                return { success: true, type: 'screen', matched: true, details: `Screen changed and query matched on new screen`, timestamp: Date.now(), newFingerprint: fp, element: found.element, telemetry: matchTelemetry('screen-validated-ui') }
              }
            } catch (err) { console.error('observeUntil(screen) find error:', err) }
            // If query provided but not matched yet, continue polling until timeout
          }
        }
      } catch (err) { console.error('observeUntil(screen) error:', err) }
    } else if (type === 'idle') {
      try {
        const fp = await readFingerprint()
        if (fp === null || fp !== prevFingerprint) {
          // A missing fingerprint means the screen could not be observed, so it
          // restarts the stability window instead of counting as "unchanged".
          prevFingerprint = fp
          lastChangeAt = Date.now()
        } else if (Date.now() - lastChangeAt >= STABLE_IDLE_MS) {
          return { success: true, type: 'idle', matched: true, details: `UI stable for ${STABLE_IDLE_MS}ms`, timestamp: Date.now(), fingerprint: fp, telemetry: matchTelemetry('idle-stable') }
        }
      } catch (err) {
        console.error('observeUntil(idle) error:', err)
        // Stability cannot be vouched for across a failed read.
        lastChangeAt = Date.now()
      }
    }

    // Respect poll interval, avoid a tight loop, and never sleep past the deadline.
    const remaining = deadline - Date.now()
    if (remaining <= 0) break
    await sleep(Math.min(pollIntervalMs || 200, remaining))
  }

  // On timeout, capture a failure snapshot to aid debugging (best-effort)
  let snapshot: any = null
  try {
    snapshot = await ToolsObserve.captureDebugSnapshotHandler({ reason: `observe_until timeout for ${type}`, includeLogs: true, platform, deviceId })
  } catch (err) {
    snapshot = { error: err instanceof Error ? err.message : String(err) }
  }

  return { success: false, error: 'Timeout waiting for condition', type, timeoutMs, telemetry: { pollCount, elapsedMs: Date.now() - start, matchSource: null }, snapshot }
}
407
+
92
408
  }
package/src/server.ts CHANGED
@@ -354,6 +354,21 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
354
354
  required: ["platform", "text"]
355
355
  }
356
356
  },
357
+ {
358
+ name: "find_element",
359
+ description: "Find a UI element by semantic query (text, content-desc, resource-id, class). Returns best match.",
360
+ inputSchema: {
361
+ type: "object",
362
+ properties: {
363
+ query: { type: "string", description: "Search query (text or label)" },
364
+ exact: { type: "boolean", description: "Require exact match (true/false)", default: false },
365
+ timeoutMs: { type: "number", description: "Timeout in ms to keep searching", default: 3000 },
366
+ platform: { type: "string", enum: ["android","ios"], description: "Optional platform override" },
367
+ deviceId: { type: "string", description: "Optional device serial/udid" }
368
+ },
369
+ required: ["query"]
370
+ }
371
+ },
357
372
 
358
373
  {
359
374
  name: "tap",
@@ -647,6 +662,12 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
647
662
  return wrapResponse(res)
648
663
  }
649
664
 
665
+ if (name === "find_element") {
666
+ const { query, exact = false, timeoutMs = 3000, platform, deviceId } = (args || {}) as any
667
+ const res = await ToolsInteract.findElementHandler({ query, exact, timeoutMs, platform, deviceId })
668
+ return wrapResponse(res)
669
+ }
670
+
650
671
  if (name === "tap") {
651
672
  const { platform, x, y, deviceId } = (args || {}) as any
652
673
  const res = await ToolsInteract.tapHandler({ platform, x, y, deviceId })
@@ -0,0 +1,24 @@
1
/**
 * Manual on-device smoke test: starts the modul8 app on an Android and an iOS
 * device, then waits (up to 20s per device) for a "Generate Session" UI element.
 *
 * Device ids come from the ANDROID_DEVICE / IOS_DEVICE environment variables,
 * falling back to the hard-coded defaults below. A failure to start the app is
 * logged and skipped; any other failure exits the process with code 1.
 */
(async function main() {
  try {
    const inter = await import('../../src/interact/index.ts')
    const manage = await import('../../src/manage/index.ts')
    const ToolsInteract = (inter as any).ToolsInteract
    const ToolsManage = (manage as any).ToolsManage

    const ANDROID_ID = process.env.ANDROID_DEVICE || 'emulator-5554'
    const IOS_UDID = process.env.IOS_DEVICE || '2EFFD8FD-5D09-47CC-95F8-28BBE30AF7ED'
    console.log('Device test starting. Android:', ANDROID_ID, 'iOS:', IOS_UDID)

    const targets = [
      { platform: 'android', label: 'Android', appId: 'com.ideamechanics.modul8', deviceId: ANDROID_ID },
      { platform: 'ios', label: 'iOS', appId: 'com.ideamechanics.modul8.Modul8', deviceId: IOS_UDID }
    ]

    // Start modul8 on both platforms if present
    for (const { platform, label, appId, deviceId } of targets) {
      try {
        await ToolsManage.startAppHandler({ platform, appId, deviceId })
        console.log(`Started ${platform} app (if installed)`)
      } catch (e: unknown) {
        // The catch variable is `unknown`; narrow before reading `.message`.
        console.error(`${label} start skipped:`, e instanceof Error && e.message ? e.message : e)
      }
    }

    // Observe UI for Generate Session on both devices (will timeout if not present)
    for (const { platform, label, deviceId } of targets) {
      const res = await ToolsInteract.observeUntilHandler({ type: 'ui', query: 'Generate Session', timeoutMs: 20000, pollIntervalMs: 500, platform, deviceId })
      console.log(`${label} observe result:`, JSON.stringify(res, null, 2))
    }
  } catch (e) {
    console.error('ERR', e)
    process.exit(1)
  }
})()
@@ -0,0 +1,76 @@
1
+ import { ToolsInteract } from '../../../src/interact/index.js'
2
+ import * as Observe from '../../../src/observe/index.js'
3
+
4
/**
 * Unit tests for `ToolsInteract.observeUntilHandler`.
 *
 * The handlers that observeUntilHandler calls are replaced with in-memory stubs
 * for the duration of the run and restored in `finally`, so no test touches a
 * real device. Each test prints PASS/FAIL together with the returned telemetry.
 */
async function runTests() {
  console.log('Starting observe_until unit tests...')

  const interact = ToolsInteract as any
  const observe = (Observe as any).ToolsObserve

  // Real implementations, restored in `finally`.
  const origFind = interact.findElementHandler
  const origReadLog = observe.readLogStreamHandler
  const origGetLogs = observe.getLogsHandler
  const origGetFp = observe.getScreenFingerprintHandler
  const origCapture = observe.captureDebugSnapshotHandler

  try {
    // Benign defaults: observeUntilHandler reads a baseline fingerprint and a
    // baseline log snapshot before polling, which must not hit a real device.
    observe.getScreenFingerprintHandler = async () => ({ fingerprint: 'baseline' })
    observe.getLogsHandler = async () => ({ device: {}, logs: [] })
    observe.readLogStreamHandler = async () => ({ entries: [] })

    // Timeout / snapshot case: ensure snapshot captured when condition not met
    observe.captureDebugSnapshotHandler = async ({ reason }: any) => ({ reason, fingerprint: 'snap-123', ui_tree: null, logs: [] })
    // make findElement always fail
    interact.findElementHandler = async () => ({ found: false })
    const resTimeout = await ToolsInteract.observeUntilHandler({ type: 'ui', query: 'WillNeverExist', timeoutMs: 500, pollIntervalMs: 100, platform: 'android' })
    const okTimeout = resTimeout && !(resTimeout as any).success && (resTimeout as any).snapshot && (resTimeout as any).snapshot.fingerprint === 'snap-123' && (resTimeout as any).telemetry && (resTimeout as any).telemetry.pollCount > 0
    console.log('Timeout Snapshot Test:', okTimeout ? 'PASS' : 'FAIL', JSON.stringify((resTimeout as any).telemetry || {}, null, 2))

    // UI condition: findElement returns found on 2nd call
    let calls = 0
    interact.findElementHandler = async (args: any) => {
      calls++
      const query = (args && (args.query || args)) || ''
      if (calls >= 2) return { found: true, element: { text: query } }
      return { found: false }
    }

    const resUi = await ToolsInteract.observeUntilHandler({ type: 'ui', query: 'Generate Session', timeoutMs: 3000, pollIntervalMs: 100, platform: 'android' })
    const okUi = resUi && (resUi as any).success && (resUi as any).telemetry && (resUi as any).telemetry.pollCount > 0 && (resUi as any).telemetry.timeToMatch >= 0
    console.log('UI Test:', okUi ? 'PASS' : 'FAIL', JSON.stringify((resUi as any).telemetry || {}, null, 2))

    // Log condition: stream has no matching entry, snapshot contains matching line
    observe.readLogStreamHandler = async () => ({ entries: [{ message: 'nothing' }] })
    let glCalls = 0
    observe.getLogsHandler = async () => {
      glCalls++
      // First call is the baseline snapshot; later calls include the new lines.
      if (glCalls === 1) return { device: {}, logs: ['INFO start'] }
      return { device: {}, logs: ['INFO start', 'ERROR Exception occurred', 'Server: Boom'] }
    }

    const resLog = await ToolsInteract.observeUntilHandler({ type: 'log', query: 'Server', timeoutMs: 3000, pollIntervalMs: 100, platform: 'android' })
    const okLog = resLog && (resLog as any).success && (resLog as any).telemetry && (resLog as any).telemetry.pollCount > 0 && (resLog as any).telemetry.matchSource === 'log-snapshot'
    console.log('Log Test:', okLog ? 'PASS' : 'FAIL', JSON.stringify((resLog as any).telemetry || {}, null, 2))

    // Screen condition: fingerprint changes after a few polls
    const seq = ['A', 'A', 'B']
    observe.getScreenFingerprintHandler = async () => ({ fingerprint: seq.length ? seq.shift() : null })
    const resScreen = await ToolsInteract.observeUntilHandler({ type: 'screen', timeoutMs: 3000, pollIntervalMs: 100, platform: 'android' })
    const okScreen = resScreen && (resScreen as any).success && (resScreen as any).telemetry && (resScreen as any).telemetry.matchSource === 'screen-fingerprint'
    console.log('Screen Test:', okScreen ? 'PASS' : 'FAIL', JSON.stringify((resScreen as any).telemetry || {}, null, 2))

    // Idle condition: stable fingerprints observed
    const idleSeq = ['X', 'X', 'X']
    observe.getScreenFingerprintHandler = async () => ({ fingerprint: idleSeq.length ? idleSeq.shift() : 'X' })
    const resIdle = await ToolsInteract.observeUntilHandler({ type: 'idle', timeoutMs: 3000, pollIntervalMs: 100, platform: 'android' })
    const okIdle = resIdle && (resIdle as any).success && (resIdle as any).telemetry && (resIdle as any).telemetry.matchSource === 'idle-stable'
    console.log('Idle Test:', okIdle ? 'PASS' : 'FAIL', JSON.stringify((resIdle as any).telemetry || {}, null, 2))
  } finally {
    // Restore every stubbed handler exactly once, even when a test throws.
    interact.findElementHandler = origFind
    observe.readLogStreamHandler = origReadLog
    observe.getLogsHandler = origGetLogs
    observe.getScreenFingerprintHandler = origGetFp
    observe.captureDebugSnapshotHandler = origCapture
  }
}

runTests().catch(console.error)
@@ -0,0 +1,85 @@
1
+ import { ToolsInteract } from '../../../src/interact/index.js'
2
+ import { ToolsObserve } from '../../../src/observe/index.js'
3
+
4
/**
 * Unit tests for `ToolsInteract.findElementHandler`.
 *
 * `ToolsObserve.getUITreeHandler` is swapped for a stub that serves a fixed
 * element list per test and is put back in `finally`. For each test the raw
 * result is written to stdout, followed by a PASS/FAIL line.
 */
async function run() {
  process.stdout.write('Starting find_element unit tests...\n')

  const observe = ToolsObserve as any
  const savedGetTree = observe.getUITreeHandler

  // Installs a UI-tree stub; the element list is rebuilt on every call.
  const serveElements = (build: () => any[]) => {
    observe.getUITreeHandler = async () => ({
      device: { platform: 'android', id: 'mock' },
      screen: '',
      resolution: { width: 1080, height: 1920 },
      elements: build()
    })
  }

  // Shared tail of the pass conditions: numeric tap coordinates and confidence.
  const hasTapAndConfidence = (res: any) =>
    res.element.tapCoordinates &&
    typeof res.element.tapCoordinates.x === 'number' &&
    typeof res.element.tapCoordinates.y === 'number' &&
    typeof res.confidence === 'number'

  // Prints the raw result first, then evaluates the pass condition and prints the verdict.
  const report = (n: number, res: any, isPass: (r: any) => unknown) => {
    process.stdout.write('res' + n + ' ' + JSON.stringify(res, null, 2) + '\n')
    const verdict = isPass(res) ? 'PASS' : 'FAIL'
    process.stdout.write('Test ' + n + ': ' + verdict + '\n')
  }

  try {
    // Test 1: exact text match
    serveElements(() => [
      { text: 'Login', type: 'android.widget.Button', contentDescription: null, clickable: true, enabled: true, visible: true, bounds: [10, 10, 100, 60], resourceId: 'btn_login' },
      { text: 'Cancel', type: 'android.widget.Button', contentDescription: null, clickable: true, enabled: true, visible: true, bounds: [110, 10, 200, 60], resourceId: 'btn_cancel' }
    ])
    const exactRes: any = await ToolsInteract.findElementHandler({ query: 'login', exact: true, platform: 'android' })
    report(1, exactRes, r => r.found === true && r.element && r.element.resourceId === 'btn_login' && hasTapAndConfidence(r))

    // Test 2: partial match & scoring
    serveElements(() => [
      { text: 'Sign in', type: 'android.widget.Button', contentDescription: null, clickable: true, enabled: true, visible: true, bounds: [10, 10, 100, 60], resourceId: 'btn_signin' },
      { text: 'Login with Email', type: 'android.widget.Button', contentDescription: null, clickable: true, enabled: true, visible: true, bounds: [110, 10, 300, 60], resourceId: 'btn_login_email' }
    ])
    const partialRes: any = await ToolsInteract.findElementHandler({ query: 'login', exact: false, platform: 'android' })
    report(2, partialRes, r => r.found === true && r.element && r.element.resourceId === 'btn_login_email' && hasTapAndConfidence(r))

    // Test 3: resourceId match
    serveElements(() => [
      { text: null, type: 'android.widget.ImageView', contentDescription: null, clickable: false, enabled: true, visible: true, bounds: [0, 0, 50, 50], resourceId: 'icon_login' }
    ])
    const idRes: any = await ToolsInteract.findElementHandler({ query: 'icon_login', exact: false, platform: 'android' })
    report(3, idRes, r => r.found === true && r.element && r.element.resourceId === 'icon_login' && hasTapAndConfidence(r))

    // Test 4: parent-clickable child-text scenario
    serveElements(() => [
      { text: null, type: 'android.view.View', contentDescription: null, clickable: true, enabled: true, visible: true, bounds: [0, 0, 400, 100], resourceId: 'btn_generate', children: [1] },
      { text: 'Generate Session', type: 'android.widget.TextView', contentDescription: null, clickable: false, enabled: true, visible: true, bounds: [10, 10, 390, 90], resourceId: null, parentId: 0 }
    ])
    const parentRes: any = await ToolsInteract.findElementHandler({ query: 'generate', exact: false, platform: 'android', timeoutMs: 300 })
    report(4, parentRes, r => r.found === true && r.element && r.element.clickable === true && r.element.resourceId === 'btn_generate' && hasTapAndConfidence(r))

    // Test 5: not found
    serveElements(() => [])
    const missingRes: any = await ToolsInteract.findElementHandler({ query: 'nope', exact: false, platform: 'android', timeoutMs: 300 })
    report(5, missingRes, r => r.found === false)
  } finally {
    observe.getUITreeHandler = savedGetTree
  }
}

run().catch(console.error)
@@ -12,5 +12,7 @@ import '../manage/unit/detection.test.ts'
12
12
  import '../manage/unit/mcp_disable_autodetect.test.ts'
13
13
  import '../interact/unit/wait_for_screen_change.test.ts'
14
14
  import '../observe/unit/capture_debug_snapshot.test.ts'
15
+ import '../observe/unit/find_element.test.ts'
16
+ import '../interact/unit/observe_until.test.ts'
15
17
 
16
18
  console.log('Unit tests loaded.')