mobile-debug-mcp 0.16.0 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -24,6 +24,8 @@ A minimal, secure MCP server for AI-assisted mobile development. Build, install,
24
24
  }
25
25
  }
26
26
  ```
27
+ You will need to add ADB_PATH for Android and XCRUN_PATH and IDB_PATH for iOS.
28
+
27
29
  ## Usage
28
30
 
29
31
  Example:
@@ -35,6 +35,191 @@ export class ToolsInteract {
35
35
  const { interact, resolved } = await ToolsInteract.getInteractionService(platform, deviceId);
36
36
  return await interact.scrollToElement(selector, direction, maxScrolls, scrollAmount, resolved.id);
37
37
  }
38
+ static async findElementHandler({ query, exact = false, timeoutMs = 3000, platform, deviceId }) {
39
+ // Try to use observe layer to fetch the current UI tree and perform a fast semantic search
40
+ const start = Date.now();
41
+ const deadline = start + timeoutMs;
42
+ const normalize = (s) => (s === null || s === undefined) ? '' : String(s).toLowerCase().trim();
43
+ const q = normalize(query);
44
+ if (!q)
45
+ return { found: false, error: 'Empty query' };
46
+ let best = null;
47
+ let bestScore = 0;
48
+ const scoreElement = (el) => {
49
+ if (!el || !el.visible)
50
+ return 0;
51
+ const bounds = el.bounds || [0, 0, 0, 0];
52
+ if (!Array.isArray(bounds) || bounds.length < 4)
53
+ return 0;
54
+ const [l, t, r, b] = bounds;
55
+ if (r <= l || b <= t)
56
+ return 0;
57
+ // Do not early-return on non-interactable elements — score them so we can locate their clickable ancestor later
58
+ const interactable = !!(el.clickable || el.enabled || el.focusable);
59
+ const text = normalize(el.text ?? el.label ?? el.value ?? '');
60
+ const content = normalize(el.contentDescription ?? el.contentDesc ?? el.accessibilityLabel ?? '');
61
+ const resourceId = normalize(el.resourceId ?? el.resourceID ?? el.id ?? '');
62
+ const className = normalize(el.type ?? el.class ?? '');
63
+ let score = 0;
64
+ if (exact) {
65
+ if (text && text === q)
66
+ score = 1.0;
67
+ else if (content && content === q)
68
+ score = 0.95;
69
+ }
70
+ else {
71
+ if (text && text === q)
72
+ score = 1.0;
73
+ else if (content && content === q)
74
+ score = 0.95;
75
+ else if (text && text.includes(q))
76
+ score = 0.6;
77
+ else if (content && content.includes(q))
78
+ score = 0.55;
79
+ else if (resourceId && resourceId.includes(q))
80
+ score = 0.7;
81
+ else if (className && className.includes(q))
82
+ score = 0.3;
83
+ }
84
+ if (score > 0 && interactable)
85
+ score += 0.05;
86
+ return score;
87
+ };
88
+ while (Date.now() <= deadline) {
89
+ try {
90
+ const tree = await ToolsObserve.getUITreeHandler({ platform, deviceId });
91
+ if (tree && Array.isArray(tree.elements)) {
92
+ const elements = tree.elements;
93
+ for (let i = 0; i < elements.length; i++) {
94
+ const el = elements[i];
95
+ try {
96
+ const s = scoreElement(el);
97
+ const interactable = !!(el.clickable || el.enabled || el.focusable);
98
+ if (s > bestScore) {
99
+ bestScore = s;
100
+ best = el;
101
+ if (best) {
102
+ best._index = i;
103
+ best._interactable = interactable;
104
+ }
105
+ }
106
+ if (bestScore >= 0.95)
107
+ break;
108
+ }
109
+ catch (e) {
110
+ console.error('Error scoring element:', e);
111
+ }
112
+ }
113
+ if (bestScore >= 0.95)
114
+ break;
115
+ }
116
+ }
117
+ catch (e) {
118
+ console.error('Error fetching UI tree:', e);
119
+ }
120
+ if (Date.now() > deadline)
121
+ break;
122
+ await new Promise(r => setTimeout(r, 100));
123
+ }
124
+ if (!best)
125
+ return { found: false, error: 'Element not found' };
126
+ // If the best match is not interactable, try to resolve an actionable ancestor.
127
+ try {
128
+ const tree = await ToolsObserve.getUITreeHandler({ platform, deviceId });
129
+ const elements = (tree && Array.isArray(tree.elements)) ? tree.elements : [];
130
+ let chosen = best;
131
+ const childBounds = Array.isArray(chosen?.bounds) ? chosen.bounds : null;
132
+ // Strategy 1: if parentId references an index, climb that chain
133
+ let resolvedAncestor = null;
134
+ if (childBounds && (chosen.parentId !== undefined && chosen.parentId !== null)) {
135
+ let cur = chosen;
136
+ let safety = 0;
137
+ while (cur && safety < 20 && !(cur.clickable || cur.focusable) && (cur.parentId !== undefined && cur.parentId !== null)) {
138
+ let pid = cur.parentId;
139
+ let idx = null;
140
+ if (typeof pid === 'number')
141
+ idx = pid;
142
+ else if (typeof pid === 'string' && /^\d+$/.test(pid))
143
+ idx = Number(pid);
144
+ // If parentId is not an index, try to find by matching resourceId or id field
145
+ if (idx !== null && elements[idx]) {
146
+ cur = elements[idx];
147
+ if (cur && (cur.clickable || cur.enabled || cur.focusable)) {
148
+ resolvedAncestor = cur;
149
+ break;
150
+ }
151
+ }
152
+ else if (typeof pid === 'string') {
153
+ // fallback: search elements for matching resourceId or id
154
+ const found = elements.find((el) => (el.resourceId === pid || el.id === pid));
155
+ if (found) {
156
+ cur = found;
157
+ if (cur && (cur.clickable || cur.enabled || cur.focusable)) {
158
+ resolvedAncestor = cur;
159
+ break;
160
+ }
161
+ // otherwise continue climbing if this found element has its own parentId
162
+ }
163
+ else {
164
+ break;
165
+ }
166
+ }
167
+ else {
168
+ break;
169
+ }
170
+ safety++;
171
+ }
172
+ }
173
+ // Strategy 2: fallback - find a clickable element whose bounds fully contain the child's bounds
174
+ if (!resolvedAncestor && childBounds) {
175
+ const [cl, ct, cr, cb] = childBounds;
176
+ // find candidates that are clickable and contain the child bounds
177
+ const candidates = elements.filter((el) => el && (el.clickable || el.focusable) && Array.isArray(el.bounds) && el.bounds.length >= 4).map((el) => ({ el, bounds: el.bounds }));
178
+ let bestCandidate = null;
179
+ let bestCandidateArea = Infinity;
180
+ for (const c of candidates) {
181
+ const [pl, pt, pr, pb] = c.bounds;
182
+ if (pl <= cl && pt <= ct && pr >= cr && pb >= cb) {
183
+ const area = (pr - pl) * (pb - pt);
184
+ if (area < bestCandidateArea) {
185
+ bestCandidateArea = area;
186
+ bestCandidate = c.el;
187
+ }
188
+ }
189
+ }
190
+ if (bestCandidate)
191
+ resolvedAncestor = bestCandidate;
192
+ }
193
+ if (resolvedAncestor) {
194
+ best = resolvedAncestor;
195
+ // small score bump to reflect actionability
196
+ bestScore = Math.min(1, bestScore + 0.02);
197
+ }
198
+ }
199
+ catch (e) {
200
+ console.error('Error resolving ancestor:', e);
201
+ }
202
+ if (!best)
203
+ return { found: false, error: 'Element not found' };
204
+ const boundsObj = Array.isArray(best.bounds) ? { left: best.bounds[0], top: best.bounds[1], right: best.bounds[2], bottom: best.bounds[3] } : null;
205
+ const tapCoordinates = boundsObj ? { x: Math.floor((boundsObj.left + boundsObj.right) / 2), y: Math.floor((boundsObj.top + boundsObj.bottom) / 2) } : null;
206
+ const outEl = {
207
+ text: best.text ?? null,
208
+ resourceId: best.resourceId ?? null,
209
+ contentDesc: best.contentDescription ?? best.contentDesc ?? null,
210
+ class: best.type ?? best.class ?? null,
211
+ bounds: boundsObj,
212
+ clickable: !!best.clickable,
213
+ enabled: !!best.enabled,
214
+ tapCoordinates,
215
+ telemetry: {
216
+ matchedIndex: best?._index ?? null,
217
+ matchedInteractable: !!best?._interactable
218
+ }
219
+ };
220
+ const scoreVal = Math.min(1, Number(bestScore.toFixed(3)));
221
+ return { found: true, element: outEl, score: scoreVal, confidence: scoreVal };
222
+ }
38
223
  static async waitForScreenChangeHandler({ platform, previousFingerprint, timeoutMs = 5000, pollIntervalMs = 300, deviceId }) {
39
224
  const start = Date.now();
40
225
  let lastFingerprint = null;
@@ -60,14 +245,14 @@ export class ToolsInteract {
60
245
  lastFingerprint = confirmFp;
61
246
  continue;
62
247
  }
63
- catch {
64
- // ignore and continue polling
248
+ catch (e) {
249
+ console.error('Error confirming fingerprint:', e);
65
250
  continue;
66
251
  }
67
252
  }
68
253
  }
69
- catch {
70
- // ignore transient errors
254
+ catch (e) {
255
+ console.error('Error getting screen fingerprint:', e);
71
256
  }
72
257
  await new Promise(resolve => setTimeout(resolve, pollIntervalMs));
73
258
  }
@@ -82,4 +82,107 @@ export class ToolsObserve {
82
82
  // Both observes implement getScreenFingerprint
83
83
  return await observe.getScreenFingerprint(resolved.id);
84
84
  }
85
+ static async captureDebugSnapshotHandler({ reason, includeLogs = true, logLines = 200, platform, appId, deviceId, sessionId } = {}) {
86
+ const timestamp = Date.now();
87
+ const out = { timestamp, reason: reason || '', activity: null, fingerprint: null, screenshot: null, ui_tree: null, logs: [] };
88
+ // Parallel fetches for performance: screenshot, current screen, fingerprint, ui tree, and log stream/get logs
89
+ const sid = sessionId || 'default';
90
+ const tasks = {
91
+ screenshot: ToolsObserve.captureScreenshotHandler({ platform, deviceId }),
92
+ currentScreen: (!platform || platform === 'android') ? ToolsObserve.getCurrentScreenHandler({ deviceId }) : Promise.resolve(null),
93
+ fingerprint: ToolsObserve.getScreenFingerprintHandler({ platform, deviceId }),
94
+ uiTree: ToolsObserve.getUITreeHandler({ platform, deviceId }),
95
+ readLogStream: includeLogs ? ToolsObserve.readLogStreamHandler({ platform, sessionId: sid, limit: logLines }) : Promise.resolve({ entries: [] }),
96
+ };
97
+ const results = await Promise.allSettled(Object.values(tasks));
98
+ const keys = Object.keys(tasks);
99
+ // Map results back to keys
100
+ for (let i = 0; i < results.length; i++) {
101
+ const key = keys[i];
102
+ const res = results[i];
103
+ if (res.status === 'fulfilled') {
104
+ const val = res.value;
105
+ if (key === 'screenshot') {
106
+ out.screenshot = val && val.screenshot ? val.screenshot : null;
107
+ }
108
+ else if (key === 'currentScreen') {
109
+ out.activity = val && ((val.activity || val.shortActivity)) ? (val.activity || val.shortActivity) : out.activity || '';
110
+ }
111
+ else if (key === 'fingerprint') {
112
+ if (val && val.fingerprint)
113
+ out.fingerprint = val.fingerprint;
114
+ if (val && val.activity)
115
+ out.activity = out.activity || val.activity;
116
+ if (val && val.error)
117
+ out.fingerprint_error = val.error;
118
+ }
119
+ else if (key === 'uiTree') {
120
+ out.ui_tree = val;
121
+ if (val && val.error)
122
+ out.ui_tree_error = val.error;
123
+ }
124
+ else if (key === 'readLogStream') {
125
+ // handle below after evaluating fallback
126
+ // temporarily attach to out._streamEntries
127
+ out._streamEntries = val && val.entries ? val.entries : [];
128
+ }
129
+ }
130
+ else {
131
+ const errMsg = res.reason instanceof Error ? res.reason.message : String(res.reason);
132
+ if (key === 'screenshot')
133
+ out.screenshot_error = errMsg;
134
+ if (key === 'currentScreen')
135
+ out.activity_error = errMsg;
136
+ if (key === 'fingerprint') {
137
+ out.fingerprint = null;
138
+ out.fingerprint_error = errMsg;
139
+ }
140
+ if (key === 'uiTree') {
141
+ out.ui_tree = null;
142
+ out.ui_tree_error = errMsg;
143
+ }
144
+ if (key === 'readLogStream') {
145
+ out._streamEntries = [];
146
+ out.logs_error = errMsg;
147
+ }
148
+ }
149
+ }
150
+ // Logs: prefer stream entries, fallback to snapshot logs when empty
151
+ if (includeLogs) {
152
+ try {
153
+ let entries = Array.isArray(out._streamEntries) ? out._streamEntries : [];
154
+ if (!entries || entries.length === 0) {
155
+ const gl = await ToolsObserve.getLogsHandler({ platform, appId, deviceId, lines: logLines });
156
+ const raw = (gl && gl.logs) ? gl.logs : [];
157
+ entries = raw.slice(-Math.max(0, logLines)).map(line => {
158
+ const level = /\b(FATAL EXCEPTION|ERROR| E )\b/i.test(line) ? 'ERROR' : /\b(WARN| W )\b/i.test(line) ? 'WARN' : 'INFO';
159
+ return { timestamp: null, level, message: line };
160
+ });
161
+ }
162
+ else {
163
+ entries = entries.map(ent => {
164
+ const msg = (ent && (ent.message || ent.msg)) ? (ent.message || ent.msg) : (typeof ent === 'string' ? ent : JSON.stringify(ent));
165
+ const levelRaw = (ent && (ent.level || ent.levelName || ent._level)) ? (ent.level || ent.levelName || ent._level) : '';
166
+ const level = (levelRaw && String(levelRaw)).toString().toUpperCase() || (/\bERROR\b/i.test(msg) ? 'ERROR' : /\bWARN\b/i.test(msg) ? 'WARN' : 'INFO');
167
+ let tsNum = null;
168
+ const maybeIso = ent && (ent._iso || ent.timestamp);
169
+ if (maybeIso && typeof maybeIso === 'string') {
170
+ const d = new Date(maybeIso);
171
+ if (!isNaN(d.getTime()))
172
+ tsNum = d.getTime();
173
+ }
174
+ return { timestamp: tsNum, level, message: msg };
175
+ });
176
+ }
177
+ out.logs = entries;
178
+ }
179
+ catch (e) {
180
+ out.logs = [];
181
+ out.logs_error = e instanceof Error ? e.message : String(e);
182
+ }
183
+ }
184
+ // Clean up internal temporary field
185
+ delete out._streamEntries;
186
+ return out;
187
+ }
85
188
  }
package/dist/server.js CHANGED
@@ -195,6 +195,22 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
195
195
  required: ["platform"]
196
196
  }
197
197
  },
198
+ {
199
+ name: "capture_debug_snapshot",
200
+ description: "Capture a complete debug snapshot (screenshot, ui tree, activity, fingerprint, logs). Returns structured JSON.",
201
+ inputSchema: {
202
+ type: "object",
203
+ properties: {
204
+ reason: { type: "string", description: "Optional reason for snapshot" },
205
+ includeLogs: { type: "boolean", description: "Whether to include logs", default: true },
206
+ logLines: { type: "number", description: "Maximum number of log lines to include", default: 200 },
207
+ platform: { type: "string", enum: ["android", "ios"], description: "Optional platform override" },
208
+ appId: { type: "string", description: "Optional appId to scope logs (package/bundle id)" },
209
+ deviceId: { type: "string", description: "Optional device serial/udid" },
210
+ sessionId: { type: "string", description: "Optional log stream session id to prefer" }
211
+ }
212
+ }
213
+ },
198
214
  {
199
215
  name: "start_log_stream",
200
216
  description: "Start streaming logs for a target application on Android or iOS. For Android this uses adb logcat --pid=<pid>; for iOS it streams `xcrun simctl spawn <device> log stream` with a predicate.",
@@ -316,6 +332,21 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
316
332
  required: ["platform", "text"]
317
333
  }
318
334
  },
335
+ {
336
+ name: "find_element",
337
+ description: "Find a UI element by semantic query (text, content-desc, resource-id, class). Returns best match.",
338
+ inputSchema: {
339
+ type: "object",
340
+ properties: {
341
+ query: { type: "string", description: "Search query (text or label)" },
342
+ exact: { type: "boolean", description: "Require exact match (true/false)", default: false },
343
+ timeoutMs: { type: "number", description: "Timeout in ms to keep searching", default: 3000 },
344
+ platform: { type: "string", enum: ["android", "ios"], description: "Optional platform override" },
345
+ deviceId: { type: "string", description: "Optional device serial/udid" }
346
+ },
347
+ required: ["query"]
348
+ }
349
+ },
319
350
  {
320
351
  name: "tap",
321
352
  description: "Simulate a finger tap on the device screen at specific coordinates.",
@@ -556,6 +587,11 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
556
587
  ]
557
588
  };
558
589
  }
590
+ if (name === "capture_debug_snapshot") {
591
+ const { reason, includeLogs, logLines, platform, appId, deviceId, sessionId } = args;
592
+ const res = await ToolsObserve.captureDebugSnapshotHandler({ reason, includeLogs, logLines, platform, appId, deviceId, sessionId });
593
+ return wrapResponse(res);
594
+ }
559
595
  if (name === "get_ui_tree") {
560
596
  const { platform, deviceId } = args;
561
597
  const res = await ToolsObserve.getUITreeHandler({ platform, deviceId });
@@ -581,6 +617,11 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
581
617
  const res = await ToolsInteract.waitForElementHandler({ platform, text, timeout, deviceId });
582
618
  return wrapResponse(res);
583
619
  }
620
+ if (name === "find_element") {
621
+ const { query, exact = false, timeoutMs = 3000, platform, deviceId } = (args || {});
622
+ const res = await ToolsInteract.findElementHandler({ query, exact, timeoutMs, platform, deviceId });
623
+ return wrapResponse(res);
624
+ }
584
625
  if (name === "tap") {
585
626
  const { platform, x, y, deviceId } = (args || {});
586
627
  const res = await ToolsInteract.tapHandler({ platform, x, y, deviceId });
package/docs/CHANGELOG.md CHANGED
@@ -2,6 +2,13 @@
2
2
 
3
3
  All notable changes to the **Mobile Debug MCP** project will be documented in this file.
4
4
 
5
+ ## [0.18.0]
6
+ - Added `find_element` interact tool: semantic UI element search with actionable tap coordinates and lightweight telemetry. The tool searches the UI tree for the best match by text, content description, resource-id, and class, scores candidates (exact, partial, resource-id), and returns the most relevant visible element. When a matching node is non-interactable (e.g., Compose Text child), the tool locates a clickable ancestor (parent or containing element) and returns actionable tapCoordinates (x,y). The handler also returns a `confidence` value and `telemetry` metadata (matchedIndex, matchedInteractable) to aid agent decision-making and logging. Implemented as `ToolsInteract.findElementHandler` and covered by unit tests.
7
+
8
+
9
+ ## [0.17.0]
10
+ - Added `capture_debug_snapshot` observe tool: captures a full debugging snapshot including screenshot (base64), UI tree, current activity (Android), screen fingerprint, and recent logs (prefers active log stream, falls back to snapshot logs). Returns a single structured JSON object and includes per-part error fields for partial failures. Implemented as `ToolsObserve.captureDebugSnapshotHandler` and registered in the server.
11
+
5
12
  ## [0.16.0]
6
13
  - Added `wait_for_screen_change` interact tool: polls the platform-specific `get_screen_fingerprint` until it differs from a provided `previousFingerprint`, with configurable `timeoutMs` and `pollIntervalMs` and an optional stability confirmation poll to avoid reacting to transient UI flickers. Implemented at the interact layer and delegates fingerprinting to the observe implementations (Android/iOS).
7
14
  - Added unit tests covering immediate change, transient null fingerprints, stability confirmation and timeout behavior: `test/interact/unit/wait_for_screen_change.test.ts`.
@@ -101,3 +101,53 @@ Notes:
101
101
  - Default `timeoutMs` is 5000ms and default `pollIntervalMs` is 300ms; callers may override these.
102
102
  - Implemented as an interact-level tool and delegates platform-specific fingerprint calculation to the observe layer (`get_screen_fingerprint`).
103
103
 
104
+ ---
105
+
106
+ ## find_element
107
+
108
+ Purpose:
109
+
110
+ Locate a UI element on the current screen using semantic matching and return an actionable element descriptor (including tap coordinates) and confidence telemetry.
111
+
112
+ Input:
113
+
114
+ ```json
115
+ { "query": "string", "exact": false, "timeoutMs": 3000, "platform": "android|ios", "deviceId": "optional device id" }
116
+ ```
117
+
118
+ Behaviour:
119
+
120
+ - Fetches the current UI tree (get_ui_tree) and scores visible elements using: text, content description, resource-id, and class name.
121
+ - Normalises strings (lowercase, trimmed). If exact=true require exact match; otherwise allow partial matches (contains) and resource-id/class matches.
122
+ - Considers element bounds and visibility; scores non-interactable children as matches and attempts to resolve a clickable ancestor (parent index or containing clickable element) to produce an actionable element.
123
+ - Retries until timeoutMs; stops early for high-confidence matches.
124
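+ - Never waits much longer than `timeoutMs`: if no high-confidence match appears, the best partial match seen so far is returned at the deadline (or `found: false` when nothing matched).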
125
+
126
+ Output:
127
+
128
+ ```json
129
+ {
130
+ "found": true,
131
+ "element": {
132
+ "text": "Login",
133
+ "resourceId": "com.example:id/login",
134
+ "contentDesc": null,
135
+ "class": "android.widget.Button",
136
+ "bounds": { "left":0, "top":0, "right":100, "bottom":50 },
137
+ "clickable": true,
138
+ "enabled": true,
139
+ "tapCoordinates": { "x":50, "y":25 },
140
+ "telemetry": { "matchedIndex": 3, "matchedInteractable": true }
141
+ },
142
+ "score": 1.0,
143
+ "confidence": 1.0
144
+ }
145
+ ```
146
+
147
+ Notes:
148
+
149
+ - `tapCoordinates` are the recommended center point to use for `tap` calls.
150
+ - `confidence` mirrors the internal scoring (0..1) and is suitable for telemetry or logging to decide whether to proceed with an automated action.
151
+ - The tool favours actionable (clickable/focusable) targets; when a matching node is not directly actionable, it finds the smallest containing clickable ancestor.
152
+ - Unit tests for edge cases (parent-clickable child-text, resource-id matches, fuzzy matching) are under `test/observe/unit/find_element.test.ts`.
153
+
@@ -76,6 +76,50 @@ Response:
76
76
 
77
77
  ---
78
78
 
79
+ ## capture_debug_snapshot
80
+ Capture a complete debug snapshot of the app state for diagnostics and post-mortem analysis.
81
+
82
+ Input:
83
+
84
+ ```json
85
+ {
86
+ "reason": "optional string describing why snapshot is taken",
87
+ "includeLogs": true,
88
+ "logLines": 200,
89
+ "platform": "android | ios",
90
+ "appId": "optional package/bundle id to scope logs",
91
+ "deviceId": "optional device serial/udid",
92
+ "sessionId": "optional log stream session id to prefer"
93
+ }
94
+ ```
95
+
96
+ Behavior:
97
+ - Captures screenshot (base64), current activity (Android), screen fingerprint, full UI tree, and recent logs.
98
+ - Prefers active log stream entries (read_log_stream) and falls back to get_logs when no active stream is available.
99
+ - Returns partial data when components fail and includes per-part error fields (e.g. `screenshot_error`, `ui_tree_error`).
100
+ - Caps logs to `logLines` entries and prefers recent entries.
101
+ - Fast by default: does not wait for new logs and avoids long blocking operations.
102
+
103
+ Response (example):
104
+
105
+ ```json
106
+ {
107
+ "timestamp": 1710000000000,
108
+ "reason": "Crash after tapping checkout",
109
+ "activity": "CheckoutActivity",
110
+ "fingerprint": "abc123",
111
+ "screenshot": "<base64 PNG string>",
112
+ "ui_tree": { ... },
113
+ "logs": [ { "timestamp": 1710000000000, "level": "ERROR", "message": "NullPointerException at CheckoutViewModel" } ]
114
+ }
115
+ ```
116
+
117
+ Notes:
118
+ - Useful immediately after detecting crashes or unexpected UI behaviour.
119
+ - Do not expect perfect data during a crash; tool is designed to return best-effort context and include errors for failed parts.
120
+
121
+ ---
122
+
79
123
  ## get_screen_fingerprint
80
124
  Generate a stable fingerprint representing the visible screen. Useful for detecting navigation changes, preventing loops, and synchronisation.
81
125
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mobile-debug-mcp",
3
- "version": "0.16.0",
3
+ "version": "0.18.0",
4
4
  "description": "MCP server for mobile app debugging (Android + iOS), with focus on security and reliability",
5
5
  "type": "module",
6
6
  "bin": {
@@ -5,6 +5,30 @@ export { AndroidInteract, iOSInteract };
5
5
  import { resolveTargetDevice } from '../utils/resolve-device.js'
6
6
  import { ToolsObserve } from '../observe/index.js'
7
7
 
8
/** Shape read from the result of ToolsObserve.getScreenFingerprintHandler in this module. */
+ interface ScreenFingerprintResponse { fingerprint: string | null }
9
+
10
/**
 * Loose description of one UI-tree element as consumed by findElementHandler.
 * Several fields are alternative spellings of the same idea and are read with
 * `??` fallbacks: text/label/value, contentDescription/contentDesc/accessibilityLabel,
 * resourceId/resourceID/id, and type/class.
 * `bounds` is read as [left, top, right, bottom].
 * `parentId` is treated as either an index into the element list or a resourceId/id.
 * `_index` and `_interactable` are scratch fields written by findElementHandler for telemetry.
 */
+ interface UiElement {
11
+ text?: string | null
12
+ label?: string | null
13
+ value?: string | null
14
+ contentDescription?: string | null
15
+ contentDesc?: string | null
16
+ accessibilityLabel?: string | null
17
+ resourceId?: string | null
18
+ resourceID?: string | null
19
+ id?: string | null
20
+ type?: string | null
21
+ class?: string | null
22
+ bounds?: number[] | null
23
+ clickable?: boolean
24
+ enabled?: boolean
25
+ focusable?: boolean
26
+ visible?: boolean
27
+ parentId?: number | string | null
28
+ _index?: number
29
+ _interactable?: boolean
30
+ }
31
+
8
32
  export class ToolsInteract {
9
33
 
10
34
  private static async getInteractionService(platform?: 'android' | 'ios', deviceId?: string) {
@@ -45,14 +69,168 @@ export class ToolsInteract {
45
69
  return await interact.scrollToElement(selector, direction, maxScrolls, scrollAmount, resolved.id)
46
70
  }
47
71
 
72
/**
 * Locate the best-matching visible UI element for a text query and return an
 * actionable descriptor (bounds, centre tap point, score, telemetry).
 *
 * The UI tree is polled through ToolsObserve.getUITreeHandler about every
 * 100 ms until a match scoring >= 0.95 is seen or `timeoutMs` elapses; at the
 * deadline the best lower-scoring match seen so far is returned.
 *
 * When the matched node is neither clickable nor focusable, an actionable
 * ancestor is resolved in the SAME tree snapshot the match came from: first by
 * following `parentId`, then by picking the smallest clickable or focusable
 * element whose bounds contain the match.
 *
 * @param query - Text to look for (compared lower-cased and trimmed).
 * @param exact - Accept only exact text / content-description matches.
 * @param timeoutMs - Polling budget in ms; non-finite or negative values fall back to 3000.
 * @param platform - Forwarded to the observe layer.
 * @param deviceId - Forwarded to the observe layer.
 * @returns `{ found: false, error }` or `{ found: true, element, score, confidence }`.
 */
static async findElementHandler({ query, exact = false, timeoutMs = 3000, platform, deviceId }: { query: string, exact?: boolean, timeoutMs?: number, platform?: 'android' | 'ios', deviceId?: string }) {
  const normalize = (s: unknown): string => (s === null || s === undefined) ? '' : String(s).toLowerCase().trim()

  const q = normalize(query)
  if (!q) return { found: false, error: 'Empty query' }

  // A NaN deadline would make the polling loop below never run.
  const budgetMs = (Number.isFinite(timeoutMs) && timeoutMs >= 0) ? timeoutMs : 3000
  const deadline = Date.now() + budgetMs

  const boundsOf = (el: UiElement | null | undefined): number[] | null =>
    (el && Array.isArray(el.bounds) && el.bounds.length >= 4) ? el.bounds : null
  // "Actionable": a tap target. "Interactable": additionally counts merely-enabled nodes
  // and is only used for the score bonus and telemetry.
  const isActionable = (el: UiElement | null | undefined): boolean => !!(el && (el.clickable || el.focusable))
  const isInteractable = (el: UiElement | null | undefined): boolean => !!(el && (el.clickable || el.enabled || el.focusable))
  const elementsOf = (tree: unknown): UiElement[] => {
    const list = (tree as { elements?: unknown } | null | undefined)?.elements
    return Array.isArray(list) ? (list as UiElement[]) : []
  }

  const scoreElement = (el: UiElement | null | undefined): number => {
    const bounds = boundsOf(el)
    if (!el || !el.visible || !bounds) return 0
    const [l = 0, t = 0, r = 0, b = 0] = bounds
    if (r <= l || b <= t) return 0 // zero-area or inverted bounds cannot be tapped

    const text = normalize(el.text ?? el.label ?? el.value ?? '')
    const content = normalize(el.contentDescription ?? el.contentDesc ?? el.accessibilityLabel ?? '')
    const resourceId = normalize(el.resourceId ?? el.resourceID ?? el.id ?? '')
    const className = normalize(el.type ?? el.class ?? '')

    let score = 0
    if (text === q) score = 1.0
    else if (content === q) score = 0.95
    else if (!exact) {
      // Take the strongest partial signal so a resource-id hit (0.7) is not
      // shadowed by a weaker partial text hit (0.6) on the same element.
      score = Math.max(
        resourceId.includes(q) ? 0.7 : 0,
        text.includes(q) ? 0.6 : 0,
        content.includes(q) ? 0.55 : 0,
        className.includes(q) ? 0.3 : 0
      )
    }
    // Non-interactable nodes are still scored so an actionable ancestor can be resolved later.
    if (score > 0 && isInteractable(el)) score += 0.05
    return score
  }

  let best: UiElement | null = null
  let bestScore = 0
  let bestIndex: number | null = null
  let bestElements: UiElement[] = [] // the snapshot `best` came from, so parent indices stay consistent

  while (Date.now() <= deadline) {
    try {
      const elements = elementsOf(await ToolsObserve.getUITreeHandler({ platform, deviceId }))
      for (let i = 0; i < elements.length && bestScore < 0.95; i++) {
        const el = elements[i]
        let s = 0
        try { s = scoreElement(el) } catch (e) { console.error('Error scoring element:', e) }
        if (el && s > bestScore) {
          bestScore = s
          best = el
          bestIndex = i
          bestElements = elements
        }
      }
    } catch (e) { console.error('Error fetching UI tree:', e) }
    if (bestScore >= 0.95 || Date.now() > deadline) break
    await new Promise(r => setTimeout(r, 100))
  }

  if (!best) return { found: false, error: 'Element not found' }

  // Telemetry describes the node that matched the query, not the ancestor that may replace it.
  const matched: UiElement = best
  const matchedInteractable = isInteractable(matched)
  const matchedBounds = boundsOf(matched)
  let target: UiElement = matched

  if (!isActionable(matched) && matchedBounds) {
    try {
      // parentId may be an element index (number or numeric string) or a resourceId/id.
      const lookupParent = (pid: UiElement['parentId']): UiElement | null => {
        if (typeof pid === 'number') return bestElements[pid] ?? null
        if (typeof pid === 'string') {
          const byIndex = /^\d+$/.test(pid) ? bestElements[Number(pid)] : undefined
          if (byIndex) return byIndex
          return bestElements.find(el => el && (el.resourceId === pid || el.id === pid)) ?? null
        }
        return null
      }

      // Strategy 1: climb the parentId chain (bounded to guard against cycles).
      let cur: UiElement | null = matched
      for (let hops = 0; hops < 20 && cur && !isActionable(cur) && cur.parentId !== undefined && cur.parentId !== null; hops++) {
        cur = lookupParent(cur.parentId)
      }
      let ancestor: UiElement | null = (cur && cur !== matched && isActionable(cur)) ? cur : null

      // Strategy 2: smallest actionable element whose bounds fully contain the match.
      if (!ancestor) {
        const [cl = 0, ct = 0, cr = 0, cb = 0] = matchedBounds
        let smallestArea = Infinity
        for (const el of bestElements) {
          const bounds = boundsOf(el)
          if (!bounds || !isActionable(el)) continue
          const [pl = 0, pt = 0, pr = 0, pb = 0] = bounds
          if (pl <= cl && pt <= ct && pr >= cr && pb >= cb) {
            const area = (pr - pl) * (pb - pt)
            if (area < smallestArea) { smallestArea = area; ancestor = el }
          }
        }
      }

      if (ancestor) {
        target = ancestor
        // small score bump to reflect actionability
        bestScore = Math.min(1, bestScore + 0.02)
      }
    } catch (e) { console.error('Error resolving ancestor:', e) }
  }

  const boundsObj = Array.isArray(target.bounds)
    ? { left: target.bounds[0], top: target.bounds[1], right: target.bounds[2], bottom: target.bounds[3] }
    : null
  const tapCoordinates = boundsObj
    ? { x: Math.floor((boundsObj.left + boundsObj.right) / 2), y: Math.floor((boundsObj.top + boundsObj.bottom) / 2) }
    : null

  const outEl = {
    text: target.text ?? null,
    resourceId: target.resourceId ?? null,
    contentDesc: target.contentDescription ?? target.contentDesc ?? null,
    class: target.type ?? target.class ?? null,
    bounds: boundsObj,
    clickable: !!target.clickable,
    enabled: !!target.enabled,
    tapCoordinates,
    telemetry: {
      matchedIndex: bestIndex,
      matchedInteractable
    }
  }
  // The +0.05 interactable bonus can push an exact match above 1; clamp for reporting.
  const scoreVal = Math.min(1, Number(bestScore.toFixed(3)))
  return { found: true, element: outEl, score: scoreVal, confidence: scoreVal }
}
225
+
48
226
  static async waitForScreenChangeHandler({ platform, previousFingerprint, timeoutMs = 5000, pollIntervalMs = 300, deviceId }: { platform?: 'android' | 'ios', previousFingerprint: string, timeoutMs?: number, pollIntervalMs?: number, deviceId?: string }) {
49
227
  const start = Date.now()
50
228
  let lastFingerprint: string | null = null
51
229
 
52
230
  while (Date.now() - start < timeoutMs) {
53
231
  try {
54
- const res = await ToolsObserve.getScreenFingerprintHandler({ platform, deviceId })
55
- const fp = (res as any)?.fingerprint ?? null
232
+ const res = await ToolsObserve.getScreenFingerprintHandler({ platform, deviceId }) as ScreenFingerprintResponse | null
233
+ const fp = res?.fingerprint ?? null
56
234
  if (fp === null || fp === undefined) {
57
235
  lastFingerprint = null
58
236
  await new Promise(resolve => setTimeout(resolve, pollIntervalMs))
@@ -64,22 +242,17 @@ export class ToolsInteract {
64
242
  if (fp !== previousFingerprint) {
65
243
  // Stability confirmation
66
244
  await new Promise(resolve => setTimeout(resolve, pollIntervalMs))
67
- try {
68
- const confirmRes = await ToolsObserve.getScreenFingerprintHandler({ platform, deviceId })
69
- const confirmFp = (confirmRes as any)?.fingerprint ?? null
245
+ try {
246
+ const confirmRes = await ToolsObserve.getScreenFingerprintHandler({ platform, deviceId }) as ScreenFingerprintResponse | null
247
+ const confirmFp = confirmRes?.fingerprint ?? null
70
248
  if (confirmFp === fp) {
71
249
  return { success: true, newFingerprint: fp, elapsedMs: Date.now() - start }
72
250
  }
73
251
  lastFingerprint = confirmFp
74
252
  continue
75
- } catch {
76
- // ignore and continue polling
77
- continue
78
- }
253
+ } catch (e) { console.error('Error confirming fingerprint:', e); continue }
79
254
  }
80
- } catch {
81
- // ignore transient errors
82
- }
255
+ } catch (e) { console.error('Error getting screen fingerprint:', e) }
83
256
 
84
257
  await new Promise(resolve => setTimeout(resolve, pollIntervalMs))
85
258
  }
@@ -89,4 +89,92 @@ export class ToolsObserve {
89
89
  // Both observes implement getScreenFingerprint
90
90
  return await (observe as any).getScreenFingerprint(resolved.id)
91
91
  }
92
+
93
/**
 * Capture a one-shot debug snapshot: screenshot, current activity, screen
 * fingerprint, UI tree and (optionally) recent logs.
 *
 * The individual captures run in parallel through `Promise.allSettled`, so a
 * failing component never aborts the snapshot. A rejected component leaves its
 * field at its empty value and records the message in a sibling `*_error`
 * field (`screenshot_error`, `activity_error`, `fingerprint_error`,
 * `ui_tree_error`, `logs_error`).
 *
 * Logs prefer the entries returned by `readLogStreamHandler`; when that yields
 * nothing, `getLogsHandler` is queried and each raw line is classified as
 * ERROR / WARN / INFO by pattern matching.
 *
 * @param options.reason      Free-form label copied into the result ('' when omitted).
 * @param options.includeLogs When false no log source is queried and `logs` stays `[]`.
 * @param options.logLines    Limit forwarded to both log handlers; also caps the fallback lines (0 yields no lines).
 * @param options.platform    Optional platform; the current-screen lookup only runs when unset or 'android'.
 * @param options.appId       Forwarded to `getLogsHandler` for the fallback log query.
 * @param options.deviceId    Forwarded to every capture handler.
 * @param options.sessionId   Log stream session to read; defaults to 'default'.
 * @returns An object with `timestamp`, `reason`, `activity`, `fingerprint`,
 *          `screenshot`, `ui_tree`, `logs` and any `*_error` fields.
 */
static async captureDebugSnapshotHandler({ reason, includeLogs = true, logLines = 200, platform, appId, deviceId, sessionId }: { reason?: string; includeLogs?: boolean; logLines?: number; platform?: 'android' | 'ios'; appId?: string; deviceId?: string; sessionId?: string } = {}) {
  const timestamp = Date.now()
  const out: any = { timestamp, reason: reason || '', activity: null, fingerprint: null, screenshot: null, ui_tree: null, logs: [] }

  // Parallel fetches for performance: screenshot, current screen, fingerprint, ui tree, and log stream
  const sid = sessionId || 'default'
  const tasks = {
    screenshot: ToolsObserve.captureScreenshotHandler({ platform, deviceId }),
    currentScreen: (!platform || platform === 'android') ? ToolsObserve.getCurrentScreenHandler({ deviceId }) : Promise.resolve(null),
    fingerprint: ToolsObserve.getScreenFingerprintHandler({ platform, deviceId }),
    uiTree: ToolsObserve.getUITreeHandler({ platform, deviceId }),
    readLogStream: includeLogs ? ToolsObserve.readLogStreamHandler({ platform, sessionId: sid, limit: logLines }) : Promise.resolve({ entries: [] }),
  }

  // Object.keys and Object.values enumerate in the same order, so index i pairs a key with its result.
  const keys = Object.keys(tasks)
  const results: PromiseSettledResult<any>[] = await Promise.allSettled(Object.values(tasks))

  // Entries read from the log stream; kept local so no temporary field ever lands on the result.
  let streamEntries: any[] = []

  results.forEach((res, i) => {
    const key = keys[i]

    if (res.status === 'rejected') {
      const errMsg = res.reason instanceof Error ? res.reason.message : String(res.reason)
      if (key === 'screenshot') out.screenshot_error = errMsg
      if (key === 'currentScreen') out.activity_error = errMsg
      if (key === 'fingerprint') { out.fingerprint = null; out.fingerprint_error = errMsg }
      if (key === 'uiTree') { out.ui_tree = null; out.ui_tree_error = errMsg }
      if (key === 'readLogStream') { streamEntries = []; out.logs_error = errMsg }
      return
    }

    const val = res.value
    switch (key) {
      case 'screenshot':
        out.screenshot = val && val.screenshot ? val.screenshot : null
        break
      case 'currentScreen':
        // Prefer the full activity name, then the short one, then whatever is already recorded.
        out.activity = (val && (val.activity || val.shortActivity)) || out.activity || ''
        break
      case 'fingerprint':
        if (val && val.fingerprint) out.fingerprint = val.fingerprint
        // The fingerprint's activity only fills in when the current-screen lookup produced none.
        if (val && val.activity) out.activity = out.activity || val.activity
        if (val && val.error) out.fingerprint_error = val.error
        break
      case 'uiTree':
        out.ui_tree = val
        if (val && val.error) out.ui_tree_error = val.error
        break
      case 'readLogStream':
        streamEntries = val && Array.isArray(val.entries) ? val.entries : []
        break
    }
  })

  // Logs: prefer stream entries, fall back to snapshot logs when the stream is empty
  if (includeLogs) {
    try {
      if (streamEntries.length === 0) {
        const gl = await ToolsObserve.getLogsHandler({ platform, appId, deviceId, lines: logLines })
        const raw: string[] = (gl && (gl as any).logs) ? (gl as any).logs : []
        // slice(-0) is slice(0) and would return every line, so a zero limit must be handled explicitly.
        const tail = logLines > 0 ? raw.slice(-logLines) : []
        out.logs = tail.map(line => {
          const level = /\b(FATAL EXCEPTION|ERROR| E )\b/i.test(line) ? 'ERROR' : /\b(WARN| W )\b/i.test(line) ? 'WARN' : 'INFO'
          return { timestamp: null, level, message: line }
        })
      } else {
        out.logs = streamEntries.map(ent => {
          const msg = (ent && (ent.message || ent.msg)) ? (ent.message || ent.msg) : (typeof ent === 'string' ? ent : JSON.stringify(ent))
          const levelRaw = (ent && (ent.level || ent.levelName || ent._level)) || ''
          // An explicit level wins; otherwise infer one from the message text.
          const level = String(levelRaw).toUpperCase() || (/\bERROR\b/i.test(msg) ? 'ERROR' : /\bWARN\b/i.test(msg) ? 'WARN' : 'INFO')

          let tsNum: number | null = null
          const rawTs: unknown = ent && (ent._iso || ent.timestamp)
          if (typeof rawTs === 'string' && rawTs) {
            const parsed = new Date(rawTs).getTime()
            if (!isNaN(parsed)) tsNum = parsed
          } else if (typeof rawTs === 'number' && Number.isFinite(rawTs)) {
            // Already numeric: keep it instead of dropping it.
            tsNum = rawTs
          }
          return { timestamp: tsNum, level, message: msg }
        })
      }
    } catch (e) {
      out.logs = []
      out.logs_error = e instanceof Error ? e.message : String(e)
    }
  }

  return out
}
92
180
  }
package/src/server.ts CHANGED
@@ -215,6 +215,23 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
215
215
  required: ["platform"]
216
216
  }
217
217
  },
218
+ {
219
+ name: "capture_debug_snapshot",
220
+ description: "Capture a complete debug snapshot (screenshot, ui tree, activity, fingerprint, logs). Returns structured JSON."
221
+ ,
222
+ inputSchema: {
223
+ type: "object",
224
+ properties: {
225
+ reason: { type: "string", description: "Optional reason for snapshot" },
226
+ includeLogs: { type: "boolean", description: "Whether to include logs", default: true },
227
+ logLines: { type: "number", description: "Maximum number of log lines to include", default: 200 },
228
+ platform: { type: "string", enum: ["android","ios"], description: "Optional platform override" },
229
+ appId: { type: "string", description: "Optional appId to scope logs (package/bundle id)" },
230
+ deviceId: { type: "string", description: "Optional device serial/udid" },
231
+ sessionId: { type: "string", description: "Optional log stream session id to prefer" }
232
+ }
233
+ }
234
+ },
218
235
  {
219
236
  name: "start_log_stream",
220
237
  description: "Start streaming logs for a target application on Android or iOS. For Android this uses adb logcat --pid=<pid>; for iOS it streams `xcrun simctl spawn <device> log stream` with a predicate.",
@@ -337,6 +354,21 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
337
354
  required: ["platform", "text"]
338
355
  }
339
356
  },
357
+ {
358
+ name: "find_element",
359
+ description: "Find a UI element by semantic query (text, content-desc, resource-id, class). Returns best match.",
360
+ inputSchema: {
361
+ type: "object",
362
+ properties: {
363
+ query: { type: "string", description: "Search query (text or label)" },
364
+ exact: { type: "boolean", description: "Require exact match (true/false)", default: false },
365
+ timeoutMs: { type: "number", description: "Timeout in ms to keep searching", default: 3000 },
366
+ platform: { type: "string", enum: ["android","ios"], description: "Optional platform override" },
367
+ deviceId: { type: "string", description: "Optional device serial/udid" }
368
+ },
369
+ required: ["query"]
370
+ }
371
+ },
340
372
 
341
373
  {
342
374
  name: "tap",
@@ -594,6 +626,12 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
594
626
  }
595
627
  }
596
628
 
629
+ if (name === "capture_debug_snapshot") {
630
+ const { reason, includeLogs, logLines, platform, appId, deviceId, sessionId } = args as any
631
+ const res = await ToolsObserve.captureDebugSnapshotHandler({ reason, includeLogs, logLines, platform, appId, deviceId, sessionId })
632
+ return wrapResponse(res)
633
+ }
634
+
597
635
  if (name === "get_ui_tree") {
598
636
  const { platform, deviceId } = args as any
599
637
  const res = await ToolsObserve.getUITreeHandler({ platform, deviceId })
@@ -624,6 +662,12 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
624
662
  return wrapResponse(res)
625
663
  }
626
664
 
665
+ if (name === "find_element") {
666
+ const { query, exact = false, timeoutMs = 3000, platform, deviceId } = (args || {}) as any
667
+ const res = await ToolsInteract.findElementHandler({ query, exact, timeoutMs, platform, deviceId })
668
+ return wrapResponse(res)
669
+ }
670
+
627
671
  if (name === "tap") {
628
672
  const { platform, x, y, deviceId } = (args || {}) as any
629
673
  const res = await ToolsInteract.tapHandler({ platform, x, y, deviceId })
@@ -0,0 +1,89 @@
1
+ import { ToolsObserve } from '../../../src/observe/index.js'
2
+
3
+ async function run() {
4
+ console.log('Starting capture_debug_snapshot unit tests...')
5
+
6
+ // Save original ToolsObserve handlers
7
+ const origCaptureHandler = (ToolsObserve as any).captureScreenshotHandler
8
+ const origGetCurrentHandler = (ToolsObserve as any).getCurrentScreenHandler
9
+ const origGetFpHandler = (ToolsObserve as any).getScreenFingerprintHandler
10
+ const origGetTreeHandler = (ToolsObserve as any).getUITreeHandler
11
+ const origReadLogStreamHandler = (ToolsObserve as any).readLogStreamHandler
12
+ const origGetLogsHandler = (ToolsObserve as any).getLogsHandler
13
+
14
+ try {
15
+ // --- Test 1: all components succeed and logs come from stream ---
16
+ ;(ToolsObserve as any).captureScreenshotHandler = async function() {
17
+ return { device: { platform: 'android', id: 'mock', osVersion: '12', model: 'Pixel', simulator: true }, screenshot: 'BASE64PNG', resolution: { width: 1080, height: 1920 } }
18
+ }
19
+ ;(ToolsObserve as any).getCurrentScreenHandler = async function() {
20
+ return { device: { platform: 'android', id: 'mock' }, package: 'com.example', activity: 'com.example.Main', shortActivity: 'Main' }
21
+ }
22
+ ;(ToolsObserve as any).getScreenFingerprintHandler = async function() {
23
+ return { fingerprint: 'abc123', activity: 'Main' }
24
+ }
25
+ ;(ToolsObserve as any).getUITreeHandler = async function() {
26
+ return { device: { platform: 'android', id: 'mock', osVersion: '12', model: 'Pixel', simulator: true }, screen: '', resolution: { width: 1080, height: 1920 }, elements: [] }
27
+ }
28
+ ;(ToolsObserve as any).readLogStreamHandler = async function() {
29
+ return { entries: [ { timestamp: '2026-03-23T20:00:00.000Z', level: 'ERROR', message: 'Boom' } ], crash_summary: { crash_detected: true } }
30
+ }
31
+ ;(ToolsObserve as any).getLogsHandler = async function() {
32
+ return { device: { platform: 'android', id: 'mock' }, logs: [], logCount: 0 }
33
+ }
34
+
35
+ const res1: any = await ToolsObserve.captureDebugSnapshotHandler({ platform: 'android', includeLogs: true, logLines: 50, sessionId: 's1' })
36
+ console.log('res1:', JSON.stringify(res1, null, 2))
37
+ const pass1 = res1 && res1.screenshot === 'BASE64PNG' && res1.activity && res1.fingerprint === 'abc123' && Array.isArray(res1.logs) && res1.logs.length === 1
38
+ console.log('Test 1:', pass1 ? 'PASS' : 'FAIL')
39
+
40
+ // Restore handlers before next test
41
+ ;(ToolsObserve as any).captureScreenshotHandler = origCaptureHandler
42
+ ;(ToolsObserve as any).getCurrentScreenHandler = origGetCurrentHandler
43
+ ;(ToolsObserve as any).getScreenFingerprintHandler = origGetFpHandler
44
+ ;(ToolsObserve as any).getUITreeHandler = origGetTreeHandler
45
+ ;(ToolsObserve as any).readLogStreamHandler = origReadLogStreamHandler
46
+ ;(ToolsObserve as any).getLogsHandler = origGetLogsHandler
47
+
48
+ // --- Test 2: screenshot and ui tree fail; logs fallback to getLogs ---
49
+ ;(ToolsObserve as any).captureScreenshotHandler = async function() { throw new Error('screencap failed') }
50
+ ;(ToolsObserve as any).getUITreeHandler = async function() { throw new Error('uie_error') }
51
+ ;(ToolsObserve as any).readLogStreamHandler = async function() { return { entries: [] } }
52
+ ;(ToolsObserve as any).getLogsHandler = async function() { return { device: { platform: 'android', id: 'mock' }, logs: ['INFO starting','ERROR crash here'], logCount: 2 } }
53
+
54
+ const res2: any = await ToolsObserve.captureDebugSnapshotHandler({ platform: 'android', includeLogs: true, logLines: 10, appId: 'com.example' })
55
+ console.log('res2:', JSON.stringify(res2, null, 2))
56
+ const pass2 = res2 && res2.screenshot_error && res2.ui_tree_error && Array.isArray(res2.logs) && res2.logs.length === 2
57
+ console.log('Test 2:', pass2 ? 'PASS' : 'FAIL')
58
+
59
+ // Restore handlers before next test
60
+ ;(ToolsObserve as any).captureScreenshotHandler = origCaptureHandler
61
+ ;(ToolsObserve as any).getCurrentScreenHandler = origGetCurrentHandler
62
+ ;(ToolsObserve as any).getScreenFingerprintHandler = origGetFpHandler
63
+ ;(ToolsObserve as any).getUITreeHandler = origGetTreeHandler
64
+ ;(ToolsObserve as any).readLogStreamHandler = origReadLogStreamHandler
65
+ ;(ToolsObserve as any).getLogsHandler = origGetLogsHandler
66
+
67
+ // --- Test 3: includeLogs=false should omit logs ---
68
+ ;(ToolsObserve as any).captureScreenshotHandler = async function() { return { device: { platform: 'android', id: 'mock' }, screenshot: null } }
69
+ ;(ToolsObserve as any).getCurrentScreenHandler = async function() { return { device: { platform: 'android', id: 'mock' }, package: '', activity: '', shortActivity: '' } }
70
+ ;(ToolsObserve as any).getScreenFingerprintHandler = async function() { return { fingerprint: null } }
71
+ ;(ToolsObserve as any).getUITreeHandler = async function() { return { device: { platform: 'android', id: 'mock' }, screen: '', resolution: { width: 0, height: 0 }, elements: [] } }
72
+ ;(ToolsObserve as any).readLogStreamHandler = async function() { return { entries: [] } }
73
+
74
+ const res3: any = await ToolsObserve.captureDebugSnapshotHandler({ platform: 'android', includeLogs: false })
75
+ console.log('res3:', JSON.stringify(res3, null, 2))
76
+ const pass3 = res3 && typeof res3.logs !== 'undefined' && res3.logs.length === 0
77
+ console.log('Test 3:', pass3 ? 'PASS' : 'FAIL')
78
+
79
+ } finally {
80
+ ;(ToolsObserve as any).captureScreenshotHandler = origCaptureHandler
81
+ ;(ToolsObserve as any).getCurrentScreenHandler = origGetCurrentHandler
82
+ ;(ToolsObserve as any).getScreenFingerprintHandler = origGetFpHandler
83
+ ;(ToolsObserve as any).getUITreeHandler = origGetTreeHandler
84
+ ;(ToolsObserve as any).readLogStreamHandler = origReadLogStreamHandler
85
+ ;(ToolsObserve as any).getLogsHandler = origGetLogsHandler
86
+ }
87
+ }
88
+
89
+ run().catch(console.error)
@@ -0,0 +1,85 @@
1
+ import { ToolsInteract } from '../../../src/interact/index.js'
2
+ import { ToolsObserve } from '../../../src/observe/index.js'
3
+
4
/**
 * Unit tests for ToolsInteract.findElementHandler.
 *
 * Each scenario replaces ToolsObserve.getUITreeHandler with a stub returning a
 * fixed element list, runs a query and writes the result plus a PASS/FAIL line
 * to stdout. The original handler is restored when the run finishes.
 */
async function run() {
  process.stdout.write('Starting find_element unit tests...\n')

  const observe = ToolsObserve as any
  const origGetTree = observe.getUITreeHandler

  // Installs a UI-tree stub. The factory is invoked on every call so each poll sees fresh element objects.
  const stubTree = (makeElements: () => unknown[]) => {
    observe.getUITreeHandler = async () => ({
      device: { platform: 'android', id: 'mock' },
      screen: '',
      resolution: { width: 1080, height: 1920 },
      elements: makeElements(),
    })
  }

  // Shared shape check: a match with the expected resource id, numeric tap coordinates and numeric confidence.
  const matchesWithTapTarget = (res: any, resourceId: string) =>
    res.found === true &&
    res.element &&
    res.element.resourceId === resourceId &&
    res.element.tapCoordinates &&
    typeof res.element.tapCoordinates.x === 'number' &&
    typeof res.element.tapCoordinates.y === 'number' &&
    typeof res.confidence === 'number'

  const report = (label: string, res: unknown) => {
    process.stdout.write(label + ' ' + JSON.stringify(res, null, 2) + '\n')
  }
  const verdict = (testNo: number, passed: unknown) => {
    process.stdout.write('Test ' + testNo + ': ' + (passed ? 'PASS' : 'FAIL') + '\n')
  }

  try {
    // Test 1: exact text match
    stubTree(() => [
      { text: 'Login', type: 'android.widget.Button', contentDescription: null, clickable: true, enabled: true, visible: true, bounds: [10, 10, 100, 60], resourceId: 'btn_login' },
      { text: 'Cancel', type: 'android.widget.Button', contentDescription: null, clickable: true, enabled: true, visible: true, bounds: [110, 10, 200, 60], resourceId: 'btn_cancel' },
    ])
    const res1: any = await ToolsInteract.findElementHandler({ query: 'login', exact: true, platform: 'android' })
    report('res1', res1)
    verdict(1, matchesWithTapTarget(res1, 'btn_login'))

    // Test 2: partial match & scoring
    stubTree(() => [
      { text: 'Sign in', type: 'android.widget.Button', contentDescription: null, clickable: true, enabled: true, visible: true, bounds: [10, 10, 100, 60], resourceId: 'btn_signin' },
      { text: 'Login with Email', type: 'android.widget.Button', contentDescription: null, clickable: true, enabled: true, visible: true, bounds: [110, 10, 300, 60], resourceId: 'btn_login_email' },
    ])
    const res2: any = await ToolsInteract.findElementHandler({ query: 'login', exact: false, platform: 'android' })
    report('res2', res2)
    verdict(2, matchesWithTapTarget(res2, 'btn_login_email'))

    // Test 3: resourceId match
    stubTree(() => [
      { text: null, type: 'android.widget.ImageView', contentDescription: null, clickable: false, enabled: true, visible: true, bounds: [0, 0, 50, 50], resourceId: 'icon_login' },
    ])
    const res3: any = await ToolsInteract.findElementHandler({ query: 'icon_login', exact: false, platform: 'android' })
    report('res3', res3)
    verdict(3, matchesWithTapTarget(res3, 'icon_login'))

    // Test 4: parent-clickable child-text scenario
    stubTree(() => [
      { text: null, type: 'android.view.View', contentDescription: null, clickable: true, enabled: true, visible: true, bounds: [0, 0, 400, 100], resourceId: 'btn_generate', children: [1] },
      { text: 'Generate Session', type: 'android.widget.TextView', contentDescription: null, clickable: false, enabled: true, visible: true, bounds: [10, 10, 390, 90], resourceId: null, parentId: 0 },
    ])
    const res4: any = await ToolsInteract.findElementHandler({ query: 'generate', exact: false, platform: 'android', timeoutMs: 300 })
    report('res4', res4)
    // The text lives on the child, but the reported element must be the clickable parent.
    verdict(4, res4.found === true && res4.element && res4.element.clickable === true && matchesWithTapTarget(res4, 'btn_generate'))

    // Test 5: not found
    stubTree(() => [])
    const res5: any = await ToolsInteract.findElementHandler({ query: 'nope', exact: false, platform: 'android', timeoutMs: 300 })
    report('res5', res5)
    verdict(5, res5.found === false)
  } finally {
    observe.getUITreeHandler = origGetTree
  }
}
+
85
+ run().catch(console.error)
@@ -11,5 +11,7 @@ import '../manage/unit/diagnostics.test.ts'
11
11
  import '../manage/unit/detection.test.ts'
12
12
  import '../manage/unit/mcp_disable_autodetect.test.ts'
13
13
  import '../interact/unit/wait_for_screen_change.test.ts'
14
+ import '../observe/unit/capture_debug_snapshot.test.ts'
15
+ import '../observe/unit/find_element.test.ts'
14
16
 
15
17
  console.log('Unit tests loaded.')