mobile-debug-mcp 0.17.0 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/dist/interact/index.js +189 -4
- package/dist/server.js +20 -0
- package/docs/CHANGELOG.md +4 -0
- package/docs/tools/interact.md +50 -0
- package/package.json +1 -1
- package/src/interact/index.ts +178 -7
- package/src/server.ts +21 -0
- package/test/observe/unit/find_element.test.ts +85 -0
- package/test/unit/index.ts +1 -0
package/README.md
CHANGED
package/dist/interact/index.js
CHANGED
|
@@ -35,6 +35,191 @@ export class ToolsInteract {
|
|
|
35
35
|
const { interact, resolved } = await ToolsInteract.getInteractionService(platform, deviceId);
|
|
36
36
|
return await interact.scrollToElement(selector, direction, maxScrolls, scrollAmount, resolved.id);
|
|
37
37
|
}
|
|
38
|
+
static async findElementHandler({ query, exact = false, timeoutMs = 3000, platform, deviceId }) {
|
|
39
|
+
// Try to use observe layer to fetch the current UI tree and perform a fast semantic search
|
|
40
|
+
const start = Date.now();
|
|
41
|
+
const deadline = start + timeoutMs;
|
|
42
|
+
const normalize = (s) => (s === null || s === undefined) ? '' : String(s).toLowerCase().trim();
|
|
43
|
+
const q = normalize(query);
|
|
44
|
+
if (!q)
|
|
45
|
+
return { found: false, error: 'Empty query' };
|
|
46
|
+
let best = null;
|
|
47
|
+
let bestScore = 0;
|
|
48
|
+
const scoreElement = (el) => {
|
|
49
|
+
if (!el || !el.visible)
|
|
50
|
+
return 0;
|
|
51
|
+
const bounds = el.bounds || [0, 0, 0, 0];
|
|
52
|
+
if (!Array.isArray(bounds) || bounds.length < 4)
|
|
53
|
+
return 0;
|
|
54
|
+
const [l, t, r, b] = bounds;
|
|
55
|
+
if (r <= l || b <= t)
|
|
56
|
+
return 0;
|
|
57
|
+
// Do not early-return on non-interactable elements — score them so we can locate their clickable ancestor later
|
|
58
|
+
const interactable = !!(el.clickable || el.enabled || el.focusable);
|
|
59
|
+
const text = normalize(el.text ?? el.label ?? el.value ?? '');
|
|
60
|
+
const content = normalize(el.contentDescription ?? el.contentDesc ?? el.accessibilityLabel ?? '');
|
|
61
|
+
const resourceId = normalize(el.resourceId ?? el.resourceID ?? el.id ?? '');
|
|
62
|
+
const className = normalize(el.type ?? el.class ?? '');
|
|
63
|
+
let score = 0;
|
|
64
|
+
if (exact) {
|
|
65
|
+
if (text && text === q)
|
|
66
|
+
score = 1.0;
|
|
67
|
+
else if (content && content === q)
|
|
68
|
+
score = 0.95;
|
|
69
|
+
}
|
|
70
|
+
else {
|
|
71
|
+
if (text && text === q)
|
|
72
|
+
score = 1.0;
|
|
73
|
+
else if (content && content === q)
|
|
74
|
+
score = 0.95;
|
|
75
|
+
else if (text && text.includes(q))
|
|
76
|
+
score = 0.6;
|
|
77
|
+
else if (content && content.includes(q))
|
|
78
|
+
score = 0.55;
|
|
79
|
+
else if (resourceId && resourceId.includes(q))
|
|
80
|
+
score = 0.7;
|
|
81
|
+
else if (className && className.includes(q))
|
|
82
|
+
score = 0.3;
|
|
83
|
+
}
|
|
84
|
+
if (score > 0 && interactable)
|
|
85
|
+
score += 0.05;
|
|
86
|
+
return score;
|
|
87
|
+
};
|
|
88
|
+
while (Date.now() <= deadline) {
|
|
89
|
+
try {
|
|
90
|
+
const tree = await ToolsObserve.getUITreeHandler({ platform, deviceId });
|
|
91
|
+
if (tree && Array.isArray(tree.elements)) {
|
|
92
|
+
const elements = tree.elements;
|
|
93
|
+
for (let i = 0; i < elements.length; i++) {
|
|
94
|
+
const el = elements[i];
|
|
95
|
+
try {
|
|
96
|
+
const s = scoreElement(el);
|
|
97
|
+
const interactable = !!(el.clickable || el.enabled || el.focusable);
|
|
98
|
+
if (s > bestScore) {
|
|
99
|
+
bestScore = s;
|
|
100
|
+
best = el;
|
|
101
|
+
if (best) {
|
|
102
|
+
best._index = i;
|
|
103
|
+
best._interactable = interactable;
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
if (bestScore >= 0.95)
|
|
107
|
+
break;
|
|
108
|
+
}
|
|
109
|
+
catch (e) {
|
|
110
|
+
console.error('Error scoring element:', e);
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
if (bestScore >= 0.95)
|
|
114
|
+
break;
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
catch (e) {
|
|
118
|
+
console.error('Error fetching UI tree:', e);
|
|
119
|
+
}
|
|
120
|
+
if (Date.now() > deadline)
|
|
121
|
+
break;
|
|
122
|
+
await new Promise(r => setTimeout(r, 100));
|
|
123
|
+
}
|
|
124
|
+
if (!best)
|
|
125
|
+
return { found: false, error: 'Element not found' };
|
|
126
|
+
// If the best match is not interactable, try to resolve an actionable ancestor.
|
|
127
|
+
try {
|
|
128
|
+
const tree = await ToolsObserve.getUITreeHandler({ platform, deviceId });
|
|
129
|
+
const elements = (tree && Array.isArray(tree.elements)) ? tree.elements : [];
|
|
130
|
+
let chosen = best;
|
|
131
|
+
const childBounds = Array.isArray(chosen?.bounds) ? chosen.bounds : null;
|
|
132
|
+
// Strategy 1: if parentId references an index, climb that chain
|
|
133
|
+
let resolvedAncestor = null;
|
|
134
|
+
if (childBounds && (chosen.parentId !== undefined && chosen.parentId !== null)) {
|
|
135
|
+
let cur = chosen;
|
|
136
|
+
let safety = 0;
|
|
137
|
+
while (cur && safety < 20 && !(cur.clickable || cur.focusable) && (cur.parentId !== undefined && cur.parentId !== null)) {
|
|
138
|
+
let pid = cur.parentId;
|
|
139
|
+
let idx = null;
|
|
140
|
+
if (typeof pid === 'number')
|
|
141
|
+
idx = pid;
|
|
142
|
+
else if (typeof pid === 'string' && /^\d+$/.test(pid))
|
|
143
|
+
idx = Number(pid);
|
|
144
|
+
// If parentId is not an index, try to find by matching resourceId or id field
|
|
145
|
+
if (idx !== null && elements[idx]) {
|
|
146
|
+
cur = elements[idx];
|
|
147
|
+
if (cur && (cur.clickable || cur.enabled || cur.focusable)) {
|
|
148
|
+
resolvedAncestor = cur;
|
|
149
|
+
break;
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
else if (typeof pid === 'string') {
|
|
153
|
+
// fallback: search elements for matching resourceId or id
|
|
154
|
+
const found = elements.find((el) => (el.resourceId === pid || el.id === pid));
|
|
155
|
+
if (found) {
|
|
156
|
+
cur = found;
|
|
157
|
+
if (cur && (cur.clickable || cur.enabled || cur.focusable)) {
|
|
158
|
+
resolvedAncestor = cur;
|
|
159
|
+
break;
|
|
160
|
+
}
|
|
161
|
+
// otherwise continue climbing if this found element has its own parentId
|
|
162
|
+
}
|
|
163
|
+
else {
|
|
164
|
+
break;
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
else {
|
|
168
|
+
break;
|
|
169
|
+
}
|
|
170
|
+
safety++;
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
// Strategy 2: fallback - find a clickable element whose bounds fully contain the child's bounds
|
|
174
|
+
if (!resolvedAncestor && childBounds) {
|
|
175
|
+
const [cl, ct, cr, cb] = childBounds;
|
|
176
|
+
// find candidates that are clickable and contain the child bounds
|
|
177
|
+
const candidates = elements.filter((el) => el && (el.clickable || el.focusable) && Array.isArray(el.bounds) && el.bounds.length >= 4).map((el) => ({ el, bounds: el.bounds }));
|
|
178
|
+
let bestCandidate = null;
|
|
179
|
+
let bestCandidateArea = Infinity;
|
|
180
|
+
for (const c of candidates) {
|
|
181
|
+
const [pl, pt, pr, pb] = c.bounds;
|
|
182
|
+
if (pl <= cl && pt <= ct && pr >= cr && pb >= cb) {
|
|
183
|
+
const area = (pr - pl) * (pb - pt);
|
|
184
|
+
if (area < bestCandidateArea) {
|
|
185
|
+
bestCandidateArea = area;
|
|
186
|
+
bestCandidate = c.el;
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
if (bestCandidate)
|
|
191
|
+
resolvedAncestor = bestCandidate;
|
|
192
|
+
}
|
|
193
|
+
if (resolvedAncestor) {
|
|
194
|
+
best = resolvedAncestor;
|
|
195
|
+
// small score bump to reflect actionability
|
|
196
|
+
bestScore = Math.min(1, bestScore + 0.02);
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
catch (e) {
|
|
200
|
+
console.error('Error resolving ancestor:', e);
|
|
201
|
+
}
|
|
202
|
+
if (!best)
|
|
203
|
+
return { found: false, error: 'Element not found' };
|
|
204
|
+
const boundsObj = Array.isArray(best.bounds) ? { left: best.bounds[0], top: best.bounds[1], right: best.bounds[2], bottom: best.bounds[3] } : null;
|
|
205
|
+
const tapCoordinates = boundsObj ? { x: Math.floor((boundsObj.left + boundsObj.right) / 2), y: Math.floor((boundsObj.top + boundsObj.bottom) / 2) } : null;
|
|
206
|
+
const outEl = {
|
|
207
|
+
text: best.text ?? null,
|
|
208
|
+
resourceId: best.resourceId ?? null,
|
|
209
|
+
contentDesc: best.contentDescription ?? best.contentDesc ?? null,
|
|
210
|
+
class: best.type ?? best.class ?? null,
|
|
211
|
+
bounds: boundsObj,
|
|
212
|
+
clickable: !!best.clickable,
|
|
213
|
+
enabled: !!best.enabled,
|
|
214
|
+
tapCoordinates,
|
|
215
|
+
telemetry: {
|
|
216
|
+
matchedIndex: best?._index ?? null,
|
|
217
|
+
matchedInteractable: !!best?._interactable
|
|
218
|
+
}
|
|
219
|
+
};
|
|
220
|
+
const scoreVal = Math.min(1, Number(bestScore.toFixed(3)));
|
|
221
|
+
return { found: true, element: outEl, score: scoreVal, confidence: scoreVal };
|
|
222
|
+
}
|
|
38
223
|
static async waitForScreenChangeHandler({ platform, previousFingerprint, timeoutMs = 5000, pollIntervalMs = 300, deviceId }) {
|
|
39
224
|
const start = Date.now();
|
|
40
225
|
let lastFingerprint = null;
|
|
@@ -60,14 +245,14 @@ export class ToolsInteract {
|
|
|
60
245
|
lastFingerprint = confirmFp;
|
|
61
246
|
continue;
|
|
62
247
|
}
|
|
63
|
-
catch {
|
|
64
|
-
|
|
248
|
+
catch (e) {
|
|
249
|
+
console.error('Error confirming fingerprint:', e);
|
|
65
250
|
continue;
|
|
66
251
|
}
|
|
67
252
|
}
|
|
68
253
|
}
|
|
69
|
-
catch {
|
|
70
|
-
|
|
254
|
+
catch (e) {
|
|
255
|
+
console.error('Error getting screen fingerprint:', e);
|
|
71
256
|
}
|
|
72
257
|
await new Promise(resolve => setTimeout(resolve, pollIntervalMs));
|
|
73
258
|
}
|
package/dist/server.js
CHANGED
|
@@ -332,6 +332,21 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
|
|
|
332
332
|
required: ["platform", "text"]
|
|
333
333
|
}
|
|
334
334
|
},
|
|
335
|
+
{
|
|
336
|
+
name: "find_element",
|
|
337
|
+
description: "Find a UI element by semantic query (text, content-desc, resource-id, class). Returns best match.",
|
|
338
|
+
inputSchema: {
|
|
339
|
+
type: "object",
|
|
340
|
+
properties: {
|
|
341
|
+
query: { type: "string", description: "Search query (text or label)" },
|
|
342
|
+
exact: { type: "boolean", description: "Require exact match (true/false)", default: false },
|
|
343
|
+
timeoutMs: { type: "number", description: "Timeout in ms to keep searching", default: 3000 },
|
|
344
|
+
platform: { type: "string", enum: ["android", "ios"], description: "Optional platform override" },
|
|
345
|
+
deviceId: { type: "string", description: "Optional device serial/udid" }
|
|
346
|
+
},
|
|
347
|
+
required: ["query"]
|
|
348
|
+
}
|
|
349
|
+
},
|
|
335
350
|
{
|
|
336
351
|
name: "tap",
|
|
337
352
|
description: "Simulate a finger tap on the device screen at specific coordinates.",
|
|
@@ -602,6 +617,11 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
602
617
|
const res = await ToolsInteract.waitForElementHandler({ platform, text, timeout, deviceId });
|
|
603
618
|
return wrapResponse(res);
|
|
604
619
|
}
|
|
620
|
+
if (name === "find_element") {
|
|
621
|
+
const { query, exact = false, timeoutMs = 3000, platform, deviceId } = (args || {});
|
|
622
|
+
const res = await ToolsInteract.findElementHandler({ query, exact, timeoutMs, platform, deviceId });
|
|
623
|
+
return wrapResponse(res);
|
|
624
|
+
}
|
|
605
625
|
if (name === "tap") {
|
|
606
626
|
const { platform, x, y, deviceId } = (args || {});
|
|
607
627
|
const res = await ToolsInteract.tapHandler({ platform, x, y, deviceId });
|
package/docs/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,10 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to the **Mobile Debug MCP** project will be documented in this file.
|
|
4
4
|
|
|
5
|
+
## [0.18.0]
|
|
6
|
+
- Added `find_element` interact tool: semantic UI element search with actionable tap coordinates and lightweight telemetry. The tool searches the UI tree for the best match by text, content description, resource-id, and class, scores candidates (exact, partial, resource-id), and returns the most relevant visible element. When a matching node is non-interactable (e.g., Compose Text child), the tool locates a clickable ancestor (parent or containing element) and returns actionable tapCoordinates (x,y). The handler also returns a `confidence` value and `telemetry` metadata (matchedIndex, matchedInteractable) to aid agent decision-making and logging. Implemented as `ToolsInteract.findElementHandler` and covered by unit tests.
|
|
7
|
+
|
|
8
|
+
|
|
5
9
|
## [0.17.0]
|
|
6
10
|
- Added `capture_debug_snapshot` observe tool: captures a full debugging snapshot including screenshot (base64), UI tree, current activity (Android), screen fingerprint, and recent logs (prefers active log stream, falls back to snapshot logs). Returns a single structured JSON object and includes per-part error fields for partial failures. Implemented as `ToolsObserve.captureDebugSnapshotHandler` and registered in the server.
|
|
7
11
|
|
package/docs/tools/interact.md
CHANGED
|
@@ -101,3 +101,53 @@ Notes:
|
|
|
101
101
|
- Default `timeoutMs` is 5000ms and default `pollIntervalMs` is 300ms; callers may override these.
|
|
102
102
|
- Implemented as an interact-level tool and delegates platform-specific fingerprint calculation to the observe layer (`get_screen_fingerprint`).
|
|
103
103
|
|
|
104
|
+
---
|
|
105
|
+
|
|
106
|
+
## find_element
|
|
107
|
+
|
|
108
|
+
Purpose:
|
|
109
|
+
|
|
110
|
+
Locate a UI element on the current screen using semantic matching and return an actionable element descriptor (including tap coordinates) and confidence telemetry.
|
|
111
|
+
|
|
112
|
+
Input:
|
|
113
|
+
|
|
114
|
+
```json
|
|
115
|
+
{ "query": "string", "exact": false, "timeoutMs": 3000, "platform": "android|ios", "deviceId": "optional device id" }
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
Behaviour:
|
|
119
|
+
|
|
120
|
+
- Fetches the current UI tree (get_ui_tree) and scores visible elements using: text, content description, resource-id, and class name.
|
|
121
|
+
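- Normalises strings (lowercase, trimmed). If `exact=true`, only an exact match on the element's text or content description is accepted; otherwise partial (contains) matches on text and content description, as well as resource-id and class-name matches, are also allowed.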
|
|
122
|
+
- Considers element bounds and visibility; scores non-interactable children as matches and attempts to resolve a clickable ancestor (parent index or containing clickable element) to produce an actionable element.
|
|
123
|
+
- Retries until timeoutMs; stops early for high-confidence matches.
|
|
124
|
+
- Does not block on long operations and returns partial results where appropriate.
|
|
125
|
+
|
|
126
|
+
Output:
|
|
127
|
+
|
|
128
|
+
```json
|
|
129
|
+
{
|
|
130
|
+
"found": true,
|
|
131
|
+
"element": {
|
|
132
|
+
"text": "Login",
|
|
133
|
+
"resourceId": "com.example:id/login",
|
|
134
|
+
"contentDesc": null,
|
|
135
|
+
"class": "android.widget.Button",
|
|
136
|
+
"bounds": { "left":0, "top":0, "right":100, "bottom":50 },
|
|
137
|
+
"clickable": true,
|
|
138
|
+
"enabled": true,
|
|
139
|
+
"tapCoordinates": { "x":50, "y":25 },
|
|
140
|
+
"telemetry": { "matchedIndex": 3, "matchedInteractable": true }
|
|
141
|
+
},
|
|
142
|
+
"score": 1.0,
|
|
143
|
+
"confidence": 1.0
|
|
144
|
+
}
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
Notes:
|
|
148
|
+
|
|
149
|
+
- `tapCoordinates` are the recommended center point to use for `tap` calls.
|
|
150
|
+
- `confidence` mirrors the internal scoring (0..1) and is suitable for telemetry or logging to decide whether to proceed with an automated action.
|
|
151
|
+
- The tool favours actionable (clickable/focusable) targets; when a matching node is not directly actionable, it finds the smallest containing clickable ancestor.
|
|
152
|
+
- Unit tests for edge cases (parent-clickable child-text, resource-id matches, fuzzy matching) are under `test/observe/unit/find_element.test.ts`.
|
|
153
|
+
|
package/package.json
CHANGED
package/src/interact/index.ts
CHANGED
|
@@ -7,6 +7,28 @@ import { ToolsObserve } from '../observe/index.js'
|
|
|
7
7
|
|
|
8
8
|
interface ScreenFingerprintResponse { fingerprint: string | null }
|
|
9
9
|
|
|
10
|
+
/**
 * Loosely-typed view of one node in the UI tree's `elements` list.
 * Every field is optional; several fields are alternative spellings of the
 * same piece of information, and element lookup tries them in the order listed.
 */
interface UiElement {
  // Display text candidates (text, then label, then value).
  text?: string | null;
  label?: string | null;
  value?: string | null;

  // Accessibility description candidates.
  contentDescription?: string | null;
  contentDesc?: string | null;
  accessibilityLabel?: string | null;

  // Identifier candidates.
  resourceId?: string | null;
  resourceID?: string | null;
  id?: string | null;

  // Widget/class name candidates.
  type?: string | null;
  class?: string | null;

  // Rectangle as [left, top, right, bottom].
  bounds?: number[] | null;

  // State flags.
  visible?: boolean;
  clickable?: boolean;
  enabled?: boolean;
  focusable?: boolean;

  // Reference to the parent node: a numeric index into the element list or an id string.
  parentId?: number | string | null;

  // Bookkeeping slots for a matched element: its position in the element
  // list and whether it counted as interactable.
  _index?: number;
  _interactable?: boolean;
}
|
|
31
|
+
|
|
10
32
|
export class ToolsInteract {
|
|
11
33
|
|
|
12
34
|
private static async getInteractionService(platform?: 'android' | 'ios', deviceId?: string) {
|
|
@@ -47,6 +69,160 @@ export class ToolsInteract {
|
|
|
47
69
|
return await interact.scrollToElement(selector, direction, maxScrolls, scrollAmount, resolved.id)
|
|
48
70
|
}
|
|
49
71
|
|
|
72
|
+
/**
 * Find the visible UI element that best matches `query` and describe it in
 * an actionable form (bounds, tap coordinates, confidence).
 *
 * The UI tree is polled through `ToolsObserve.getUITreeHandler` until a
 * high-confidence match (score >= 0.95) is seen or `timeoutMs` elapses; at
 * least one lookup is always attempted. When the matched node is neither
 * clickable nor focusable, a clickable/focusable ancestor from the SAME tree
 * snapshot is returned instead (parentId chain first, then the smallest
 * clickable/focusable element whose bounds contain the match).
 *
 * @param query     Text to look for; compared lower-cased and trimmed.
 * @param exact     When true only exact text / content-description matches count.
 * @param timeoutMs Polling budget in ms; non-numeric values fall back to 3000,
 *                  negative values are treated as 0.
 * @param platform  Forwarded unchanged to the observe layer.
 * @param deviceId  Forwarded unchanged to the observe layer.
 * @returns `{ found: false, error }` or `{ found: true, element, score, confidence }`.
 *          `element.telemetry` always describes the node that matched the
 *          query, even when an ancestor is returned as the tap target.
 */
static async findElementHandler({ query, exact = false, timeoutMs = 3000, platform, deviceId }: { query: string, exact?: boolean, timeoutMs?: number, platform?: 'android' | 'ios', deviceId?: string }) {
  const POLL_INTERVAL_MS = 100
  const HIGH_CONFIDENCE = 0.95
  const MAX_ANCESTOR_HOPS = 20

  const normalize = (s: unknown): string => (s === null || s === undefined) ? '' : String(s).toLowerCase().trim()
  // "Actionable" = can receive a tap/focus; "interactable" additionally counts enabled nodes (used for scoring only).
  const isActionable = (el: UiElement | null | undefined): boolean => !!el && !!(el.clickable || el.focusable)
  const isInteractable = (el: UiElement): boolean => !!(el.clickable || el.enabled || el.focusable)
  const readElements = (tree: unknown): UiElement[] => {
    const list = (tree as { elements?: unknown } | null | undefined)?.elements
    return Array.isArray(list) ? (list as UiElement[]) : []
  }

  const q = normalize(query)
  if (!q) return { found: false, error: 'Empty query' }

  const requestedMs = Number(timeoutMs)
  const budgetMs = Number.isFinite(requestedMs) ? Math.max(0, requestedMs) : 3000

  // Score one element in [0, 1.05]; 0 means "not a candidate".
  const scoreElement = (el: UiElement | null | undefined): number => {
    if (!el || !el.visible) return 0
    const bounds = el.bounds || [0, 0, 0, 0]
    if (!Array.isArray(bounds) || bounds.length < 4) return 0
    const [l, t, r, b] = bounds
    if (r <= l || b <= t) return 0

    const text = normalize(el.text ?? el.label ?? el.value ?? '')
    const content = normalize(el.contentDescription ?? el.contentDesc ?? el.accessibilityLabel ?? '')
    const resourceId = normalize(el.resourceId ?? el.resourceID ?? el.id ?? '')
    const className = normalize(el.type ?? el.class ?? '')

    let score = 0
    if (text === q) score = 1.0
    else if (content === q) score = 0.95
    else if (!exact) {
      // Take the best partial signal instead of the first one that happens to match,
      // so a resource-id hit (0.7) is not masked by a weaker text hit (0.6).
      score = Math.max(
        text.includes(q) ? 0.6 : 0,
        content.includes(q) ? 0.55 : 0,
        resourceId.includes(q) ? 0.7 : 0,
        className.includes(q) ? 0.3 : 0
      )
    }
    // Non-interactable nodes are still scored so their clickable ancestor can be located later.
    if (score > 0 && isInteractable(el)) score += 0.05
    return score
  }

  let best: UiElement | null = null
  let bestScore = 0
  let bestIndex: number | null = null
  let bestSnapshot: UiElement[] = []
  const deadline = Date.now() + budgetMs

  // do/while: always look at the screen at least once, even with a zero budget.
  do {
    try {
      const elements = readElements(await ToolsObserve.getUITreeHandler({ platform, deviceId }))
      for (let i = 0; i < elements.length; i++) {
        const el = elements[i]
        let s = 0
        try { s = scoreElement(el) } catch (e) { console.error('Error scoring element:', e) }
        if (el && s > bestScore) {
          bestScore = s
          best = el
          bestIndex = i
          bestSnapshot = elements
        }
        if (bestScore >= HIGH_CONFIDENCE) break
      }
    } catch (e) { console.error('Error fetching UI tree:', e) }

    if (bestScore >= HIGH_CONFIDENCE) break
    const remaining = deadline - Date.now()
    if (remaining <= 0) break
    await new Promise(r => setTimeout(r, Math.min(POLL_INTERVAL_MS, remaining)))
  } while (Date.now() <= deadline)

  if (!best) return { found: false, error: 'Element not found' }

  const matched: UiElement = best
  const matchedInteractable = isInteractable(matched)
  let target: UiElement = matched
  const childBounds = matched.bounds

  // Only look for an ancestor when the match itself cannot be tapped/focused.
  if (!isActionable(matched) && Array.isArray(childBounds) && childBounds.length >= 4) {
    try {
      // parentId may be an index into the element list or an id/resourceId string.
      const lookupParent = (pid: number | string): UiElement | undefined => {
        const idx = typeof pid === 'number' ? pid : (/^\d+$/.test(pid) ? Number(pid) : null)
        if (idx !== null && bestSnapshot[idx]) return bestSnapshot[idx]
        if (typeof pid === 'string') return bestSnapshot.find((el: UiElement) => !!el && (el.resourceId === pid || el.id === pid))
        return undefined
      }

      let ancestor: UiElement | null = null

      // Strategy 1: climb the parentId chain (bounded, so cyclic data cannot loop forever).
      let cur: UiElement = matched
      for (let hops = 0; hops < MAX_ANCESTOR_HOPS; hops++) {
        const pid = cur.parentId
        if (pid === undefined || pid === null) break
        const parent = lookupParent(pid)
        if (!parent) break
        if (isActionable(parent)) { ancestor = parent; break }
        cur = parent
      }

      // Strategy 2: smallest clickable/focusable element whose bounds fully contain the match.
      if (!ancestor) {
        const [cl, ct, cr, cb] = childBounds
        let smallestArea = Infinity
        for (const el of bestSnapshot) {
          if (!isActionable(el) || !Array.isArray(el.bounds) || el.bounds.length < 4) continue
          const [pl, pt, pr, pb] = el.bounds
          if (pl <= cl && pt <= ct && pr >= cr && pb >= cb) {
            const area = (pr - pl) * (pb - pt)
            if (area < smallestArea) { smallestArea = area; ancestor = el }
          }
        }
      }

      if (ancestor) {
        target = ancestor
        // small score bump to reflect actionability
        bestScore = Math.min(1, bestScore + 0.02)
      }
    } catch (e) { console.error('Error resolving ancestor:', e) }
  }

  const boundsObj = Array.isArray(target.bounds) ? { left: target.bounds[0], top: target.bounds[1], right: target.bounds[2], bottom: target.bounds[3] } : null
  const tapCoordinates = boundsObj ? { x: Math.floor((boundsObj.left + boundsObj.right) / 2), y: Math.floor((boundsObj.top + boundsObj.bottom) / 2) } : null

  const outEl = {
    text: target.text ?? null,
    resourceId: target.resourceId ?? null,
    contentDesc: target.contentDescription ?? target.contentDesc ?? null,
    class: target.type ?? target.class ?? null,
    bounds: boundsObj,
    clickable: !!target.clickable,
    enabled: !!target.enabled,
    tapCoordinates,
    telemetry: {
      matchedIndex: bestIndex,
      matchedInteractable
    }
  }
  const scoreVal = Math.min(1, Number(bestScore.toFixed(3)))
  return { found: true, element: outEl, score: scoreVal, confidence: scoreVal }
}
|
|
225
|
+
|
|
50
226
|
static async waitForScreenChangeHandler({ platform, previousFingerprint, timeoutMs = 5000, pollIntervalMs = 300, deviceId }: { platform?: 'android' | 'ios', previousFingerprint: string, timeoutMs?: number, pollIntervalMs?: number, deviceId?: string }) {
|
|
51
227
|
const start = Date.now()
|
|
52
228
|
let lastFingerprint: string | null = null
|
|
@@ -74,14 +250,9 @@ export class ToolsInteract {
|
|
|
74
250
|
}
|
|
75
251
|
lastFingerprint = confirmFp
|
|
76
252
|
continue
|
|
77
|
-
} catch {
|
|
78
|
-
// ignore and continue polling
|
|
79
|
-
continue
|
|
80
|
-
}
|
|
253
|
+
} catch (e) { console.error('Error confirming fingerprint:', e); continue }
|
|
81
254
|
}
|
|
82
|
-
} catch {
|
|
83
|
-
// ignore transient errors
|
|
84
|
-
}
|
|
255
|
+
} catch (e) { console.error('Error getting screen fingerprint:', e) }
|
|
85
256
|
|
|
86
257
|
await new Promise(resolve => setTimeout(resolve, pollIntervalMs))
|
|
87
258
|
}
|
package/src/server.ts
CHANGED
|
@@ -354,6 +354,21 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
|
|
|
354
354
|
required: ["platform", "text"]
|
|
355
355
|
}
|
|
356
356
|
},
|
|
357
|
+
{
|
|
358
|
+
name: "find_element",
|
|
359
|
+
description: "Find a UI element by semantic query (text, content-desc, resource-id, class). Returns best match.",
|
|
360
|
+
inputSchema: {
|
|
361
|
+
type: "object",
|
|
362
|
+
properties: {
|
|
363
|
+
query: { type: "string", description: "Search query (text or label)" },
|
|
364
|
+
exact: { type: "boolean", description: "Require exact match (true/false)", default: false },
|
|
365
|
+
timeoutMs: { type: "number", description: "Timeout in ms to keep searching", default: 3000 },
|
|
366
|
+
platform: { type: "string", enum: ["android","ios"], description: "Optional platform override" },
|
|
367
|
+
deviceId: { type: "string", description: "Optional device serial/udid" }
|
|
368
|
+
},
|
|
369
|
+
required: ["query"]
|
|
370
|
+
}
|
|
371
|
+
},
|
|
357
372
|
|
|
358
373
|
{
|
|
359
374
|
name: "tap",
|
|
@@ -647,6 +662,12 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
647
662
|
return wrapResponse(res)
|
|
648
663
|
}
|
|
649
664
|
|
|
665
|
+
if (name === "find_element") {
|
|
666
|
+
const { query, exact = false, timeoutMs = 3000, platform, deviceId } = (args || {}) as any
|
|
667
|
+
const res = await ToolsInteract.findElementHandler({ query, exact, timeoutMs, platform, deviceId })
|
|
668
|
+
return wrapResponse(res)
|
|
669
|
+
}
|
|
670
|
+
|
|
650
671
|
if (name === "tap") {
|
|
651
672
|
const { platform, x, y, deviceId } = (args || {}) as any
|
|
652
673
|
const res = await ToolsInteract.tapHandler({ platform, x, y, deviceId })
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import { ToolsInteract } from '../../../src/interact/index.js'
|
|
2
|
+
import { ToolsObserve } from '../../../src/observe/index.js'
|
|
3
|
+
|
|
4
|
+
/**
 * Runs the find_element unit checks against a stubbed
 * `ToolsObserve.getUITreeHandler`, printing each result and a PASS/FAIL line.
 * The original handler is always restored. If any check fails the process
 * exit code is set to 1 and an Error naming the failed checks is thrown, so
 * a failure can no longer pass unnoticed.
 */
async function run() {
  process.stdout.write('Starting find_element unit tests...\n')

  const observe = ToolsObserve as any
  const origGetTree = observe.getUITreeHandler
  const failed: string[] = []

  // Replace the observe layer with a canned tree containing `elements`.
  const stubTree = (elements: unknown[]) => {
    observe.getUITreeHandler = async () => ({
      device: { platform: 'android', id: 'mock' },
      screen: '',
      resolution: { width: 1080, height: 1920 },
      elements
    })
  }

  // Shared shape check: numeric tap coordinates and a numeric confidence.
  const hasTapAndConfidence = (res: any): boolean =>
    !!res.element &&
    !!res.element.tapCoordinates &&
    typeof res.element.tapCoordinates.x === 'number' &&
    typeof res.element.tapCoordinates.y === 'number' &&
    typeof res.confidence === 'number'

  const report = (n: number, res: any, pass: boolean) => {
    process.stdout.write('res' + n + ' ' + JSON.stringify(res, null, 2) + '\n')
    process.stdout.write('Test ' + n + ': ' + (pass ? 'PASS' : 'FAIL') + '\n')
    if (!pass) failed.push('Test ' + n)
  }

  try {
    // Test 1: exact text match
    stubTree([
      { text: 'Login', type: 'android.widget.Button', contentDescription: null, clickable: true, enabled: true, visible: true, bounds: [10,10,100,60], resourceId: 'btn_login' },
      { text: 'Cancel', type: 'android.widget.Button', contentDescription: null, clickable: true, enabled: true, visible: true, bounds: [110,10,200,60], resourceId: 'btn_cancel' }
    ])
    const res1: any = await ToolsInteract.findElementHandler({ query: 'login', exact: true, platform: 'android' })
    report(1, res1, res1.found === true && hasTapAndConfidence(res1) && res1.element.resourceId === 'btn_login')

    // Test 2: partial match & scoring (short timeout: partial matches poll until the deadline)
    stubTree([
      { text: 'Sign in', type: 'android.widget.Button', contentDescription: null, clickable: true, enabled: true, visible: true, bounds: [10,10,100,60], resourceId: 'btn_signin' },
      { text: 'Login with Email', type: 'android.widget.Button', contentDescription: null, clickable: true, enabled: true, visible: true, bounds: [110,10,300,60], resourceId: 'btn_login_email' }
    ])
    const res2: any = await ToolsInteract.findElementHandler({ query: 'login', exact: false, platform: 'android', timeoutMs: 300 })
    report(2, res2, res2.found === true && hasTapAndConfidence(res2) && res2.element.resourceId === 'btn_login_email')

    // Test 3: resourceId match
    stubTree([
      { text: null, type: 'android.widget.ImageView', contentDescription: null, clickable: false, enabled: true, visible: true, bounds: [0,0,50,50], resourceId: 'icon_login' }
    ])
    const res3: any = await ToolsInteract.findElementHandler({ query: 'icon_login', exact: false, platform: 'android', timeoutMs: 300 })
    report(3, res3, res3.found === true && hasTapAndConfidence(res3) && res3.element.resourceId === 'icon_login')

    // Test 4: parent-clickable child-text scenario
    stubTree([
      { text: null, type: 'android.view.View', contentDescription: null, clickable: true, enabled: true, visible: true, bounds: [0,0,400,100], resourceId: 'btn_generate', children: [1] },
      { text: 'Generate Session', type: 'android.widget.TextView', contentDescription: null, clickable: false, enabled: true, visible: true, bounds: [10,10,390,90], resourceId: null, parentId: 0 }
    ])
    const res4: any = await ToolsInteract.findElementHandler({ query: 'generate', exact: false, platform: 'android', timeoutMs: 300 })
    report(4, res4, res4.found === true && hasTapAndConfidence(res4) && res4.element.clickable === true && res4.element.resourceId === 'btn_generate')

    // Test 5: not found
    stubTree([])
    const res5: any = await ToolsInteract.findElementHandler({ query: 'nope', exact: false, platform: 'android', timeoutMs: 300 })
    report(5, res5, res5.found === false)
  } finally {
    observe.getUITreeHandler = origGetTree
  }

  if (failed.length > 0) {
    // Surface the failure to the caller and to the process exit status.
    process.exitCode = 1
    throw new Error('find_element unit tests failed: ' + failed.join(', '))
  }
}
|
|
84
|
+
|
|
85
|
+
run().catch(console.error)
|
package/test/unit/index.ts
CHANGED
|
@@ -12,5 +12,6 @@ import '../manage/unit/detection.test.ts'
|
|
|
12
12
|
import '../manage/unit/mcp_disable_autodetect.test.ts'
|
|
13
13
|
import '../interact/unit/wait_for_screen_change.test.ts'
|
|
14
14
|
import '../observe/unit/capture_debug_snapshot.test.ts'
|
|
15
|
+
import '../observe/unit/find_element.test.ts'
|
|
15
16
|
|
|
16
17
|
console.log('Unit tests loaded.')
|