mobile-debug-mcp 0.26.1 → 0.26.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +3 -0
- package/dist/interact/index.js +169 -102
- package/dist/server/common.js +14 -1
- package/dist/server/tool-definitions.js +22 -4
- package/dist/server/tool-handlers.js +7 -0
- package/dist/server-core.js +1 -1
- package/docs/CHANGELOG.md +6 -0
- package/docs/ROADMAP.md +242 -76
- package/docs/rfcs/005-unified-action-execution-and-verification-model.md +216 -0
- package/docs/rfcs/006-runtime-action-instrumentation-and-binding-layer.md +230 -0
- package/docs/rfcs/007-actionability-resolution-and-executable-target-selection.md +277 -0
- package/docs/specs/mcp-tooling-spec-v1.md +4 -0
- package/docs/tools/interact.md +13 -1
- package/package.json +1 -1
- package/src/interact/index.ts +203 -107
- package/src/server/common.ts +22 -1
- package/src/server/tool-definitions.ts +22 -4
- package/src/server/tool-handlers.ts +7 -0
- package/src/server-core.ts +1 -1
- package/src/types.ts +75 -0
- package/test/unit/observe/find_element.test.ts +5 -0
- package/test/unit/server/response_shapes.test.ts +8 -0
package/AGENTS.md
CHANGED
|
@@ -41,11 +41,14 @@ Portable agent skills live under `skills/`.
|
|
|
41
41
|
- `skills/README.md` — repo-wide skill convention
|
|
42
42
|
- `skills/mcp-builder/` — build/install/toolchain guidance
|
|
43
43
|
- `skills/test-authoring/` — test creation and placement guidance
|
|
44
|
+
- `skills/rfc-review/` — RFC review rubric and response template
|
|
44
45
|
|
|
45
46
|
If the task is about **creating or updating tests**, load `skills/test-authoring/SKILL.md` first.
|
|
46
47
|
|
|
47
48
|
If the task is about **building, installing, or diagnosing native tooling**, load `skills/mcp-builder/SKILL.md` first.
|
|
48
49
|
|
|
50
|
+
If the task is about **reviewing an RFC or spec draft**, load `skills/rfc-review/SKILL.md` first.
|
|
51
|
+
|
|
49
52
|
### Repository docs
|
|
50
53
|
|
|
51
54
|
- `README.md` — high-level repo overview and commands
|
package/dist/interact/index.js
CHANGED
|
@@ -5,7 +5,7 @@ export { AndroidInteract, iOSInteract };
|
|
|
5
5
|
import { resolveTargetDevice } from '../utils/resolve-device.js';
|
|
6
6
|
import { ToolsObserve } from '../observe/index.js';
|
|
7
7
|
import { computeSnapshotSignature } from '../observe/snapshot-metadata.js';
|
|
8
|
-
import {
|
|
8
|
+
import { buildActionExecutionResult } from '../server/common.js';
|
|
9
9
|
export class ToolsInteract {
|
|
10
10
|
static _maxResolvedUiElements = 256;
|
|
11
11
|
static _uiChangeKinds = ['hierarchy_diff', 'text_change', 'state_change'];
|
|
@@ -203,19 +203,34 @@ export class ToolsInteract {
|
|
|
203
203
|
semantic: element.semantic ?? null
|
|
204
204
|
};
|
|
205
205
|
}
|
|
206
|
-
static
|
|
206
|
+
static _summarizeResolutionCandidate(candidate) {
|
|
207
|
+
const bounds = ToolsInteract._normalizeBounds(candidate.el.bounds);
|
|
207
208
|
return {
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
209
|
+
text: candidate.el.text ?? null,
|
|
210
|
+
resource_id: candidate.el.resourceId ?? candidate.el.resourceID ?? candidate.el.id ?? null,
|
|
211
|
+
accessibility_id: candidate.el.contentDescription ?? candidate.el.contentDesc ?? candidate.el.accessibilityLabel ?? candidate.el.label ?? null,
|
|
212
|
+
class: candidate.el.type ?? candidate.el.class ?? null,
|
|
213
|
+
bounds: bounds
|
|
214
|
+
? { left: bounds[0], top: bounds[1], right: bounds[2], bottom: bounds[3] }
|
|
215
|
+
: null,
|
|
216
|
+
clickable: !!candidate.el.clickable,
|
|
217
|
+
enabled: !!candidate.el.enabled,
|
|
218
|
+
score: candidate.score,
|
|
219
|
+
reason: candidate.reason
|
|
217
220
|
};
|
|
218
221
|
}
|
|
222
|
+
static _actionFailure(actionType, selector, resolved, failureCode, retryable, uiFingerprintBefore, uiFingerprintAfter, sourceModule = 'interact') {
|
|
223
|
+
return buildActionExecutionResult({
|
|
224
|
+
actionType,
|
|
225
|
+
selector,
|
|
226
|
+
resolved,
|
|
227
|
+
success: false,
|
|
228
|
+
uiFingerprintBefore,
|
|
229
|
+
uiFingerprintAfter: uiFingerprintAfter ?? null,
|
|
230
|
+
failure: { failureCode, retryable },
|
|
231
|
+
sourceModule
|
|
232
|
+
});
|
|
233
|
+
}
|
|
219
234
|
static _resetResolvedUiElementsForTests() {
|
|
220
235
|
ToolsInteract._resolvedUiElements.clear();
|
|
221
236
|
}
|
|
@@ -350,14 +365,11 @@ export class ToolsInteract {
|
|
|
350
365
|
return await interact.tap(x, y, resolved.id);
|
|
351
366
|
}
|
|
352
367
|
static async tapElementHandler({ elementId }) {
|
|
353
|
-
const timestampMs = Date.now();
|
|
354
|
-
const timestamp = new Date(timestampMs).toISOString();
|
|
355
368
|
const actionType = 'tap_element';
|
|
356
|
-
const actionId = nextActionId(actionType, timestampMs);
|
|
357
369
|
const selector = { elementId };
|
|
358
370
|
const resolved = ToolsInteract._resolvedUiElements.get(elementId);
|
|
359
371
|
if (!resolved) {
|
|
360
|
-
return ToolsInteract._actionFailure(
|
|
372
|
+
return ToolsInteract._actionFailure(actionType, selector, null, 'STALE_REFERENCE', true, null);
|
|
361
373
|
}
|
|
362
374
|
const fingerprintBefore = await ToolsInteract._captureFingerprint(resolved.platform, resolved.deviceId);
|
|
363
375
|
const tree = await ToolsObserve.getUITreeHandler({ platform: resolved.platform, deviceId: resolved.deviceId });
|
|
@@ -366,40 +378,37 @@ export class ToolsInteract {
|
|
|
366
378
|
const elements = Array.isArray(tree?.elements) ? tree.elements : [];
|
|
367
379
|
const currentMatch = ToolsInteract._findCurrentResolvedElement(elements, treePlatform, treeDeviceId, resolved);
|
|
368
380
|
if (!currentMatch) {
|
|
369
|
-
return ToolsInteract._actionFailure(
|
|
381
|
+
return ToolsInteract._actionFailure(actionType, selector, null, 'STALE_REFERENCE', true, fingerprintBefore);
|
|
370
382
|
}
|
|
371
383
|
const resolvedTarget = ToolsInteract._resolvedTargetFromElement(resolved.elementId, currentMatch.el, currentMatch.index);
|
|
372
384
|
if (!ToolsInteract._isVisibleElement(currentMatch.el)) {
|
|
373
|
-
return ToolsInteract._actionFailure(
|
|
385
|
+
return ToolsInteract._actionFailure(actionType, selector, resolvedTarget, 'ELEMENT_NOT_INTERACTABLE', true, fingerprintBefore);
|
|
374
386
|
}
|
|
375
387
|
if (currentMatch.el.enabled === false) {
|
|
376
|
-
return ToolsInteract._actionFailure(
|
|
388
|
+
return ToolsInteract._actionFailure(actionType, selector, resolvedTarget, 'ELEMENT_NOT_INTERACTABLE', true, fingerprintBefore);
|
|
377
389
|
}
|
|
378
390
|
const bounds = ToolsInteract._normalizeBounds(currentMatch.el.bounds) ?? resolved.bounds;
|
|
379
391
|
if (!bounds || bounds[2] <= bounds[0] || bounds[3] <= bounds[1]) {
|
|
380
|
-
return ToolsInteract._actionFailure(
|
|
392
|
+
return ToolsInteract._actionFailure(actionType, selector, resolvedTarget, 'ELEMENT_NOT_INTERACTABLE', true, fingerprintBefore);
|
|
381
393
|
}
|
|
382
394
|
const x = Math.floor((bounds[0] + bounds[2]) / 2);
|
|
383
395
|
const y = Math.floor((bounds[1] + bounds[3]) / 2);
|
|
384
396
|
const tapResult = await ToolsInteract.tapHandler({ platform: resolved.platform, x, y, deviceId: resolved.deviceId });
|
|
385
397
|
if (!tapResult.success) {
|
|
386
398
|
const fingerprintAfterFailure = await ToolsInteract._captureFingerprint(resolved.platform, resolved.deviceId);
|
|
387
|
-
return ToolsInteract._actionFailure(
|
|
399
|
+
return ToolsInteract._actionFailure(actionType, selector, resolvedTarget, 'UNKNOWN', false, fingerprintBefore, fingerprintAfterFailure);
|
|
388
400
|
}
|
|
389
401
|
const fingerprintAfter = await ToolsInteract._captureFingerprint(resolved.platform, resolved.deviceId);
|
|
390
|
-
return {
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
target: {
|
|
396
|
-
selector,
|
|
397
|
-
resolved: resolvedTarget
|
|
398
|
-
},
|
|
402
|
+
return buildActionExecutionResult({
|
|
403
|
+
actionType,
|
|
404
|
+
device: tree?.device,
|
|
405
|
+
selector,
|
|
406
|
+
resolved: resolvedTarget,
|
|
399
407
|
success: true,
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
408
|
+
uiFingerprintBefore: fingerprintBefore,
|
|
409
|
+
uiFingerprintAfter: fingerprintAfter,
|
|
410
|
+
sourceModule: 'interact'
|
|
411
|
+
});
|
|
403
412
|
}
|
|
404
413
|
static async swipeHandler({ platform = 'android', x1, y1, x2, y2, duration, deviceId }) {
|
|
405
414
|
const { interact, resolved } = await ToolsInteract.getInteractionService(platform, deviceId);
|
|
@@ -426,17 +435,18 @@ export class ToolsInteract {
|
|
|
426
435
|
if (!q)
|
|
427
436
|
return { found: false, error: 'Empty query' };
|
|
428
437
|
let best = null;
|
|
429
|
-
let
|
|
430
|
-
let
|
|
431
|
-
|
|
438
|
+
let bestTree = null;
|
|
439
|
+
let bestIterationCandidates = [];
|
|
440
|
+
let shouldStop = false;
|
|
441
|
+
const scoreElement = (el, idx) => {
|
|
432
442
|
if (!el || !el.visible)
|
|
433
|
-
return
|
|
443
|
+
return null;
|
|
434
444
|
const bounds = el.bounds || [0, 0, 0, 0];
|
|
435
445
|
if (!Array.isArray(bounds) || bounds.length < 4)
|
|
436
|
-
return
|
|
446
|
+
return null;
|
|
437
447
|
const [l, t, r, b] = bounds;
|
|
438
448
|
if (r <= l || b <= t)
|
|
439
|
-
return
|
|
449
|
+
return null;
|
|
440
450
|
// Do not early-return on non-interactable elements — score them so we can locate their clickable ancestor later
|
|
441
451
|
const interactable = !!(el.clickable || el.enabled || el.focusable);
|
|
442
452
|
const text = normalize(el.text ?? el.label ?? el.value ?? '');
|
|
@@ -444,64 +454,98 @@ export class ToolsInteract {
|
|
|
444
454
|
const resourceId = normalize(el.resourceId ?? el.resourceID ?? el.id ?? '');
|
|
445
455
|
const className = normalize(el.type ?? el.class ?? '');
|
|
446
456
|
let score = 0;
|
|
457
|
+
let reason = 'best_scoring_candidate';
|
|
447
458
|
if (exact) {
|
|
448
|
-
if (text && text === q)
|
|
459
|
+
if (text && text === q) {
|
|
449
460
|
score = 1.0;
|
|
450
|
-
|
|
461
|
+
reason = 'exact_text_match';
|
|
462
|
+
}
|
|
463
|
+
else if (content && content === q) {
|
|
451
464
|
score = 0.95;
|
|
465
|
+
reason = 'exact_content_desc_match';
|
|
466
|
+
}
|
|
467
|
+
else if (resourceId && resourceId === q) {
|
|
468
|
+
score = 0.92;
|
|
469
|
+
reason = 'exact_resource_id_match';
|
|
470
|
+
}
|
|
471
|
+
else if (className && className === q) {
|
|
472
|
+
score = 0.3;
|
|
473
|
+
reason = 'exact_class_match';
|
|
474
|
+
}
|
|
452
475
|
}
|
|
453
476
|
else {
|
|
454
|
-
if (text && text === q)
|
|
477
|
+
if (text && text === q) {
|
|
455
478
|
score = 1.0;
|
|
456
|
-
|
|
479
|
+
reason = 'exact_text_match';
|
|
480
|
+
}
|
|
481
|
+
else if (content && content === q) {
|
|
457
482
|
score = 0.95;
|
|
458
|
-
|
|
483
|
+
reason = 'exact_content_desc_match';
|
|
484
|
+
}
|
|
485
|
+
else if (resourceId && resourceId === q) {
|
|
486
|
+
score = 0.92;
|
|
487
|
+
reason = 'exact_resource_id_match';
|
|
488
|
+
}
|
|
489
|
+
else if (text && text.includes(q)) {
|
|
459
490
|
score = 0.6;
|
|
460
|
-
|
|
491
|
+
reason = 'partial_text_match';
|
|
492
|
+
}
|
|
493
|
+
else if (content && content.includes(q)) {
|
|
461
494
|
score = 0.55;
|
|
462
|
-
|
|
495
|
+
reason = 'partial_content_desc_match';
|
|
496
|
+
}
|
|
497
|
+
else if (resourceId && resourceId.includes(q)) {
|
|
463
498
|
score = 0.7;
|
|
464
|
-
|
|
499
|
+
reason = 'partial_resource_id_match';
|
|
500
|
+
}
|
|
501
|
+
else if (className && className.includes(q)) {
|
|
465
502
|
score = 0.3;
|
|
503
|
+
reason = 'partial_class_match';
|
|
504
|
+
}
|
|
466
505
|
}
|
|
467
506
|
if (score > 0 && interactable)
|
|
468
507
|
score += 0.05;
|
|
469
|
-
|
|
508
|
+
if (score <= 0)
|
|
509
|
+
return null;
|
|
510
|
+
return { el, idx, score, reason, interactable };
|
|
470
511
|
};
|
|
471
512
|
while (Date.now() <= deadline) {
|
|
472
513
|
try {
|
|
473
514
|
const tree = await ToolsObserve.getUITreeHandler({ platform, deviceId });
|
|
474
|
-
lastTree = tree;
|
|
475
515
|
if (tree && Array.isArray(tree.elements)) {
|
|
476
516
|
const elements = tree.elements;
|
|
517
|
+
const iterationCandidates = [];
|
|
518
|
+
let iterationImprovedBest = false;
|
|
477
519
|
for (let i = 0; i < elements.length; i++) {
|
|
478
520
|
const el = elements[i];
|
|
479
521
|
try {
|
|
480
|
-
const
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
522
|
+
const candidate = scoreElement(el, i);
|
|
523
|
+
if (!candidate)
|
|
524
|
+
continue;
|
|
525
|
+
iterationCandidates.push(candidate);
|
|
526
|
+
if (!best || candidate.score > best.score) {
|
|
527
|
+
best = candidate;
|
|
528
|
+
bestTree = tree;
|
|
529
|
+
iterationImprovedBest = true;
|
|
530
|
+
if (best.score >= 0.95) {
|
|
531
|
+
shouldStop = true;
|
|
532
|
+
break;
|
|
488
533
|
}
|
|
489
534
|
}
|
|
490
|
-
if (bestScore >= 0.95)
|
|
491
|
-
break;
|
|
492
535
|
}
|
|
493
536
|
catch (e) {
|
|
494
537
|
console.error('Error scoring element:', e);
|
|
495
538
|
}
|
|
496
539
|
}
|
|
497
|
-
if (
|
|
498
|
-
|
|
540
|
+
if (iterationImprovedBest) {
|
|
541
|
+
bestIterationCandidates = iterationCandidates.slice();
|
|
542
|
+
}
|
|
499
543
|
}
|
|
500
544
|
}
|
|
501
545
|
catch (e) {
|
|
502
546
|
console.error('Error fetching UI tree:', e);
|
|
503
547
|
}
|
|
504
|
-
if (Date.now() > deadline)
|
|
548
|
+
if (shouldStop || Date.now() > deadline)
|
|
505
549
|
break;
|
|
506
550
|
await new Promise(r => setTimeout(r, 100));
|
|
507
551
|
}
|
|
@@ -509,17 +553,17 @@ export class ToolsInteract {
|
|
|
509
553
|
return { found: false, error: 'Element not found' };
|
|
510
554
|
// If the best match is not interactable, try to resolve an actionable ancestor.
|
|
511
555
|
try {
|
|
512
|
-
const elements = (
|
|
513
|
-
const screen =
|
|
556
|
+
const elements = (bestTree && Array.isArray(bestTree.elements)) ? bestTree.elements : [];
|
|
557
|
+
const screen = bestTree?.resolution && typeof bestTree.resolution === 'object' ? bestTree.resolution : null;
|
|
514
558
|
let chosen = best;
|
|
515
|
-
const childBounds = Array.isArray(chosen?.bounds) ? chosen.bounds : null;
|
|
559
|
+
const childBounds = Array.isArray(chosen?.el?.bounds) ? chosen.el.bounds : null;
|
|
516
560
|
// Strategy 1: if parentId references an index, climb that chain
|
|
517
561
|
let resolvedAncestor = null;
|
|
518
|
-
if (childBounds && (chosen.parentId !== undefined && chosen.parentId !== null)) {
|
|
562
|
+
if (childBounds && (chosen.el.parentId !== undefined && chosen.el.parentId !== null)) {
|
|
519
563
|
let cur = chosen;
|
|
520
564
|
let safety = 0;
|
|
521
|
-
while (cur && safety < 20 && !(cur.clickable || cur.focusable) && (cur.parentId !== undefined && cur.parentId !== null)) {
|
|
522
|
-
let pid = cur.parentId;
|
|
565
|
+
while (cur && safety < 20 && !(cur.el.clickable || cur.el.focusable) && (cur.el.parentId !== undefined && cur.el.parentId !== null)) {
|
|
566
|
+
let pid = cur.el.parentId;
|
|
523
567
|
let idx = null;
|
|
524
568
|
if (typeof pid === 'number')
|
|
525
569
|
idx = pid;
|
|
@@ -527,18 +571,19 @@ export class ToolsInteract {
|
|
|
527
571
|
idx = Number(pid);
|
|
528
572
|
// If parentId is not an index, try to find by matching resourceId or id field
|
|
529
573
|
if (idx !== null && elements[idx]) {
|
|
530
|
-
cur = elements[idx];
|
|
531
|
-
if (cur && (cur.clickable || cur.enabled || cur.focusable)) {
|
|
574
|
+
cur = { el: elements[idx], idx };
|
|
575
|
+
if (cur && (cur.el.clickable || cur.el.enabled || cur.el.focusable)) {
|
|
532
576
|
resolvedAncestor = cur;
|
|
533
577
|
break;
|
|
534
578
|
}
|
|
535
579
|
}
|
|
536
580
|
else if (typeof pid === 'string') {
|
|
537
581
|
// fallback: search elements for matching resourceId or id
|
|
538
|
-
const
|
|
582
|
+
const foundIndex = elements.findIndex((el) => (el.resourceId === pid || el.id === pid));
|
|
583
|
+
const found = foundIndex >= 0 ? elements[foundIndex] : null;
|
|
539
584
|
if (found) {
|
|
540
|
-
cur = found;
|
|
541
|
-
if (cur && (cur.clickable || cur.enabled || cur.focusable)) {
|
|
585
|
+
cur = { el: found, idx: foundIndex };
|
|
586
|
+
if (cur && (cur.el.clickable || cur.el.enabled || cur.el.focusable)) {
|
|
542
587
|
resolvedAncestor = cur;
|
|
543
588
|
break;
|
|
544
589
|
}
|
|
@@ -558,16 +603,19 @@ export class ToolsInteract {
|
|
|
558
603
|
if (!resolvedAncestor && childBounds) {
|
|
559
604
|
const [cl, ct, cr, cb] = childBounds;
|
|
560
605
|
// find candidates that are clickable and contain the child bounds
|
|
561
|
-
const candidates = elements
|
|
606
|
+
const candidates = elements
|
|
607
|
+
.map((el, idx) => ({ el, idx }))
|
|
608
|
+
.filter(({ el }) => el && (el.clickable || el.focusable) && Array.isArray(el.bounds) && el.bounds.length >= 4);
|
|
562
609
|
let bestCandidate = null;
|
|
563
610
|
let bestCandidateArea = Infinity;
|
|
564
611
|
for (const c of candidates) {
|
|
565
|
-
const
|
|
612
|
+
const bounds = c.el.bounds;
|
|
613
|
+
const [pl, pt, pr, pb] = bounds;
|
|
566
614
|
if (pl <= cl && pt <= ct && pr >= cr && pb >= cb) {
|
|
567
615
|
const area = (pr - pl) * (pb - pt);
|
|
568
616
|
if (area < bestCandidateArea) {
|
|
569
617
|
bestCandidateArea = area;
|
|
570
|
-
bestCandidate = c
|
|
618
|
+
bestCandidate = c;
|
|
571
619
|
}
|
|
572
620
|
}
|
|
573
621
|
}
|
|
@@ -575,17 +623,24 @@ export class ToolsInteract {
|
|
|
575
623
|
resolvedAncestor = bestCandidate;
|
|
576
624
|
}
|
|
577
625
|
if (resolvedAncestor) {
|
|
578
|
-
best =
|
|
579
|
-
|
|
580
|
-
|
|
626
|
+
best = {
|
|
627
|
+
el: resolvedAncestor.el,
|
|
628
|
+
idx: resolvedAncestor.idx,
|
|
629
|
+
score: Math.min(1, best.score + 0.02),
|
|
630
|
+
reason: 'clickable_parent_preferred',
|
|
631
|
+
interactable: true
|
|
632
|
+
};
|
|
581
633
|
}
|
|
582
|
-
if (best && !(best.clickable || best.focusable)) {
|
|
583
|
-
const nearbyActionable = ToolsInteract._resolveNearbyActionableControl(elements, { el: best, idx: best.
|
|
634
|
+
if (best && !(best.el.clickable || best.el.focusable)) {
|
|
635
|
+
const nearbyActionable = ToolsInteract._resolveNearbyActionableControl(elements, { el: best.el, idx: best.idx }, screen);
|
|
584
636
|
if (nearbyActionable) {
|
|
585
|
-
best =
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
637
|
+
best = {
|
|
638
|
+
el: nearbyActionable.el,
|
|
639
|
+
idx: nearbyActionable.idx,
|
|
640
|
+
score: Math.min(1, best.score + 0.02),
|
|
641
|
+
reason: nearbyActionable.sliderLike ? 'slider_track_preferred' : 'nearby_actionable_control',
|
|
642
|
+
interactable: true
|
|
643
|
+
};
|
|
589
644
|
}
|
|
590
645
|
}
|
|
591
646
|
}
|
|
@@ -594,29 +649,34 @@ export class ToolsInteract {
|
|
|
594
649
|
}
|
|
595
650
|
if (!best)
|
|
596
651
|
return { found: false, error: 'Element not found' };
|
|
597
|
-
const boundsObj = Array.isArray(best.bounds) ? { left: best.bounds[0], top: best.bounds[1], right: best.bounds[2], bottom: best.bounds[3] } : null;
|
|
652
|
+
const boundsObj = Array.isArray(best.el.bounds) ? { left: best.el.bounds[0], top: best.el.bounds[1], right: best.el.bounds[2], bottom: best.el.bounds[3] } : null;
|
|
598
653
|
const tapCoordinates = boundsObj ? { x: Math.floor((boundsObj.left + boundsObj.right) / 2), y: Math.floor((boundsObj.top + boundsObj.bottom) / 2) } : null;
|
|
654
|
+
const uniqueRanked = bestIterationCandidates.filter((candidate, index, array) => index === array.findIndex((other) => other.idx === candidate.idx && other.el === candidate.el));
|
|
655
|
+
const alternateCandidates = uniqueRanked
|
|
656
|
+
.filter((candidate) => candidate.idx !== best.idx || candidate.el !== best.el)
|
|
657
|
+
.slice(0, 3)
|
|
658
|
+
.map((candidate) => ToolsInteract._summarizeResolutionCandidate(candidate));
|
|
599
659
|
const outEl = {
|
|
600
|
-
text: best.text ?? null,
|
|
601
|
-
resourceId: best.resourceId ?? null,
|
|
602
|
-
contentDesc: best.contentDescription ?? best.contentDesc ?? null,
|
|
603
|
-
class: best.type ?? best.class ?? null,
|
|
660
|
+
text: best.el.text ?? null,
|
|
661
|
+
resourceId: best.el.resourceId ?? null,
|
|
662
|
+
contentDesc: best.el.contentDescription ?? best.el.contentDesc ?? null,
|
|
663
|
+
class: best.el.type ?? best.el.class ?? null,
|
|
604
664
|
bounds: boundsObj,
|
|
605
|
-
clickable: !!best.clickable,
|
|
606
|
-
enabled: !!best.enabled,
|
|
607
|
-
stable_id: best.stable_id ?? null,
|
|
608
|
-
role: best.role ?? null,
|
|
609
|
-
test_tag: best.test_tag ?? null,
|
|
610
|
-
selector: best.selector ?? null,
|
|
611
|
-
semantic: best.semantic ?? null,
|
|
665
|
+
clickable: !!best.el.clickable,
|
|
666
|
+
enabled: !!best.el.enabled,
|
|
667
|
+
stable_id: best.el.stable_id ?? null,
|
|
668
|
+
role: best.el.role ?? null,
|
|
669
|
+
test_tag: best.el.test_tag ?? null,
|
|
670
|
+
selector: best.el.selector ?? null,
|
|
671
|
+
semantic: best.el.semantic ?? null,
|
|
612
672
|
tapCoordinates,
|
|
613
673
|
telemetry: {
|
|
614
|
-
matchedIndex: best
|
|
615
|
-
matchedInteractable: !!best
|
|
616
|
-
sliderLike:
|
|
674
|
+
matchedIndex: best.idx ?? null,
|
|
675
|
+
matchedInteractable: !!best.interactable,
|
|
676
|
+
sliderLike: best.reason === 'slider_track_preferred'
|
|
617
677
|
}
|
|
618
678
|
};
|
|
619
|
-
if (best
|
|
679
|
+
if (best.reason === 'slider_track_preferred') {
|
|
620
680
|
const isVertical = !!boundsObj && (boundsObj.bottom - boundsObj.top) > (boundsObj.right - boundsObj.left);
|
|
621
681
|
const interactionHint = {
|
|
622
682
|
kind: 'slider',
|
|
@@ -625,8 +685,15 @@ export class ToolsInteract {
|
|
|
625
685
|
};
|
|
626
686
|
outEl.interactionHint = interactionHint;
|
|
627
687
|
}
|
|
628
|
-
const scoreVal = Math.min(1, Number(
|
|
629
|
-
|
|
688
|
+
const scoreVal = Math.min(1, Number(best.score.toFixed(3)));
|
|
689
|
+
const resolution = {
|
|
690
|
+
confidence: scoreVal,
|
|
691
|
+
reason: best.reason,
|
|
692
|
+
fallback_available: alternateCandidates.length > 0,
|
|
693
|
+
matched_count: uniqueRanked.length,
|
|
694
|
+
alternates: alternateCandidates
|
|
695
|
+
};
|
|
696
|
+
return { found: true, element: outEl, score: scoreVal, confidence: scoreVal, resolution };
|
|
630
697
|
}
|
|
631
698
|
static async waitForUIHandler({ selector, condition = 'exists', timeout_ms = 60000, poll_interval_ms = 300, match, retry = { max_attempts: 1, backoff_ms: 0 }, platform, deviceId }) {
|
|
632
699
|
const overallStart = Date.now();
|
package/dist/server/common.js
CHANGED
|
@@ -95,13 +95,26 @@ export function inferScrollFailure(message) {
|
|
|
95
95
|
return { failureCode: 'TIMEOUT', retryable: true };
|
|
96
96
|
return { failureCode: 'UNKNOWN', retryable: false };
|
|
97
97
|
}
|
|
98
|
-
|
|
98
|
+
const ACTION_LIFECYCLE_STATE_BY_OUTCOME = {
|
|
99
|
+
success: 'pending_verification',
|
|
100
|
+
failure: 'failed'
|
|
101
|
+
};
|
|
102
|
+
export function determineActionLifecycleState({ success, failure }) {
|
|
103
|
+
if (failure)
|
|
104
|
+
return ACTION_LIFECYCLE_STATE_BY_OUTCOME.failure;
|
|
105
|
+
if (success)
|
|
106
|
+
return ACTION_LIFECYCLE_STATE_BY_OUTCOME.success;
|
|
107
|
+
return ACTION_LIFECYCLE_STATE_BY_OUTCOME.success;
|
|
108
|
+
}
|
|
109
|
+
export function buildActionExecutionResult({ actionType, device, selector, resolved, success, uiFingerprintBefore, uiFingerprintAfter, failure, details, sourceModule }) {
|
|
99
110
|
const timestampMs = Date.now();
|
|
100
111
|
const timestamp = new Date(timestampMs).toISOString();
|
|
101
112
|
return {
|
|
102
113
|
action_id: nextActionId(actionType, timestampMs),
|
|
103
114
|
timestamp,
|
|
104
115
|
action_type: actionType,
|
|
116
|
+
lifecycle_state: determineActionLifecycleState({ success, failure }),
|
|
117
|
+
source_module: sourceModule,
|
|
105
118
|
...(device ? { device } : {}),
|
|
106
119
|
target: {
|
|
107
120
|
selector,
|
|
@@ -11,7 +11,9 @@ Inputs:
|
|
|
11
11
|
|
|
12
12
|
Output Structure:
|
|
13
13
|
- action_id, timestamp (ISO 8601), action_type
|
|
14
|
-
-
|
|
14
|
+
- lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
|
|
15
|
+
- source_module: runtime source of the action envelope
|
|
16
|
+
- target.selector = { appId }
|
|
15
17
|
- success = true when launch was dispatched successfully
|
|
16
18
|
- failure_code/retryable when launch dispatch fails
|
|
17
19
|
- ui_fingerprint_before/ui_fingerprint_after when available
|
|
@@ -84,7 +86,9 @@ Inputs:
|
|
|
84
86
|
|
|
85
87
|
Output Structure:
|
|
86
88
|
- action_id, timestamp (ISO 8601), action_type
|
|
87
|
-
-
|
|
89
|
+
- lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
|
|
90
|
+
- source_module: runtime source of the action envelope
|
|
91
|
+
- target.selector = { appId }
|
|
88
92
|
- success = true when the restart command completed
|
|
89
93
|
- failure_code/retryable when restart dispatch fails
|
|
90
94
|
- ui_fingerprint_before/ui_fingerprint_after when available
|
|
@@ -592,7 +596,9 @@ Recommended Usage:
|
|
|
592
596
|
},
|
|
593
597
|
{
|
|
594
598
|
name: 'find_element',
|
|
595
|
-
description:
|
|
599
|
+
description: `Find a UI element by semantic query (text, content-desc, resource-id, class).
|
|
600
|
+
|
|
601
|
+
Returns the best match plus resolution metadata when available, including confidence, selection reason, and fallback alternates.`,
|
|
596
602
|
inputSchema: {
|
|
597
603
|
type: 'object',
|
|
598
604
|
properties: {
|
|
@@ -617,7 +623,9 @@ Inputs:
|
|
|
617
623
|
|
|
618
624
|
Output Structure:
|
|
619
625
|
- action_id, timestamp (ISO 8601), action_type
|
|
620
|
-
-
|
|
626
|
+
- lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
|
|
627
|
+
- source_module: runtime source of the action envelope
|
|
628
|
+
- target.selector = { x, y }
|
|
621
629
|
- success = true when the tap was dispatched
|
|
622
630
|
- failure_code/retryable when dispatch fails
|
|
623
631
|
- ui_fingerprint_before/ui_fingerprint_after when available
|
|
@@ -673,6 +681,8 @@ Output Structure:
|
|
|
673
681
|
- action_id: unique timestamp-based action identifier
|
|
674
682
|
- timestamp: ISO 8601 timestamp for the action attempt
|
|
675
683
|
- action_type: "tap_element"
|
|
684
|
+
- lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
|
|
685
|
+
- source_module: runtime source of the action envelope
|
|
676
686
|
- target.selector: original target handle ({ elementId })
|
|
677
687
|
- target.resolved: minimal resolved element info used for the tap
|
|
678
688
|
- success: true when the tap was dispatched
|
|
@@ -725,6 +735,8 @@ Inputs:
|
|
|
725
735
|
|
|
726
736
|
Output Structure:
|
|
727
737
|
- action_id, timestamp (ISO 8601), action_type
|
|
738
|
+
- lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
|
|
739
|
+
- source_module: runtime source of the action envelope
|
|
728
740
|
- target.selector = { x1, y1, x2, y2, duration }
|
|
729
741
|
- success = true when the swipe was dispatched
|
|
730
742
|
- failure_code/retryable when dispatch fails
|
|
@@ -777,6 +789,8 @@ Inputs:
|
|
|
777
789
|
|
|
778
790
|
Output Structure:
|
|
779
791
|
- action_id, timestamp (ISO 8601), action_type
|
|
792
|
+
- lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
|
|
793
|
+
- source_module: runtime source of the action envelope
|
|
780
794
|
- target.selector = original selector
|
|
781
795
|
- target.resolved = minimal resolved element info when found
|
|
782
796
|
- success = true when scrolling produced a visible target element
|
|
@@ -831,6 +845,8 @@ Inputs:
|
|
|
831
845
|
|
|
832
846
|
Output Structure:
|
|
833
847
|
- action_id, timestamp (ISO 8601), action_type
|
|
848
|
+
- lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
|
|
849
|
+
- source_module: runtime source of the action envelope
|
|
834
850
|
- target.selector = { text }
|
|
835
851
|
- success = true when text input was dispatched
|
|
836
852
|
- failure_code/retryable when dispatch fails
|
|
@@ -880,6 +896,8 @@ Inputs:
|
|
|
880
896
|
|
|
881
897
|
Output Structure:
|
|
882
898
|
- action_id, timestamp (ISO 8601), action_type
|
|
899
|
+
- lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
|
|
900
|
+
- source_module: runtime source of the action envelope
|
|
883
901
|
- target.selector = { key: "back" }
|
|
884
902
|
- success = true when the back action was dispatched
|
|
885
903
|
- failure_code/retryable when dispatch fails
|
|
@@ -15,6 +15,7 @@ async function handleStartApp(args) {
|
|
|
15
15
|
const uiFingerprintAfter = await captureActionFingerprint(platform, deviceId);
|
|
16
16
|
return wrapResponse(buildActionExecutionResult({
|
|
17
17
|
actionType: 'start_app',
|
|
18
|
+
sourceModule: 'server',
|
|
18
19
|
device: res.device,
|
|
19
20
|
selector: { appId },
|
|
20
21
|
success: !!res.appStarted,
|
|
@@ -48,6 +49,7 @@ async function handleRestartApp(args) {
|
|
|
48
49
|
const uiFingerprintAfter = await captureActionFingerprint(platform, deviceId);
|
|
49
50
|
return wrapResponse(buildActionExecutionResult({
|
|
50
51
|
actionType: 'restart_app',
|
|
52
|
+
sourceModule: 'server',
|
|
51
53
|
device: res.device,
|
|
52
54
|
selector: { appId },
|
|
53
55
|
success: !!res.appRestarted,
|
|
@@ -265,6 +267,7 @@ async function handleTap(args) {
|
|
|
265
267
|
const uiFingerprintAfter = await captureActionFingerprint(platform, deviceId);
|
|
266
268
|
return wrapResponse(buildActionExecutionResult({
|
|
267
269
|
actionType: 'tap',
|
|
270
|
+
sourceModule: 'server',
|
|
268
271
|
selector: { x, y },
|
|
269
272
|
success: !!res.success,
|
|
270
273
|
uiFingerprintBefore,
|
|
@@ -292,6 +295,7 @@ async function handleSwipe(args) {
|
|
|
292
295
|
const uiFingerprintAfter = await captureActionFingerprint(platform, deviceId);
|
|
293
296
|
return wrapResponse(buildActionExecutionResult({
|
|
294
297
|
actionType: 'swipe',
|
|
298
|
+
sourceModule: 'server',
|
|
295
299
|
selector: { x1, y1, x2, y2, duration },
|
|
296
300
|
success: !!res.success,
|
|
297
301
|
uiFingerprintBefore,
|
|
@@ -312,6 +316,7 @@ async function handleScrollToElement(args) {
|
|
|
312
316
|
const uiFingerprintAfter = await captureActionFingerprint(platform, deviceId);
|
|
313
317
|
return wrapResponse(buildActionExecutionResult({
|
|
314
318
|
actionType: 'scroll_to_element',
|
|
319
|
+
sourceModule: 'server',
|
|
315
320
|
selector: selector ?? null,
|
|
316
321
|
resolved: res?.success && res?.element ? {
|
|
317
322
|
elementId: null,
|
|
@@ -337,6 +342,7 @@ async function handleTypeText(args) {
|
|
|
337
342
|
const uiFingerprintAfter = await captureActionFingerprint('android', deviceId);
|
|
338
343
|
return wrapResponse(buildActionExecutionResult({
|
|
339
344
|
actionType: 'type_text',
|
|
345
|
+
sourceModule: 'server',
|
|
340
346
|
selector: { text },
|
|
341
347
|
success: !!res.success,
|
|
342
348
|
uiFingerprintBefore,
|
|
@@ -352,6 +358,7 @@ async function handlePressBack(args) {
|
|
|
352
358
|
const uiFingerprintAfter = await captureActionFingerprint('android', deviceId);
|
|
353
359
|
return wrapResponse(buildActionExecutionResult({
|
|
354
360
|
actionType: 'press_back',
|
|
361
|
+
sourceModule: 'server',
|
|
355
362
|
selector: { key: 'back' },
|
|
356
363
|
success: !!res.success,
|
|
357
364
|
uiFingerprintBefore,
|
package/dist/server-core.js
CHANGED
|
@@ -6,7 +6,7 @@ import { handleToolCall } from './server/tool-handlers.js';
|
|
|
6
6
|
export { wrapResponse, toolDefinitions, handleToolCall };
|
|
7
7
|
export const serverInfo = {
|
|
8
8
|
name: 'mobile-debug-mcp',
|
|
9
|
-
version: '0.26.
|
|
9
|
+
version: '0.26.3'
|
|
10
10
|
};
|
|
11
11
|
export function createServer() {
|
|
12
12
|
const server = new Server(serverInfo, {
|
package/docs/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to the **Mobile Debug MCP** project will be documented in this file.
|
|
4
4
|
|
|
5
|
+
## [0.26.3]
|
|
6
|
+
- updates the `find_element` tool to return detailed resolution metadata, including confidence scores, selection reasons, and fallback alternates
|
|
7
|
+
|
|
8
|
+
## [0.26.2]
|
|
9
|
+
- unified action execution and verification model
|
|
10
|
+
|
|
5
11
|
## [0.26.1]
|
|
6
12
|
- Fixed overuse of `get_network_activity`
|
|
7
13
|
|