mobile-debug-mcp 0.26.1 → 0.26.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +3 -0
- package/dist/interact/index.js +169 -102
- package/dist/server/common.js +14 -1
- package/dist/server/tool-definitions.js +22 -4
- package/dist/server/tool-handlers.js +7 -0
- package/dist/server-core.js +1 -1
- package/docs/CHANGELOG.md +6 -0
- package/docs/ROADMAP.md +242 -76
- package/docs/rfcs/005-unified-action-execution-and-verification-model.md +216 -0
- package/docs/rfcs/006-runtime-action-instrumentation-and-binding-layer.md +230 -0
- package/docs/rfcs/007-actionability-resolution-and-executable-target-selection.md +277 -0
- package/docs/specs/mcp-tooling-spec-v1.md +4 -0
- package/docs/tools/interact.md +13 -1
- package/package.json +1 -1
- package/src/interact/index.ts +203 -107
- package/src/server/common.ts +22 -1
- package/src/server/tool-definitions.ts +22 -4
- package/src/server/tool-handlers.ts +7 -0
- package/src/server-core.ts +1 -1
- package/src/types.ts +75 -0
- package/test/unit/observe/find_element.test.ts +5 -0
- package/test/unit/server/response_shapes.test.ts +8 -0
package/src/interact/index.ts
CHANGED
|
@@ -6,10 +6,11 @@ export { AndroidInteract, iOSInteract };
|
|
|
6
6
|
import { resolveTargetDevice } from '../utils/resolve-device.js'
|
|
7
7
|
import { ToolsObserve } from '../observe/index.js'
|
|
8
8
|
import { computeSnapshotSignature } from '../observe/snapshot-metadata.js'
|
|
9
|
-
import {
|
|
9
|
+
import { buildActionExecutionResult } from '../server/common.js'
|
|
10
10
|
import type {
|
|
11
11
|
ActionFailureCode,
|
|
12
12
|
ActionTargetResolved,
|
|
13
|
+
FindElementResponse,
|
|
13
14
|
ExpectElementVisibleResponse,
|
|
14
15
|
ExpectStateResponse,
|
|
15
16
|
ExpectScreenResponse,
|
|
@@ -68,6 +69,32 @@ interface UiChangeSignatureSet {
|
|
|
68
69
|
state: string | null
|
|
69
70
|
}
|
|
70
71
|
|
|
72
|
+
interface RankedResolutionCandidate {
|
|
73
|
+
el: UiElement
|
|
74
|
+
idx: number
|
|
75
|
+
score: number
|
|
76
|
+
reason: string
|
|
77
|
+
interactable: boolean
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
interface FindElementResolutionSummary {
|
|
81
|
+
confidence: number
|
|
82
|
+
reason: string
|
|
83
|
+
fallback_available: boolean
|
|
84
|
+
matched_count: number
|
|
85
|
+
alternates: Array<{
|
|
86
|
+
text: string | null
|
|
87
|
+
resource_id: string | null
|
|
88
|
+
accessibility_id: string | null
|
|
89
|
+
class: string | null
|
|
90
|
+
bounds: { left: number; top: number; right: number; bottom: number } | null
|
|
91
|
+
clickable: boolean
|
|
92
|
+
enabled: boolean
|
|
93
|
+
score: number
|
|
94
|
+
reason: string
|
|
95
|
+
}>
|
|
96
|
+
}
|
|
97
|
+
|
|
71
98
|
|
|
72
99
|
export class ToolsInteract {
|
|
73
100
|
private static readonly _maxResolvedUiElements = 256
|
|
@@ -290,28 +317,43 @@ export class ToolsInteract {
|
|
|
290
317
|
}
|
|
291
318
|
}
|
|
292
319
|
|
|
320
|
+
private static _summarizeResolutionCandidate(candidate: RankedResolutionCandidate): FindElementResolutionSummary['alternates'][number] {
|
|
321
|
+
const bounds = ToolsInteract._normalizeBounds(candidate.el.bounds)
|
|
322
|
+
return {
|
|
323
|
+
text: candidate.el.text ?? null,
|
|
324
|
+
resource_id: candidate.el.resourceId ?? candidate.el.resourceID ?? candidate.el.id ?? null,
|
|
325
|
+
accessibility_id: candidate.el.contentDescription ?? candidate.el.contentDesc ?? candidate.el.accessibilityLabel ?? candidate.el.label ?? null,
|
|
326
|
+
class: candidate.el.type ?? candidate.el.class ?? null,
|
|
327
|
+
bounds: bounds
|
|
328
|
+
? { left: bounds[0], top: bounds[1], right: bounds[2], bottom: bounds[3] }
|
|
329
|
+
: null,
|
|
330
|
+
clickable: !!candidate.el.clickable,
|
|
331
|
+
enabled: !!candidate.el.enabled,
|
|
332
|
+
score: candidate.score,
|
|
333
|
+
reason: candidate.reason
|
|
334
|
+
}
|
|
335
|
+
}
|
|
336
|
+
|
|
293
337
|
private static _actionFailure(
|
|
294
|
-
actionId: string,
|
|
295
|
-
timestamp: string,
|
|
296
338
|
actionType: string,
|
|
297
339
|
selector: Record<string, unknown> | null,
|
|
298
340
|
resolved: ActionTargetResolved | null,
|
|
299
341
|
failureCode: ActionFailureCode,
|
|
300
342
|
retryable: boolean,
|
|
301
343
|
uiFingerprintBefore: string | null,
|
|
302
|
-
uiFingerprintAfter?: string | null
|
|
344
|
+
uiFingerprintAfter?: string | null,
|
|
345
|
+
sourceModule: 'server' | 'interact' = 'interact'
|
|
303
346
|
): TapElementResponse {
|
|
304
|
-
return {
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
target: { selector, resolved },
|
|
347
|
+
return buildActionExecutionResult({
|
|
348
|
+
actionType,
|
|
349
|
+
selector,
|
|
350
|
+
resolved,
|
|
309
351
|
success: false,
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
}
|
|
352
|
+
uiFingerprintBefore,
|
|
353
|
+
uiFingerprintAfter: uiFingerprintAfter ?? null,
|
|
354
|
+
failure: { failureCode, retryable },
|
|
355
|
+
sourceModule
|
|
356
|
+
})
|
|
315
357
|
}
|
|
316
358
|
|
|
317
359
|
static _resetResolvedUiElementsForTests() {
|
|
@@ -472,14 +514,11 @@ export class ToolsInteract {
|
|
|
472
514
|
}
|
|
473
515
|
|
|
474
516
|
static async tapElementHandler({ elementId }: { elementId: string }): Promise<TapElementResponse> {
|
|
475
|
-
const timestampMs = Date.now()
|
|
476
|
-
const timestamp = new Date(timestampMs).toISOString()
|
|
477
517
|
const actionType = 'tap_element'
|
|
478
|
-
const actionId = nextActionId(actionType, timestampMs)
|
|
479
518
|
const selector = { elementId }
|
|
480
519
|
const resolved = ToolsInteract._resolvedUiElements.get(elementId)
|
|
481
520
|
if (!resolved) {
|
|
482
|
-
return ToolsInteract._actionFailure(
|
|
521
|
+
return ToolsInteract._actionFailure(actionType, selector, null, 'STALE_REFERENCE', true, null)
|
|
483
522
|
}
|
|
484
523
|
|
|
485
524
|
const fingerprintBefore = await ToolsInteract._captureFingerprint(resolved.platform, resolved.deviceId)
|
|
@@ -491,22 +530,22 @@ export class ToolsInteract {
|
|
|
491
530
|
const currentMatch = ToolsInteract._findCurrentResolvedElement(elements, treePlatform, treeDeviceId, resolved)
|
|
492
531
|
|
|
493
532
|
if (!currentMatch) {
|
|
494
|
-
return ToolsInteract._actionFailure(
|
|
533
|
+
return ToolsInteract._actionFailure(actionType, selector, null, 'STALE_REFERENCE', true, fingerprintBefore)
|
|
495
534
|
}
|
|
496
535
|
|
|
497
536
|
const resolvedTarget = ToolsInteract._resolvedTargetFromElement(resolved.elementId, currentMatch.el, currentMatch.index)
|
|
498
537
|
|
|
499
538
|
if (!ToolsInteract._isVisibleElement(currentMatch.el)) {
|
|
500
|
-
return ToolsInteract._actionFailure(
|
|
539
|
+
return ToolsInteract._actionFailure(actionType, selector, resolvedTarget, 'ELEMENT_NOT_INTERACTABLE', true, fingerprintBefore)
|
|
501
540
|
}
|
|
502
541
|
|
|
503
542
|
if (currentMatch.el.enabled === false) {
|
|
504
|
-
return ToolsInteract._actionFailure(
|
|
543
|
+
return ToolsInteract._actionFailure(actionType, selector, resolvedTarget, 'ELEMENT_NOT_INTERACTABLE', true, fingerprintBefore)
|
|
505
544
|
}
|
|
506
545
|
|
|
507
546
|
const bounds = ToolsInteract._normalizeBounds(currentMatch.el.bounds) ?? resolved.bounds
|
|
508
547
|
if (!bounds || bounds[2] <= bounds[0] || bounds[3] <= bounds[1]) {
|
|
509
|
-
return ToolsInteract._actionFailure(
|
|
548
|
+
return ToolsInteract._actionFailure(actionType, selector, resolvedTarget, 'ELEMENT_NOT_INTERACTABLE', true, fingerprintBefore)
|
|
510
549
|
}
|
|
511
550
|
|
|
512
551
|
const x = Math.floor((bounds[0] + bounds[2]) / 2)
|
|
@@ -515,23 +554,20 @@ export class ToolsInteract {
|
|
|
515
554
|
|
|
516
555
|
if (!tapResult.success) {
|
|
517
556
|
const fingerprintAfterFailure = await ToolsInteract._captureFingerprint(resolved.platform, resolved.deviceId)
|
|
518
|
-
return ToolsInteract._actionFailure(
|
|
557
|
+
return ToolsInteract._actionFailure(actionType, selector, resolvedTarget, 'UNKNOWN', false, fingerprintBefore, fingerprintAfterFailure)
|
|
519
558
|
}
|
|
520
559
|
|
|
521
560
|
const fingerprintAfter = await ToolsInteract._captureFingerprint(resolved.platform, resolved.deviceId)
|
|
522
|
-
return {
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
target: {
|
|
528
|
-
selector,
|
|
529
|
-
resolved: resolvedTarget
|
|
530
|
-
},
|
|
561
|
+
return buildActionExecutionResult({
|
|
562
|
+
actionType,
|
|
563
|
+
device: tree?.device,
|
|
564
|
+
selector,
|
|
565
|
+
resolved: resolvedTarget,
|
|
531
566
|
success: true,
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
567
|
+
uiFingerprintBefore: fingerprintBefore,
|
|
568
|
+
uiFingerprintAfter: fingerprintAfter,
|
|
569
|
+
sourceModule: 'interact'
|
|
570
|
+
})
|
|
535
571
|
}
|
|
536
572
|
|
|
537
573
|
static async swipeHandler({ platform = 'android', x1, y1, x2, y2, duration, deviceId }: { platform?: 'android' | 'ios', x1: number, y1: number, x2: number, y2: number, duration: number, deviceId?: string }) {
|
|
@@ -554,7 +590,7 @@ export class ToolsInteract {
|
|
|
554
590
|
return await interact.scrollToElement(selector, direction, maxScrolls, scrollAmount, resolved.id)
|
|
555
591
|
}
|
|
556
592
|
|
|
557
|
-
static async findElementHandler({ query, exact = false, timeoutMs = 3000, platform, deviceId }: { query: string, exact?: boolean, timeoutMs?: number, platform?: 'android' | 'ios', deviceId?: string }) {
|
|
593
|
+
static async findElementHandler({ query, exact = false, timeoutMs = 3000, platform, deviceId }: { query: string, exact?: boolean, timeoutMs?: number, platform?: 'android' | 'ios', deviceId?: string }): Promise<FindElementResponse> {
|
|
558
594
|
// Try to use observe layer to fetch the current UI tree and perform a fast semantic search
|
|
559
595
|
const start = Date.now()
|
|
560
596
|
const deadline = start + timeoutMs
|
|
@@ -563,16 +599,17 @@ export class ToolsInteract {
|
|
|
563
599
|
const q = normalize(query)
|
|
564
600
|
if (!q) return { found: false, error: 'Empty query' }
|
|
565
601
|
|
|
566
|
-
let best:
|
|
567
|
-
let
|
|
568
|
-
let
|
|
602
|
+
let best: RankedResolutionCandidate | null = null
|
|
603
|
+
let bestTree: any = null
|
|
604
|
+
let bestIterationCandidates: RankedResolutionCandidate[] = []
|
|
605
|
+
let shouldStop = false
|
|
569
606
|
|
|
570
|
-
const scoreElement = (el: UiElement | null) => {
|
|
571
|
-
if (!el || !el.visible) return
|
|
607
|
+
const scoreElement = (el: UiElement | null, idx: number): RankedResolutionCandidate | null => {
|
|
608
|
+
if (!el || !el.visible) return null
|
|
572
609
|
const bounds = el.bounds || [0,0,0,0]
|
|
573
|
-
if (!Array.isArray(bounds) || bounds.length < 4) return
|
|
610
|
+
if (!Array.isArray(bounds) || bounds.length < 4) return null
|
|
574
611
|
const [l,t,r,b] = bounds
|
|
575
|
-
if (r <= l || b <= t) return
|
|
612
|
+
if (r <= l || b <= t) return null
|
|
576
613
|
// Do not early-return on non-interactable elements — score them so we can locate their clickable ancestor later
|
|
577
614
|
const interactable = !!(el.clickable || el.enabled || el.focusable)
|
|
578
615
|
|
|
@@ -582,44 +619,80 @@ export class ToolsInteract {
|
|
|
582
619
|
const className = normalize(el.type ?? el.class ?? '')
|
|
583
620
|
|
|
584
621
|
let score = 0
|
|
622
|
+
let reason = 'best_scoring_candidate'
|
|
585
623
|
if (exact) {
|
|
586
|
-
if (text && text === q)
|
|
587
|
-
|
|
624
|
+
if (text && text === q) {
|
|
625
|
+
score = 1.0
|
|
626
|
+
reason = 'exact_text_match'
|
|
627
|
+
} else if (content && content === q) {
|
|
628
|
+
score = 0.95
|
|
629
|
+
reason = 'exact_content_desc_match'
|
|
630
|
+
} else if (resourceId && resourceId === q) {
|
|
631
|
+
score = 0.92
|
|
632
|
+
reason = 'exact_resource_id_match'
|
|
633
|
+
} else if (className && className === q) {
|
|
634
|
+
score = 0.3
|
|
635
|
+
reason = 'exact_class_match'
|
|
636
|
+
}
|
|
588
637
|
} else {
|
|
589
|
-
if (text && text === q)
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
else if (content && content
|
|
593
|
-
|
|
594
|
-
|
|
638
|
+
if (text && text === q) {
|
|
639
|
+
score = 1.0
|
|
640
|
+
reason = 'exact_text_match'
|
|
641
|
+
} else if (content && content === q) {
|
|
642
|
+
score = 0.95
|
|
643
|
+
reason = 'exact_content_desc_match'
|
|
644
|
+
} else if (resourceId && resourceId === q) {
|
|
645
|
+
score = 0.92
|
|
646
|
+
reason = 'exact_resource_id_match'
|
|
647
|
+
} else if (text && text.includes(q)) {
|
|
648
|
+
score = 0.6
|
|
649
|
+
reason = 'partial_text_match'
|
|
650
|
+
} else if (content && content.includes(q)) {
|
|
651
|
+
score = 0.55
|
|
652
|
+
reason = 'partial_content_desc_match'
|
|
653
|
+
} else if (resourceId && resourceId.includes(q)) {
|
|
654
|
+
score = 0.7
|
|
655
|
+
reason = 'partial_resource_id_match'
|
|
656
|
+
} else if (className && className.includes(q)) {
|
|
657
|
+
score = 0.3
|
|
658
|
+
reason = 'partial_class_match'
|
|
659
|
+
}
|
|
595
660
|
}
|
|
596
661
|
if (score > 0 && interactable) score += 0.05
|
|
597
|
-
return
|
|
662
|
+
if (score <= 0) return null
|
|
663
|
+
return { el, idx, score, reason, interactable }
|
|
598
664
|
}
|
|
599
665
|
|
|
600
666
|
while (Date.now() <= deadline) {
|
|
601
667
|
try {
|
|
602
|
-
|
|
603
|
-
lastTree = tree
|
|
668
|
+
const tree = await ToolsObserve.getUITreeHandler({ platform, deviceId })
|
|
604
669
|
if (tree && Array.isArray((tree as any).elements)) {
|
|
605
670
|
const elements = ((tree as any).elements as UiElement[])
|
|
671
|
+
const iterationCandidates: RankedResolutionCandidate[] = []
|
|
672
|
+
let iterationImprovedBest = false
|
|
606
673
|
for (let i = 0; i < elements.length; i++) {
|
|
607
674
|
const el = elements[i]
|
|
608
675
|
try {
|
|
609
|
-
const
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
best =
|
|
614
|
-
|
|
676
|
+
const candidate = scoreElement(el, i)
|
|
677
|
+
if (!candidate) continue
|
|
678
|
+
iterationCandidates.push(candidate)
|
|
679
|
+
if (!best || candidate.score > best.score) {
|
|
680
|
+
best = candidate
|
|
681
|
+
bestTree = tree
|
|
682
|
+
iterationImprovedBest = true
|
|
683
|
+
if (best.score >= 0.95) {
|
|
684
|
+
shouldStop = true
|
|
685
|
+
break
|
|
686
|
+
}
|
|
615
687
|
}
|
|
616
|
-
if (bestScore >= 0.95) break
|
|
617
688
|
} catch (e) { console.error('Error scoring element:', e) }
|
|
618
689
|
}
|
|
619
|
-
if (
|
|
690
|
+
if (iterationImprovedBest) {
|
|
691
|
+
bestIterationCandidates = iterationCandidates.slice()
|
|
692
|
+
}
|
|
620
693
|
}
|
|
621
694
|
} catch (e) { console.error('Error fetching UI tree:', e) }
|
|
622
|
-
if (Date.now() > deadline) break
|
|
695
|
+
if (shouldStop || Date.now() > deadline) break
|
|
623
696
|
await new Promise(r => setTimeout(r, 100))
|
|
624
697
|
}
|
|
625
698
|
|
|
@@ -627,31 +700,32 @@ export class ToolsInteract {
|
|
|
627
700
|
|
|
628
701
|
// If the best match is not interactable, try to resolve an actionable ancestor.
|
|
629
702
|
try {
|
|
630
|
-
const elements = (
|
|
631
|
-
const screen =
|
|
632
|
-
let chosen = best as
|
|
633
|
-
const childBounds = Array.isArray(chosen?.bounds) ? chosen.bounds : null
|
|
703
|
+
const elements = (bestTree && Array.isArray(bestTree.elements)) ? (bestTree.elements as UiElement[]) : []
|
|
704
|
+
const screen = bestTree?.resolution && typeof bestTree.resolution === 'object' ? bestTree.resolution as UiResolution : null
|
|
705
|
+
let chosen = best as { el: UiElement, idx: number }
|
|
706
|
+
const childBounds = Array.isArray(chosen?.el?.bounds) ? chosen.el.bounds : null
|
|
634
707
|
|
|
635
708
|
// Strategy 1: if parentId references an index, climb that chain
|
|
636
|
-
let resolvedAncestor:
|
|
637
|
-
if (childBounds && (chosen.parentId !== undefined && chosen.parentId !== null)) {
|
|
709
|
+
let resolvedAncestor: { el: UiElement, idx: number } | null = null
|
|
710
|
+
if (childBounds && (chosen.el.parentId !== undefined && chosen.el.parentId !== null)) {
|
|
638
711
|
let cur = chosen
|
|
639
712
|
let safety = 0
|
|
640
|
-
while (cur && safety < 20 && !(cur.clickable || cur.focusable) && (cur.parentId !== undefined && cur.parentId !== null)) {
|
|
641
|
-
let pid = cur.parentId
|
|
713
|
+
while (cur && safety < 20 && !(cur.el.clickable || cur.el.focusable) && (cur.el.parentId !== undefined && cur.el.parentId !== null)) {
|
|
714
|
+
let pid = cur.el.parentId
|
|
642
715
|
let idx: number | null = null
|
|
643
716
|
if (typeof pid === 'number') idx = pid
|
|
644
717
|
else if (typeof pid === 'string' && /^\d+$/.test(pid)) idx = Number(pid)
|
|
645
718
|
// If parentId is not an index, try to find by matching resourceId or id field
|
|
646
719
|
if (idx !== null && elements[idx]) {
|
|
647
|
-
cur = elements[idx]
|
|
648
|
-
if (cur && (cur.clickable || cur.enabled || cur.focusable)) { resolvedAncestor = cur; break }
|
|
720
|
+
cur = { el: elements[idx], idx }
|
|
721
|
+
if (cur && (cur.el.clickable || cur.el.enabled || cur.el.focusable)) { resolvedAncestor = cur; break }
|
|
649
722
|
} else if (typeof pid === 'string') {
|
|
650
723
|
// fallback: search elements for matching resourceId or id
|
|
651
|
-
const
|
|
724
|
+
const foundIndex = elements.findIndex((el: UiElement)=> (el.resourceId === pid || el.id === pid))
|
|
725
|
+
const found = foundIndex >= 0 ? elements[foundIndex] : null
|
|
652
726
|
if (found) {
|
|
653
|
-
cur = found
|
|
654
|
-
if (cur && (cur.clickable || cur.enabled || cur.focusable)) { resolvedAncestor = cur; break }
|
|
727
|
+
cur = { el: found, idx: foundIndex }
|
|
728
|
+
if (cur && (cur.el.clickable || cur.el.enabled || cur.el.focusable)) { resolvedAncestor = cur; break }
|
|
655
729
|
// otherwise continue climbing if this found element has its own parentId
|
|
656
730
|
} else {
|
|
657
731
|
break
|
|
@@ -667,62 +741,77 @@ export class ToolsInteract {
|
|
|
667
741
|
if (!resolvedAncestor && childBounds) {
|
|
668
742
|
const [cl,ct,cr,cb] = childBounds
|
|
669
743
|
// find candidates that are clickable and contain the child bounds
|
|
670
|
-
const candidates = elements
|
|
671
|
-
|
|
744
|
+
const candidates = elements
|
|
745
|
+
.map((el: UiElement, idx: number) => ({ el, idx }))
|
|
746
|
+
.filter(({ el }) => el && (el.clickable || el.focusable) && Array.isArray(el.bounds) && el.bounds!.length >= 4)
|
|
747
|
+
let bestCandidate: { el: UiElement, idx: number } | null = null
|
|
672
748
|
let bestCandidateArea = Infinity
|
|
673
749
|
for (const c of candidates) {
|
|
674
|
-
const
|
|
750
|
+
const bounds = c.el.bounds as number[]
|
|
751
|
+
const [pl,pt,pr,pb] = bounds
|
|
675
752
|
if (pl <= cl && pt <= ct && pr >= cr && pb >= cb) {
|
|
676
753
|
const area = (pr-pl) * (pb-pt)
|
|
677
|
-
if (area < bestCandidateArea) { bestCandidateArea = area; bestCandidate = c
|
|
754
|
+
if (area < bestCandidateArea) { bestCandidateArea = area; bestCandidate = c }
|
|
678
755
|
}
|
|
679
756
|
}
|
|
680
757
|
if (bestCandidate) resolvedAncestor = bestCandidate
|
|
681
758
|
}
|
|
682
759
|
|
|
683
760
|
if (resolvedAncestor) {
|
|
684
|
-
best =
|
|
685
|
-
|
|
686
|
-
|
|
761
|
+
best = {
|
|
762
|
+
el: resolvedAncestor.el,
|
|
763
|
+
idx: resolvedAncestor.idx,
|
|
764
|
+
score: Math.min(1, best.score + 0.02),
|
|
765
|
+
reason: 'clickable_parent_preferred',
|
|
766
|
+
interactable: true
|
|
767
|
+
}
|
|
687
768
|
}
|
|
688
769
|
|
|
689
|
-
if (best && !(best.clickable || best.focusable)) {
|
|
690
|
-
const nearbyActionable = ToolsInteract._resolveNearbyActionableControl(elements, { el: best, idx: best.
|
|
770
|
+
if (best && !(best.el.clickable || best.el.focusable)) {
|
|
771
|
+
const nearbyActionable = ToolsInteract._resolveNearbyActionableControl(elements, { el: best.el, idx: best.idx }, screen)
|
|
691
772
|
if (nearbyActionable) {
|
|
692
|
-
best =
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
773
|
+
best = {
|
|
774
|
+
el: nearbyActionable.el,
|
|
775
|
+
idx: nearbyActionable.idx,
|
|
776
|
+
score: Math.min(1, best.score + 0.02),
|
|
777
|
+
reason: nearbyActionable.sliderLike ? 'slider_track_preferred' : 'nearby_actionable_control',
|
|
778
|
+
interactable: true
|
|
779
|
+
}
|
|
696
780
|
}
|
|
697
781
|
}
|
|
698
782
|
} catch (e) { console.error('Error resolving ancestor:', e) }
|
|
699
783
|
|
|
700
784
|
if (!best) return { found: false, error: 'Element not found' }
|
|
701
785
|
|
|
702
|
-
const boundsObj = Array.isArray(best.bounds) ? { left: best.bounds[0], top: best.bounds[1], right: best.bounds[2], bottom: best.bounds[3] } : null
|
|
786
|
+
const boundsObj = Array.isArray(best.el.bounds) ? { left: best.el.bounds[0], top: best.el.bounds[1], right: best.el.bounds[2], bottom: best.el.bounds[3] } : null
|
|
703
787
|
const tapCoordinates = boundsObj ? { x: Math.floor((boundsObj.left + boundsObj.right) / 2), y: Math.floor((boundsObj.top + boundsObj.bottom) / 2) } : null
|
|
788
|
+
const uniqueRanked = bestIterationCandidates.filter((candidate, index, array) => index === array.findIndex((other) => other.idx === candidate.idx && other.el === candidate.el))
|
|
789
|
+
const alternateCandidates = uniqueRanked
|
|
790
|
+
.filter((candidate) => candidate.idx !== best.idx || candidate.el !== best.el)
|
|
791
|
+
.slice(0, 3)
|
|
792
|
+
.map((candidate) => ToolsInteract._summarizeResolutionCandidate(candidate))
|
|
704
793
|
|
|
705
794
|
const outEl = {
|
|
706
|
-
text: best.text ?? null,
|
|
707
|
-
resourceId: best.resourceId ?? null,
|
|
708
|
-
contentDesc: best.contentDescription ?? best.contentDesc ?? null,
|
|
709
|
-
class: best.type ?? best.class ?? null,
|
|
795
|
+
text: best.el.text ?? null,
|
|
796
|
+
resourceId: best.el.resourceId ?? null,
|
|
797
|
+
contentDesc: best.el.contentDescription ?? best.el.contentDesc ?? null,
|
|
798
|
+
class: best.el.type ?? best.el.class ?? null,
|
|
710
799
|
bounds: boundsObj,
|
|
711
|
-
clickable: !!best.clickable,
|
|
712
|
-
enabled: !!best.enabled,
|
|
713
|
-
stable_id: best.stable_id ?? null,
|
|
714
|
-
role: best.role ?? null,
|
|
715
|
-
test_tag: best.test_tag ?? null,
|
|
716
|
-
selector: best.selector ?? null,
|
|
717
|
-
semantic: best.semantic ?? null,
|
|
800
|
+
clickable: !!best.el.clickable,
|
|
801
|
+
enabled: !!best.el.enabled,
|
|
802
|
+
stable_id: best.el.stable_id ?? null,
|
|
803
|
+
role: best.el.role ?? null,
|
|
804
|
+
test_tag: best.el.test_tag ?? null,
|
|
805
|
+
selector: best.el.selector ?? null,
|
|
806
|
+
semantic: best.el.semantic ?? null,
|
|
718
807
|
tapCoordinates,
|
|
719
808
|
telemetry: {
|
|
720
|
-
matchedIndex: best
|
|
721
|
-
matchedInteractable: !!best
|
|
722
|
-
sliderLike:
|
|
809
|
+
matchedIndex: best.idx ?? null,
|
|
810
|
+
matchedInteractable: !!best.interactable,
|
|
811
|
+
sliderLike: best.reason === 'slider_track_preferred'
|
|
723
812
|
}
|
|
724
813
|
}
|
|
725
|
-
if (best
|
|
814
|
+
if (best.reason === 'slider_track_preferred') {
|
|
726
815
|
const isVertical = !!boundsObj && (boundsObj.bottom - boundsObj.top) > (boundsObj.right - boundsObj.left)
|
|
727
816
|
const interactionHint = {
|
|
728
817
|
kind: 'slider',
|
|
@@ -731,8 +820,15 @@ export class ToolsInteract {
|
|
|
731
820
|
}
|
|
732
821
|
;(outEl as any).interactionHint = interactionHint
|
|
733
822
|
}
|
|
734
|
-
const scoreVal = Math.min(1, Number(
|
|
735
|
-
|
|
823
|
+
const scoreVal = Math.min(1, Number(best.score.toFixed(3)))
|
|
824
|
+
const resolution: FindElementResolutionSummary = {
|
|
825
|
+
confidence: scoreVal,
|
|
826
|
+
reason: best.reason,
|
|
827
|
+
fallback_available: alternateCandidates.length > 0,
|
|
828
|
+
matched_count: uniqueRanked.length,
|
|
829
|
+
alternates: alternateCandidates
|
|
830
|
+
}
|
|
831
|
+
return { found: true, element: outEl, score: scoreVal, confidence: scoreVal, resolution }
|
|
736
832
|
}
|
|
737
833
|
|
|
738
834
|
static async waitForUIHandler({ selector, condition = 'exists', timeout_ms = 60000, poll_interval_ms = 300, match, retry = { max_attempts: 1, backoff_ms: 0 }, platform, deviceId }: { selector?: { text?: string, resource_id?: string, accessibility_id?: string, contains?: boolean }, condition?: 'exists'|'not_exists'|'visible'|'clickable', timeout_ms?: number, poll_interval_ms?: number, match?: { index?: number }, retry?: { max_attempts?: number, backoff_ms?: number }, platform?: 'android'|'ios', deviceId?: string }) {
|
package/src/server/common.ts
CHANGED
|
@@ -112,6 +112,23 @@ export function inferScrollFailure(message: string | undefined): { failureCode:
|
|
|
112
112
|
return { failureCode: 'UNKNOWN', retryable: false }
|
|
113
113
|
}
|
|
114
114
|
|
|
115
|
+
const ACTION_LIFECYCLE_STATE_BY_OUTCOME = {
|
|
116
|
+
success: 'pending_verification',
|
|
117
|
+
failure: 'failed'
|
|
118
|
+
} as const
|
|
119
|
+
|
|
120
|
+
export function determineActionLifecycleState({
|
|
121
|
+
success,
|
|
122
|
+
failure
|
|
123
|
+
}: {
|
|
124
|
+
success: boolean
|
|
125
|
+
failure?: { failureCode: ActionFailureCode; retryable: boolean }
|
|
126
|
+
}): NonNullable<ActionExecutionResult['lifecycle_state']> {
|
|
127
|
+
if (failure) return ACTION_LIFECYCLE_STATE_BY_OUTCOME.failure
|
|
128
|
+
if (success) return ACTION_LIFECYCLE_STATE_BY_OUTCOME.success
|
|
129
|
+
return ACTION_LIFECYCLE_STATE_BY_OUTCOME.success
|
|
130
|
+
}
|
|
131
|
+
|
|
115
132
|
export function buildActionExecutionResult({
|
|
116
133
|
actionType,
|
|
117
134
|
device,
|
|
@@ -121,7 +138,8 @@ export function buildActionExecutionResult({
|
|
|
121
138
|
uiFingerprintBefore,
|
|
122
139
|
uiFingerprintAfter,
|
|
123
140
|
failure,
|
|
124
|
-
details
|
|
141
|
+
details,
|
|
142
|
+
sourceModule
|
|
125
143
|
}: {
|
|
126
144
|
actionType: string
|
|
127
145
|
device?: ActionExecutionResult['device']
|
|
@@ -132,6 +150,7 @@ export function buildActionExecutionResult({
|
|
|
132
150
|
uiFingerprintAfter: string | null
|
|
133
151
|
failure?: { failureCode: ActionFailureCode; retryable: boolean }
|
|
134
152
|
details?: Record<string, unknown>
|
|
153
|
+
sourceModule: 'server' | 'interact'
|
|
135
154
|
}): ActionExecutionResult {
|
|
136
155
|
const timestampMs = Date.now()
|
|
137
156
|
const timestamp = new Date(timestampMs).toISOString()
|
|
@@ -139,6 +158,8 @@ export function buildActionExecutionResult({
|
|
|
139
158
|
action_id: nextActionId(actionType, timestampMs),
|
|
140
159
|
timestamp,
|
|
141
160
|
action_type: actionType,
|
|
161
|
+
lifecycle_state: determineActionLifecycleState({ success, failure }),
|
|
162
|
+
source_module: sourceModule,
|
|
142
163
|
...(device ? { device } : {}),
|
|
143
164
|
target: {
|
|
144
165
|
selector,
|
|
@@ -11,7 +11,9 @@ Inputs:
|
|
|
11
11
|
|
|
12
12
|
Output Structure:
|
|
13
13
|
- action_id, timestamp (ISO 8601), action_type
|
|
14
|
-
-
|
|
14
|
+
- lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
|
|
15
|
+
- source_module: runtime source of the action envelope
|
|
16
|
+
- target.selector = { appId }
|
|
15
17
|
- success = true when launch was dispatched successfully
|
|
16
18
|
- failure_code/retryable when launch dispatch fails
|
|
17
19
|
- ui_fingerprint_before/ui_fingerprint_after when available
|
|
@@ -84,7 +86,9 @@ Inputs:
|
|
|
84
86
|
|
|
85
87
|
Output Structure:
|
|
86
88
|
- action_id, timestamp (ISO 8601), action_type
|
|
87
|
-
-
|
|
89
|
+
- lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
|
|
90
|
+
- source_module: runtime source of the action envelope
|
|
91
|
+
- target.selector = { appId }
|
|
88
92
|
- success = true when the restart command completed
|
|
89
93
|
- failure_code/retryable when restart dispatch fails
|
|
90
94
|
- ui_fingerprint_before/ui_fingerprint_after when available
|
|
@@ -592,7 +596,9 @@ Recommended Usage:
|
|
|
592
596
|
},
|
|
593
597
|
{
|
|
594
598
|
name: 'find_element',
|
|
595
|
-
description:
|
|
599
|
+
description: `Find a UI element by semantic query (text, content-desc, resource-id, class).
|
|
600
|
+
|
|
601
|
+
Returns the best match plus resolution metadata when available, including confidence, selection reason, and fallback alternates.`,
|
|
596
602
|
inputSchema: {
|
|
597
603
|
type: 'object',
|
|
598
604
|
properties: {
|
|
@@ -617,7 +623,9 @@ Inputs:
|
|
|
617
623
|
|
|
618
624
|
Output Structure:
|
|
619
625
|
- action_id, timestamp (ISO 8601), action_type
|
|
620
|
-
-
|
|
626
|
+
- lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
|
|
627
|
+
- source_module: runtime source of the action envelope
|
|
628
|
+
- target.selector = { x, y }
|
|
621
629
|
- success = true when the tap was dispatched
|
|
622
630
|
- failure_code/retryable when dispatch fails
|
|
623
631
|
- ui_fingerprint_before/ui_fingerprint_after when available
|
|
@@ -673,6 +681,8 @@ Output Structure:
|
|
|
673
681
|
- action_id: unique timestamp-based action identifier
|
|
674
682
|
- timestamp: ISO 8601 timestamp for the action attempt
|
|
675
683
|
- action_type: "tap_element"
|
|
684
|
+
- lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
|
|
685
|
+
- source_module: runtime source of the action envelope
|
|
676
686
|
- target.selector: original target handle ({ elementId })
|
|
677
687
|
- target.resolved: minimal resolved element info used for the tap
|
|
678
688
|
- success: true when the tap was dispatched
|
|
@@ -725,6 +735,8 @@ Inputs:
|
|
|
725
735
|
|
|
726
736
|
Output Structure:
|
|
727
737
|
- action_id, timestamp (ISO 8601), action_type
|
|
738
|
+
- lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
|
|
739
|
+
- source_module: runtime source of the action envelope
|
|
728
740
|
- target.selector = { x1, y1, x2, y2, duration }
|
|
729
741
|
- success = true when the swipe was dispatched
|
|
730
742
|
- failure_code/retryable when dispatch fails
|
|
@@ -777,6 +789,8 @@ Inputs:
|
|
|
777
789
|
|
|
778
790
|
Output Structure:
|
|
779
791
|
- action_id, timestamp (ISO 8601), action_type
|
|
792
|
+
- lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
|
|
793
|
+
- source_module: runtime source of the action envelope
|
|
780
794
|
- target.selector = original selector
|
|
781
795
|
- target.resolved = minimal resolved element info when found
|
|
782
796
|
- success = true when scrolling produced a visible target element
|
|
@@ -831,6 +845,8 @@ Inputs:
|
|
|
831
845
|
|
|
832
846
|
Output Structure:
|
|
833
847
|
- action_id, timestamp (ISO 8601), action_type
|
|
848
|
+
- lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
|
|
849
|
+
- source_module: runtime source of the action envelope
|
|
834
850
|
- target.selector = { text }
|
|
835
851
|
- success = true when text input was dispatched
|
|
836
852
|
- failure_code/retryable when dispatch fails
|
|
@@ -880,6 +896,8 @@ Inputs:
|
|
|
880
896
|
|
|
881
897
|
Output Structure:
|
|
882
898
|
- action_id, timestamp (ISO 8601), action_type
|
|
899
|
+
- lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
|
|
900
|
+
- source_module: runtime source of the action envelope
|
|
883
901
|
- target.selector = { key: "back" }
|
|
884
902
|
- success = true when the back action was dispatched
|
|
885
903
|
- failure_code/retryable when dispatch fails
|