mobile-debug-mcp 0.26.1 → 0.26.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,10 +6,11 @@ export { AndroidInteract, iOSInteract };
6
6
  import { resolveTargetDevice } from '../utils/resolve-device.js'
7
7
  import { ToolsObserve } from '../observe/index.js'
8
8
  import { computeSnapshotSignature } from '../observe/snapshot-metadata.js'
9
- import { nextActionId } from '../server/common.js'
9
+ import { buildActionExecutionResult } from '../server/common.js'
10
10
  import type {
11
11
  ActionFailureCode,
12
12
  ActionTargetResolved,
13
+ FindElementResponse,
13
14
  ExpectElementVisibleResponse,
14
15
  ExpectStateResponse,
15
16
  ExpectScreenResponse,
@@ -68,6 +69,32 @@ interface UiChangeSignatureSet {
68
69
  state: string | null
69
70
  }
70
71
 
72
+ interface RankedResolutionCandidate {
73
+ el: UiElement
74
+ idx: number
75
+ score: number
76
+ reason: string
77
+ interactable: boolean
78
+ }
79
+
80
+ interface FindElementResolutionSummary {
81
+ confidence: number
82
+ reason: string
83
+ fallback_available: boolean
84
+ matched_count: number
85
+ alternates: Array<{
86
+ text: string | null
87
+ resource_id: string | null
88
+ accessibility_id: string | null
89
+ class: string | null
90
+ bounds: { left: number; top: number; right: number; bottom: number } | null
91
+ clickable: boolean
92
+ enabled: boolean
93
+ score: number
94
+ reason: string
95
+ }>
96
+ }
97
+
71
98
 
72
99
  export class ToolsInteract {
73
100
  private static readonly _maxResolvedUiElements = 256
@@ -290,28 +317,43 @@ export class ToolsInteract {
290
317
  }
291
318
  }
292
319
 
320
+ private static _summarizeResolutionCandidate(candidate: RankedResolutionCandidate): FindElementResolutionSummary['alternates'][number] {
321
+ const bounds = ToolsInteract._normalizeBounds(candidate.el.bounds)
322
+ return {
323
+ text: candidate.el.text ?? null,
324
+ resource_id: candidate.el.resourceId ?? candidate.el.resourceID ?? candidate.el.id ?? null,
325
+ accessibility_id: candidate.el.contentDescription ?? candidate.el.contentDesc ?? candidate.el.accessibilityLabel ?? candidate.el.label ?? null,
326
+ class: candidate.el.type ?? candidate.el.class ?? null,
327
+ bounds: bounds
328
+ ? { left: bounds[0], top: bounds[1], right: bounds[2], bottom: bounds[3] }
329
+ : null,
330
+ clickable: !!candidate.el.clickable,
331
+ enabled: !!candidate.el.enabled,
332
+ score: candidate.score,
333
+ reason: candidate.reason
334
+ }
335
+ }
336
+
293
337
  private static _actionFailure(
294
- actionId: string,
295
- timestamp: string,
296
338
  actionType: string,
297
339
  selector: Record<string, unknown> | null,
298
340
  resolved: ActionTargetResolved | null,
299
341
  failureCode: ActionFailureCode,
300
342
  retryable: boolean,
301
343
  uiFingerprintBefore: string | null,
302
- uiFingerprintAfter?: string | null
344
+ uiFingerprintAfter?: string | null,
345
+ sourceModule: 'server' | 'interact' = 'interact'
303
346
  ): TapElementResponse {
304
- return {
305
- action_id: actionId,
306
- timestamp,
307
- action_type: actionType,
308
- target: { selector, resolved },
347
+ return buildActionExecutionResult({
348
+ actionType,
349
+ selector,
350
+ resolved,
309
351
  success: false,
310
- failure_code: failureCode,
311
- retryable,
312
- ui_fingerprint_before: uiFingerprintBefore,
313
- ui_fingerprint_after: uiFingerprintAfter
314
- }
352
+ uiFingerprintBefore,
353
+ uiFingerprintAfter: uiFingerprintAfter ?? null,
354
+ failure: { failureCode, retryable },
355
+ sourceModule
356
+ })
315
357
  }
316
358
 
317
359
  static _resetResolvedUiElementsForTests() {
@@ -472,14 +514,11 @@ export class ToolsInteract {
472
514
  }
473
515
 
474
516
  static async tapElementHandler({ elementId }: { elementId: string }): Promise<TapElementResponse> {
475
- const timestampMs = Date.now()
476
- const timestamp = new Date(timestampMs).toISOString()
477
517
  const actionType = 'tap_element'
478
- const actionId = nextActionId(actionType, timestampMs)
479
518
  const selector = { elementId }
480
519
  const resolved = ToolsInteract._resolvedUiElements.get(elementId)
481
520
  if (!resolved) {
482
- return ToolsInteract._actionFailure(actionId, timestamp, actionType, selector, null, 'STALE_REFERENCE', true, null)
521
+ return ToolsInteract._actionFailure(actionType, selector, null, 'STALE_REFERENCE', true, null)
483
522
  }
484
523
 
485
524
  const fingerprintBefore = await ToolsInteract._captureFingerprint(resolved.platform, resolved.deviceId)
@@ -491,22 +530,22 @@ export class ToolsInteract {
491
530
  const currentMatch = ToolsInteract._findCurrentResolvedElement(elements, treePlatform, treeDeviceId, resolved)
492
531
 
493
532
  if (!currentMatch) {
494
- return ToolsInteract._actionFailure(actionId, timestamp, actionType, selector, null, 'STALE_REFERENCE', true, fingerprintBefore)
533
+ return ToolsInteract._actionFailure(actionType, selector, null, 'STALE_REFERENCE', true, fingerprintBefore)
495
534
  }
496
535
 
497
536
  const resolvedTarget = ToolsInteract._resolvedTargetFromElement(resolved.elementId, currentMatch.el, currentMatch.index)
498
537
 
499
538
  if (!ToolsInteract._isVisibleElement(currentMatch.el)) {
500
- return ToolsInteract._actionFailure(actionId, timestamp, actionType, selector, resolvedTarget, 'ELEMENT_NOT_INTERACTABLE', true, fingerprintBefore)
539
+ return ToolsInteract._actionFailure(actionType, selector, resolvedTarget, 'ELEMENT_NOT_INTERACTABLE', true, fingerprintBefore)
501
540
  }
502
541
 
503
542
  if (currentMatch.el.enabled === false) {
504
- return ToolsInteract._actionFailure(actionId, timestamp, actionType, selector, resolvedTarget, 'ELEMENT_NOT_INTERACTABLE', true, fingerprintBefore)
543
+ return ToolsInteract._actionFailure(actionType, selector, resolvedTarget, 'ELEMENT_NOT_INTERACTABLE', true, fingerprintBefore)
505
544
  }
506
545
 
507
546
  const bounds = ToolsInteract._normalizeBounds(currentMatch.el.bounds) ?? resolved.bounds
508
547
  if (!bounds || bounds[2] <= bounds[0] || bounds[3] <= bounds[1]) {
509
- return ToolsInteract._actionFailure(actionId, timestamp, actionType, selector, resolvedTarget, 'ELEMENT_NOT_INTERACTABLE', true, fingerprintBefore)
548
+ return ToolsInteract._actionFailure(actionType, selector, resolvedTarget, 'ELEMENT_NOT_INTERACTABLE', true, fingerprintBefore)
510
549
  }
511
550
 
512
551
  const x = Math.floor((bounds[0] + bounds[2]) / 2)
@@ -515,23 +554,20 @@ export class ToolsInteract {
515
554
 
516
555
  if (!tapResult.success) {
517
556
  const fingerprintAfterFailure = await ToolsInteract._captureFingerprint(resolved.platform, resolved.deviceId)
518
- return ToolsInteract._actionFailure(actionId, timestamp, actionType, selector, resolvedTarget, 'UNKNOWN', false, fingerprintBefore, fingerprintAfterFailure)
557
+ return ToolsInteract._actionFailure(actionType, selector, resolvedTarget, 'UNKNOWN', false, fingerprintBefore, fingerprintAfterFailure)
519
558
  }
520
559
 
521
560
  const fingerprintAfter = await ToolsInteract._captureFingerprint(resolved.platform, resolved.deviceId)
522
- return {
523
- action_id: actionId,
524
- timestamp,
525
- action_type: actionType,
526
- ...(tree?.device ? { device: tree.device } : {}),
527
- target: {
528
- selector,
529
- resolved: resolvedTarget
530
- },
561
+ return buildActionExecutionResult({
562
+ actionType,
563
+ device: tree?.device,
564
+ selector,
565
+ resolved: resolvedTarget,
531
566
  success: true,
532
- ui_fingerprint_before: fingerprintBefore,
533
- ui_fingerprint_after: fingerprintAfter
534
- }
567
+ uiFingerprintBefore: fingerprintBefore,
568
+ uiFingerprintAfter: fingerprintAfter,
569
+ sourceModule: 'interact'
570
+ })
535
571
  }
536
572
 
537
573
  static async swipeHandler({ platform = 'android', x1, y1, x2, y2, duration, deviceId }: { platform?: 'android' | 'ios', x1: number, y1: number, x2: number, y2: number, duration: number, deviceId?: string }) {
@@ -554,7 +590,7 @@ export class ToolsInteract {
554
590
  return await interact.scrollToElement(selector, direction, maxScrolls, scrollAmount, resolved.id)
555
591
  }
556
592
 
557
- static async findElementHandler({ query, exact = false, timeoutMs = 3000, platform, deviceId }: { query: string, exact?: boolean, timeoutMs?: number, platform?: 'android' | 'ios', deviceId?: string }) {
593
+ static async findElementHandler({ query, exact = false, timeoutMs = 3000, platform, deviceId }: { query: string, exact?: boolean, timeoutMs?: number, platform?: 'android' | 'ios', deviceId?: string }): Promise<FindElementResponse> {
558
594
  // Try to use observe layer to fetch the current UI tree and perform a fast semantic search
559
595
  const start = Date.now()
560
596
  const deadline = start + timeoutMs
@@ -563,16 +599,17 @@ export class ToolsInteract {
563
599
  const q = normalize(query)
564
600
  if (!q) return { found: false, error: 'Empty query' }
565
601
 
566
- let best: UiElement | null = null
567
- let bestScore = 0
568
- let lastTree: any = null
602
+ let best: RankedResolutionCandidate | null = null
603
+ let bestTree: any = null
604
+ let bestIterationCandidates: RankedResolutionCandidate[] = []
605
+ let shouldStop = false
569
606
 
570
- const scoreElement = (el: UiElement | null) => {
571
- if (!el || !el.visible) return 0
607
+ const scoreElement = (el: UiElement | null, idx: number): RankedResolutionCandidate | null => {
608
+ if (!el || !el.visible) return null
572
609
  const bounds = el.bounds || [0,0,0,0]
573
- if (!Array.isArray(bounds) || bounds.length < 4) return 0
610
+ if (!Array.isArray(bounds) || bounds.length < 4) return null
574
611
  const [l,t,r,b] = bounds
575
- if (r <= l || b <= t) return 0
612
+ if (r <= l || b <= t) return null
576
613
  // Do not early-return on non-interactable elements — score them so we can locate their clickable ancestor later
577
614
  const interactable = !!(el.clickable || el.enabled || el.focusable)
578
615
 
@@ -582,44 +619,80 @@ export class ToolsInteract {
582
619
  const className = normalize(el.type ?? el.class ?? '')
583
620
 
584
621
  let score = 0
622
+ let reason = 'best_scoring_candidate'
585
623
  if (exact) {
586
- if (text && text === q) score = 1.0
587
- else if (content && content === q) score = 0.95
624
+ if (text && text === q) {
625
+ score = 1.0
626
+ reason = 'exact_text_match'
627
+ } else if (content && content === q) {
628
+ score = 0.95
629
+ reason = 'exact_content_desc_match'
630
+ } else if (resourceId && resourceId === q) {
631
+ score = 0.92
632
+ reason = 'exact_resource_id_match'
633
+ } else if (className && className === q) {
634
+ score = 0.3
635
+ reason = 'exact_class_match'
636
+ }
588
637
  } else {
589
- if (text && text === q) score = 1.0
590
- else if (content && content === q) score = 0.95
591
- else if (text && text.includes(q)) score = 0.6
592
- else if (content && content.includes(q)) score = 0.55
593
- else if (resourceId && resourceId.includes(q)) score = 0.7
594
- else if (className && className.includes(q)) score = 0.3
638
+ if (text && text === q) {
639
+ score = 1.0
640
+ reason = 'exact_text_match'
641
+ } else if (content && content === q) {
642
+ score = 0.95
643
+ reason = 'exact_content_desc_match'
644
+ } else if (resourceId && resourceId === q) {
645
+ score = 0.92
646
+ reason = 'exact_resource_id_match'
647
+ } else if (text && text.includes(q)) {
648
+ score = 0.6
649
+ reason = 'partial_text_match'
650
+ } else if (content && content.includes(q)) {
651
+ score = 0.55
652
+ reason = 'partial_content_desc_match'
653
+ } else if (resourceId && resourceId.includes(q)) {
654
+ score = 0.7
655
+ reason = 'partial_resource_id_match'
656
+ } else if (className && className.includes(q)) {
657
+ score = 0.3
658
+ reason = 'partial_class_match'
659
+ }
595
660
  }
596
661
  if (score > 0 && interactable) score += 0.05
597
- return score
662
+ if (score <= 0) return null
663
+ return { el, idx, score, reason, interactable }
598
664
  }
599
665
 
600
666
  while (Date.now() <= deadline) {
601
667
  try {
602
- const tree = await ToolsObserve.getUITreeHandler({ platform, deviceId })
603
- lastTree = tree
668
+ const tree = await ToolsObserve.getUITreeHandler({ platform, deviceId })
604
669
  if (tree && Array.isArray((tree as any).elements)) {
605
670
  const elements = ((tree as any).elements as UiElement[])
671
+ const iterationCandidates: RankedResolutionCandidate[] = []
672
+ let iterationImprovedBest = false
606
673
  for (let i = 0; i < elements.length; i++) {
607
674
  const el = elements[i]
608
675
  try {
609
- const s = scoreElement(el)
610
- const interactable = !!(el.clickable || el.enabled || (el as any).focusable)
611
- if (s > bestScore) {
612
- bestScore = s
613
- best = el as UiElement
614
- if (best) { best._index = i; best._interactable = interactable }
676
+ const candidate = scoreElement(el, i)
677
+ if (!candidate) continue
678
+ iterationCandidates.push(candidate)
679
+ if (!best || candidate.score > best.score) {
680
+ best = candidate
681
+ bestTree = tree
682
+ iterationImprovedBest = true
683
+ if (best.score >= 0.95) {
684
+ shouldStop = true
685
+ break
686
+ }
615
687
  }
616
- if (bestScore >= 0.95) break
617
688
  } catch (e) { console.error('Error scoring element:', e) }
618
689
  }
619
- if (bestScore >= 0.95) break
690
+ if (iterationImprovedBest) {
691
+ bestIterationCandidates = iterationCandidates.slice()
692
+ }
620
693
  }
621
694
  } catch (e) { console.error('Error fetching UI tree:', e) }
622
- if (Date.now() > deadline) break
695
+ if (shouldStop || Date.now() > deadline) break
623
696
  await new Promise(r => setTimeout(r, 100))
624
697
  }
625
698
 
@@ -627,31 +700,32 @@ export class ToolsInteract {
627
700
 
628
701
  // If the best match is not interactable, try to resolve an actionable ancestor.
629
702
  try {
630
- const elements = (lastTree && Array.isArray(lastTree.elements)) ? (lastTree.elements as UiElement[]) : []
631
- const screen = lastTree?.resolution && typeof lastTree.resolution === 'object' ? lastTree.resolution as UiResolution : null
632
- let chosen = best as any
633
- const childBounds = Array.isArray(chosen?.bounds) ? chosen.bounds : null
703
+ const elements = (bestTree && Array.isArray(bestTree.elements)) ? (bestTree.elements as UiElement[]) : []
704
+ const screen = bestTree?.resolution && typeof bestTree.resolution === 'object' ? bestTree.resolution as UiResolution : null
705
+ let chosen = best as { el: UiElement, idx: number }
706
+ const childBounds = Array.isArray(chosen?.el?.bounds) ? chosen.el.bounds : null
634
707
 
635
708
  // Strategy 1: if parentId references an index, climb that chain
636
- let resolvedAncestor: any = null
637
- if (childBounds && (chosen.parentId !== undefined && chosen.parentId !== null)) {
709
+ let resolvedAncestor: { el: UiElement, idx: number } | null = null
710
+ if (childBounds && (chosen.el.parentId !== undefined && chosen.el.parentId !== null)) {
638
711
  let cur = chosen
639
712
  let safety = 0
640
- while (cur && safety < 20 && !(cur.clickable || cur.focusable) && (cur.parentId !== undefined && cur.parentId !== null)) {
641
- let pid = cur.parentId
713
+ while (cur && safety < 20 && !(cur.el.clickable || cur.el.focusable) && (cur.el.parentId !== undefined && cur.el.parentId !== null)) {
714
+ let pid = cur.el.parentId
642
715
  let idx: number | null = null
643
716
  if (typeof pid === 'number') idx = pid
644
717
  else if (typeof pid === 'string' && /^\d+$/.test(pid)) idx = Number(pid)
645
718
  // If parentId is not an index, try to find by matching resourceId or id field
646
719
  if (idx !== null && elements[idx]) {
647
- cur = elements[idx]
648
- if (cur && (cur.clickable || cur.enabled || cur.focusable)) { resolvedAncestor = cur; break }
720
+ cur = { el: elements[idx], idx }
721
+ if (cur && (cur.el.clickable || cur.el.enabled || cur.el.focusable)) { resolvedAncestor = cur; break }
649
722
  } else if (typeof pid === 'string') {
650
723
  // fallback: search elements for matching resourceId or id
651
- const found = elements.find((el: UiElement)=> (el.resourceId === pid || el.id === pid))
724
+ const foundIndex = elements.findIndex((el: UiElement)=> (el.resourceId === pid || el.id === pid))
725
+ const found = foundIndex >= 0 ? elements[foundIndex] : null
652
726
  if (found) {
653
- cur = found
654
- if (cur && (cur.clickable || cur.enabled || cur.focusable)) { resolvedAncestor = cur; break }
727
+ cur = { el: found, idx: foundIndex }
728
+ if (cur && (cur.el.clickable || cur.el.enabled || cur.el.focusable)) { resolvedAncestor = cur; break }
655
729
  // otherwise continue climbing if this found element has its own parentId
656
730
  } else {
657
731
  break
@@ -667,62 +741,77 @@ export class ToolsInteract {
667
741
  if (!resolvedAncestor && childBounds) {
668
742
  const [cl,ct,cr,cb] = childBounds
669
743
  // find candidates that are clickable and contain the child bounds
670
- const candidates = elements.filter((el: UiElement)=> el && (el.clickable || el.focusable) && Array.isArray(el.bounds) && el.bounds!.length>=4).map((el: UiElement)=>({el, bounds: el.bounds! as number[]}))
671
- let bestCandidate: any = null
744
+ const candidates = elements
745
+ .map((el: UiElement, idx: number) => ({ el, idx }))
746
+ .filter(({ el }) => el && (el.clickable || el.focusable) && Array.isArray(el.bounds) && el.bounds!.length >= 4)
747
+ let bestCandidate: { el: UiElement, idx: number } | null = null
672
748
  let bestCandidateArea = Infinity
673
749
  for (const c of candidates) {
674
- const [pl,pt,pr,pb] = c.bounds
750
+ const bounds = c.el.bounds as number[]
751
+ const [pl,pt,pr,pb] = bounds
675
752
  if (pl <= cl && pt <= ct && pr >= cr && pb >= cb) {
676
753
  const area = (pr-pl) * (pb-pt)
677
- if (area < bestCandidateArea) { bestCandidateArea = area; bestCandidate = c.el }
754
+ if (area < bestCandidateArea) { bestCandidateArea = area; bestCandidate = c }
678
755
  }
679
756
  }
680
757
  if (bestCandidate) resolvedAncestor = bestCandidate
681
758
  }
682
759
 
683
760
  if (resolvedAncestor) {
684
- best = resolvedAncestor
685
- // small score bump to reflect actionability
686
- bestScore = Math.min(1, bestScore + 0.02)
761
+ best = {
762
+ el: resolvedAncestor.el,
763
+ idx: resolvedAncestor.idx,
764
+ score: Math.min(1, best.score + 0.02),
765
+ reason: 'clickable_parent_preferred',
766
+ interactable: true
767
+ }
687
768
  }
688
769
 
689
- if (best && !(best.clickable || best.focusable)) {
690
- const nearbyActionable = ToolsInteract._resolveNearbyActionableControl(elements, { el: best, idx: best._index ?? elements.indexOf(best) }, screen)
770
+ if (best && !(best.el.clickable || best.el.focusable)) {
771
+ const nearbyActionable = ToolsInteract._resolveNearbyActionableControl(elements, { el: best.el, idx: best.idx }, screen)
691
772
  if (nearbyActionable) {
692
- best = nearbyActionable.el
693
- best._index = nearbyActionable.idx
694
- best._interactable = true
695
- best._sliderLike = nearbyActionable.sliderLike
773
+ best = {
774
+ el: nearbyActionable.el,
775
+ idx: nearbyActionable.idx,
776
+ score: Math.min(1, best.score + 0.02),
777
+ reason: nearbyActionable.sliderLike ? 'slider_track_preferred' : 'nearby_actionable_control',
778
+ interactable: true
779
+ }
696
780
  }
697
781
  }
698
782
  } catch (e) { console.error('Error resolving ancestor:', e) }
699
783
 
700
784
  if (!best) return { found: false, error: 'Element not found' }
701
785
 
702
- const boundsObj = Array.isArray(best.bounds) ? { left: best.bounds[0], top: best.bounds[1], right: best.bounds[2], bottom: best.bounds[3] } : null
786
+ const boundsObj = Array.isArray(best.el.bounds) ? { left: best.el.bounds[0], top: best.el.bounds[1], right: best.el.bounds[2], bottom: best.el.bounds[3] } : null
703
787
  const tapCoordinates = boundsObj ? { x: Math.floor((boundsObj.left + boundsObj.right) / 2), y: Math.floor((boundsObj.top + boundsObj.bottom) / 2) } : null
788
+ const uniqueRanked = bestIterationCandidates.filter((candidate, index, array) => index === array.findIndex((other) => other.idx === candidate.idx && other.el === candidate.el))
789
+ const alternateCandidates = uniqueRanked
790
+ .filter((candidate) => candidate.idx !== best.idx || candidate.el !== best.el)
791
+ .slice(0, 3)
792
+ .map((candidate) => ToolsInteract._summarizeResolutionCandidate(candidate))
704
793
 
705
794
  const outEl = {
706
- text: best.text ?? null,
707
- resourceId: best.resourceId ?? null,
708
- contentDesc: best.contentDescription ?? best.contentDesc ?? null,
709
- class: best.type ?? best.class ?? null,
795
+ text: best.el.text ?? null,
796
+ resourceId: best.el.resourceId ?? null,
797
+ contentDesc: best.el.contentDescription ?? best.el.contentDesc ?? null,
798
+ class: best.el.type ?? best.el.class ?? null,
710
799
  bounds: boundsObj,
711
- clickable: !!best.clickable,
712
- enabled: !!best.enabled,
713
- stable_id: best.stable_id ?? null,
714
- role: best.role ?? null,
715
- test_tag: best.test_tag ?? null,
716
- selector: best.selector ?? null,
717
- semantic: best.semantic ?? null,
800
+ clickable: !!best.el.clickable,
801
+ enabled: !!best.el.enabled,
802
+ stable_id: best.el.stable_id ?? null,
803
+ role: best.el.role ?? null,
804
+ test_tag: best.el.test_tag ?? null,
805
+ selector: best.el.selector ?? null,
806
+ semantic: best.el.semantic ?? null,
718
807
  tapCoordinates,
719
808
  telemetry: {
720
- matchedIndex: best?._index ?? null,
721
- matchedInteractable: !!best?._interactable,
722
- sliderLike: !!best?._sliderLike
809
+ matchedIndex: best.idx ?? null,
810
+ matchedInteractable: !!best.interactable,
811
+ sliderLike: best.reason === 'slider_track_preferred'
723
812
  }
724
813
  }
725
- if (best?._sliderLike) {
814
+ if (best.reason === 'slider_track_preferred') {
726
815
  const isVertical = !!boundsObj && (boundsObj.bottom - boundsObj.top) > (boundsObj.right - boundsObj.left)
727
816
  const interactionHint = {
728
817
  kind: 'slider',
@@ -731,8 +820,15 @@ export class ToolsInteract {
731
820
  }
732
821
  ;(outEl as any).interactionHint = interactionHint
733
822
  }
734
- const scoreVal = Math.min(1, Number(bestScore.toFixed(3)))
735
- return { found: true, element: outEl, score: scoreVal, confidence: scoreVal }
823
+ const scoreVal = Math.min(1, Number(best.score.toFixed(3)))
824
+ const resolution: FindElementResolutionSummary = {
825
+ confidence: scoreVal,
826
+ reason: best.reason,
827
+ fallback_available: alternateCandidates.length > 0,
828
+ matched_count: uniqueRanked.length,
829
+ alternates: alternateCandidates
830
+ }
831
+ return { found: true, element: outEl, score: scoreVal, confidence: scoreVal, resolution }
736
832
  }
737
833
 
738
834
  static async waitForUIHandler({ selector, condition = 'exists', timeout_ms = 60000, poll_interval_ms = 300, match, retry = { max_attempts: 1, backoff_ms: 0 }, platform, deviceId }: { selector?: { text?: string, resource_id?: string, accessibility_id?: string, contains?: boolean }, condition?: 'exists'|'not_exists'|'visible'|'clickable', timeout_ms?: number, poll_interval_ms?: number, match?: { index?: number }, retry?: { max_attempts?: number, backoff_ms?: number }, platform?: 'android'|'ios', deviceId?: string }) {
@@ -112,6 +112,23 @@ export function inferScrollFailure(message: string | undefined): { failureCode:
112
112
  return { failureCode: 'UNKNOWN', retryable: false }
113
113
  }
114
114
 
115
+ const ACTION_LIFECYCLE_STATE_BY_OUTCOME = {
116
+ success: 'pending_verification',
117
+ failure: 'failed'
118
+ } as const
119
+
120
+ export function determineActionLifecycleState({
121
+ success,
122
+ failure
123
+ }: {
124
+ success: boolean
125
+ failure?: { failureCode: ActionFailureCode; retryable: boolean }
126
+ }): NonNullable<ActionExecutionResult['lifecycle_state']> {
127
+ if (failure) return ACTION_LIFECYCLE_STATE_BY_OUTCOME.failure
128
+ if (success) return ACTION_LIFECYCLE_STATE_BY_OUTCOME.success
129
+ return ACTION_LIFECYCLE_STATE_BY_OUTCOME.success
130
+ }
131
+
115
132
  export function buildActionExecutionResult({
116
133
  actionType,
117
134
  device,
@@ -121,7 +138,8 @@ export function buildActionExecutionResult({
121
138
  uiFingerprintBefore,
122
139
  uiFingerprintAfter,
123
140
  failure,
124
- details
141
+ details,
142
+ sourceModule
125
143
  }: {
126
144
  actionType: string
127
145
  device?: ActionExecutionResult['device']
@@ -132,6 +150,7 @@ export function buildActionExecutionResult({
132
150
  uiFingerprintAfter: string | null
133
151
  failure?: { failureCode: ActionFailureCode; retryable: boolean }
134
152
  details?: Record<string, unknown>
153
+ sourceModule: 'server' | 'interact'
135
154
  }): ActionExecutionResult {
136
155
  const timestampMs = Date.now()
137
156
  const timestamp = new Date(timestampMs).toISOString()
@@ -139,6 +158,8 @@ export function buildActionExecutionResult({
139
158
  action_id: nextActionId(actionType, timestampMs),
140
159
  timestamp,
141
160
  action_type: actionType,
161
+ lifecycle_state: determineActionLifecycleState({ success, failure }),
162
+ source_module: sourceModule,
142
163
  ...(device ? { device } : {}),
143
164
  target: {
144
165
  selector,
@@ -11,7 +11,9 @@ Inputs:
11
11
 
12
12
  Output Structure:
13
13
  - action_id, timestamp (ISO 8601), action_type
14
- - target.selector = { appId }
14
+ - lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
15
+ - source_module: runtime source of the action envelope
16
+ - target.selector = { appId }
15
17
  - success = true when launch was dispatched successfully
16
18
  - failure_code/retryable when launch dispatch fails
17
19
  - ui_fingerprint_before/ui_fingerprint_after when available
@@ -84,7 +86,9 @@ Inputs:
84
86
 
85
87
  Output Structure:
86
88
  - action_id, timestamp (ISO 8601), action_type
87
- - target.selector = { appId }
89
+ - lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
90
+ - source_module: runtime source of the action envelope
91
+ - target.selector = { appId }
88
92
  - success = true when the restart command completed
89
93
  - failure_code/retryable when restart dispatch fails
90
94
  - ui_fingerprint_before/ui_fingerprint_after when available
@@ -592,7 +596,9 @@ Recommended Usage:
592
596
  },
593
597
  {
594
598
  name: 'find_element',
595
- description: 'Find a UI element by semantic query (text, content-desc, resource-id, class). Returns best match.',
599
+ description: `Find a UI element by semantic query (text, content-desc, resource-id, class).
600
+
601
+ Returns the best match plus resolution metadata when available, including confidence, selection reason, and fallback alternates.`,
596
602
  inputSchema: {
597
603
  type: 'object',
598
604
  properties: {
@@ -617,7 +623,9 @@ Inputs:
617
623
 
618
624
  Output Structure:
619
625
  - action_id, timestamp (ISO 8601), action_type
620
- - target.selector = { x, y }
626
+ - lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
627
+ - source_module: runtime source of the action envelope
628
+ - target.selector = { x, y }
621
629
  - success = true when the tap was dispatched
622
630
  - failure_code/retryable when dispatch fails
623
631
  - ui_fingerprint_before/ui_fingerprint_after when available
@@ -673,6 +681,8 @@ Output Structure:
673
681
  - action_id: unique timestamp-based action identifier
674
682
  - timestamp: ISO 8601 timestamp for the action attempt
675
683
  - action_type: "tap_element"
684
+ - lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
685
+ - source_module: runtime source of the action envelope
676
686
  - target.selector: original target handle ({ elementId })
677
687
  - target.resolved: minimal resolved element info used for the tap
678
688
  - success: true when the tap was dispatched
@@ -725,6 +735,8 @@ Inputs:
725
735
 
726
736
  Output Structure:
727
737
  - action_id, timestamp (ISO 8601), action_type
738
+ - lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
739
+ - source_module: runtime source of the action envelope
728
740
  - target.selector = { x1, y1, x2, y2, duration }
729
741
  - success = true when the swipe was dispatched
730
742
  - failure_code/retryable when dispatch fails
@@ -777,6 +789,8 @@ Inputs:
777
789
 
778
790
  Output Structure:
779
791
  - action_id, timestamp (ISO 8601), action_type
792
+ - lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
793
+ - source_module: runtime source of the action envelope
780
794
  - target.selector = original selector
781
795
  - target.resolved = minimal resolved element info when found
782
796
  - success = true when scrolling produced a visible target element
@@ -831,6 +845,8 @@ Inputs:
831
845
 
832
846
  Output Structure:
833
847
  - action_id, timestamp (ISO 8601), action_type
848
+ - lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
849
+ - source_module: runtime source of the action envelope
834
850
  - target.selector = { text }
835
851
  - success = true when text input was dispatched
836
852
  - failure_code/retryable when dispatch fails
@@ -880,6 +896,8 @@ Inputs:
880
896
 
881
897
  Output Structure:
882
898
  - action_id, timestamp (ISO 8601), action_type
899
+ - lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
900
+ - source_module: runtime source of the action envelope
883
901
  - target.selector = { key: "back" }
884
902
  - success = true when the back action was dispatched
885
903
  - failure_code/retryable when dispatch fails