mobile-debug-mcp 0.26.2 → 0.26.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,6 +10,8 @@ import { buildActionExecutionResult } from '../server/common.js'
10
10
  import type {
11
11
  ActionFailureCode,
12
12
  ActionTargetResolved,
13
+ AdjustControlResponse,
14
+ FindElementResponse,
13
15
  ExpectElementVisibleResponse,
14
16
  ExpectStateResponse,
15
17
  ExpectScreenResponse,
@@ -68,6 +70,32 @@ interface UiChangeSignatureSet {
68
70
  state: string | null
69
71
  }
70
72
 
73
/**
 * A UI element that matched a find-element query, paired with the ranking
 * metadata produced while scoring it.
 */
interface RankedResolutionCandidate {
  /** The matched UI element. */
  el: UiElement
  /** Position of the element inside the UI tree's `elements` array. */
  idx: number
  /** Match score; a higher score is a better match. */
  score: number
  /** Short label naming the rule that matched, e.g. `exact_text_match`. */
  reason: string
  /** True when the element is clickable, enabled, or focusable. */
  interactable: boolean
}
80
+
81
/**
 * Summary describing how a find-element query was resolved.
 *
 * NOTE(review): the producer of `confidence`, `reason`, `fallback_available`
 * and `matched_count` is not visible next to this declaration; confirm their
 * exact semantics against `findElementHandler`.
 */
interface FindElementResolutionSummary {
  confidence: number
  reason: string
  fallback_available: boolean
  matched_count: number
  /**
   * Flattened descriptions of ranked candidates. Each entry has the shape
   * returned by `ToolsInteract._summarizeResolutionCandidate`; fields the
   * element does not provide are `null`.
   */
  alternates: {
    text: string | null
    resource_id: string | null
    accessibility_id: string | null
    class: string | null
    /** Element rectangle, or `null` when the bounds could not be normalized. */
    bounds: { left: number; top: number; right: number; bottom: number } | null
    clickable: boolean
    enabled: boolean
    score: number
    reason: string
  }[]
}
98
+
71
99
 
72
100
  export class ToolsInteract {
73
101
  private static readonly _maxResolvedUiElements = 256
@@ -290,6 +318,106 @@ export class ToolsInteract {
290
318
  }
291
319
  }
292
320
 
321
/**
 * Flattens a ranked candidate into the plain record stored in
 * `FindElementResolutionSummary.alternates`.
 *
 * Identifier fields fall back through the alternative property names an
 * element may carry and end up `null` when none is set. Bounds are converted
 * from the normalized four-number form into a named rectangle.
 */
private static _summarizeResolutionCandidate(candidate: RankedResolutionCandidate): FindElementResolutionSummary['alternates'][number] {
  const { el, score, reason } = candidate
  const normalized = ToolsInteract._normalizeBounds(el.bounds)

  let rect: { left: number; top: number; right: number; bottom: number } | null = null
  if (normalized) {
    rect = { left: normalized[0], top: normalized[1], right: normalized[2], bottom: normalized[3] }
  }

  return {
    text: el.text ?? null,
    resource_id: el.resourceId ?? el.resourceID ?? el.id ?? null,
    accessibility_id: el.contentDescription ?? el.contentDesc ?? el.accessibilityLabel ?? el.label ?? null,
    class: el.type ?? el.class ?? null,
    bounds: rect,
    clickable: Boolean(el.clickable),
    enabled: Boolean(el.enabled),
    score,
    reason
  }
}
337
+
338
/**
 * Reports whether an element looks like a slider-style control.
 *
 * An element qualifies when its state exposes a `value_range`, or when its
 * normalized type/class or role contains one of the keywords
 * slider, seekbar, stepper, adjustable, or range.
 *
 * @param el Element to inspect; `null` is never adjustable.
 */
private static _isAdjustableControl(el: UiElement | null): boolean {
  if (!el) return false

  const adjustablePattern = /slider|seekbar|stepper|adjustable|range/
  const descriptors = [
    ToolsInteract._normalize(el.type ?? el.class ?? ''),
    ToolsInteract._normalize(el.role ?? '')
  ]

  if (el.state?.value_range) return true
  return descriptors.some((descriptor) => adjustablePattern.test(descriptor))
}
344
+
345
/**
 * Reads a finite numeric value for `property` from an element's state.
 *
 * The named property is tried first. For `value` and `raw_value` the other
 * field is used as a fallback, preferring `raw_value` over `value`. Each
 * fallback candidate is checked on its own, so a non-numeric or NaN
 * `raw_value` no longer hides a usable numeric `value`.
 *
 * @param el Element whose state is read; `null` or a missing state yields `null`.
 * @param property Name of the state property to read.
 * @returns The finite number found, or `null` when there is none.
 */
private static _readNumericControlValue(el: UiElement | null, property: string): number | null {
  if (!el?.state) return null

  const isFiniteNumber = (candidate: unknown): candidate is number =>
    typeof candidate === 'number' && Number.isFinite(candidate)

  const stateValue: unknown = el.state[property as keyof UIElementState]
  if (isFiniteNumber(stateValue)) return stateValue

  if (property === 'value' || property === 'raw_value') {
    // Previously `raw_value ?? value`: that only skipped null/undefined, so a
    // string or NaN raw_value short-circuited the lookup and returned null.
    const fallbacks: unknown[] = [el.state.raw_value, el.state.value]
    for (const candidate of fallbacks) {
      if (isFiniteNumber(candidate)) return candidate
    }
  }

  return null
}
355
+
356
/**
 * Converts a 0..1 position along a control into screen coordinates.
 *
 * The point lies on the control's centre line for the given axis, inset from
 * both ends by 8% of the control's length (at least 8px). On the vertical
 * axis ratio 0 maps to the bottom end and ratio 1 to the top end.
 *
 * The inset is capped at just under half the control's length so that the
 * point stays inside very small controls; with the uncapped 8px minimum a
 * control shorter than about 16px produced a point outside its own bounds.
 * Controls 17px or longer are unaffected by the cap.
 *
 * @param bounds Control rectangle as `[left, top, right, bottom]`.
 * @param ratio Position along the axis; clamped to 0..1. NaN is treated as
 *   the midpoint (0.5) instead of yielding NaN coordinates.
 * @param axis Axis along which the control is adjusted.
 * @returns Integer `{ x, y }` tap coordinates.
 */
private static _buildControlPoint(bounds: [number, number, number, number], ratio: number, axis: 'horizontal' | 'vertical') {
  const clampedRatio = Number.isNaN(ratio) ? 0.5 : Math.max(0, Math.min(1, ratio))
  const [left, top, right, bottom] = bounds
  const width = Math.max(1, right - left)
  const height = Math.max(1, bottom - top)

  // 8% inset (min 8px), but never more than what leaves room inside the control.
  const insetFor = (length: number): number =>
    Math.min(Math.max(8, Math.floor(length * 0.08)), Math.max(0, Math.floor((length - 1) / 2)))

  if (axis === 'vertical') {
    const insetY = insetFor(height)
    const usableHeight = Math.max(1, height - (insetY * 2))
    return {
      x: Math.floor((left + right) / 2),
      y: Math.floor(bottom - insetY - (usableHeight * clampedRatio))
    }
  }

  const insetX = insetFor(width)
  const usableWidth = Math.max(1, width - (insetX * 2))
  return {
    x: Math.floor(left + insetX + (usableWidth * clampedRatio)),
    y: Math.floor((top + bottom) / 2)
  }
}
376
+
377
/**
 * Picks the screen point to touch in order to move a control to `targetValue`,
 * nudged away from step boundaries and from the ends of the track.
 *
 * The target's position within `[min, max]` is shifted by:
 *  - half a step, to aim at the middle of the target step;
 *  - a direction bias: -0.15 step when the target is above the current
 *    value, +0.65 step when it is below, none when the current value is
 *    unknown or equal;
 *  - an edge bias when the target lies within the edge window of either end.
 * The result is clamped between the endpoint margin and
 * `1 - 0.25 * endpointMargin`, then mapped to coordinates by
 * `_buildControlPoint`.
 *
 * NOTE(review): the value span is floored at 1 and one step is taken to be
 * `1 / span`, which presumably assumes integer-stepped controls; confirm the
 * behaviour for ranges narrower than 1 (e.g. 0..1 sliders).
 */
private static _buildConservativeControlPoint(
  bounds: [number, number, number, number],
  targetValue: number,
  currentValue: number | null,
  min: number,
  max: number,
  axis: 'horizontal' | 'vertical'
) {
  const [left, top, right, bottom] = bounds
  const span = Math.max(1, max - min)
  const step = 1 / span
  const lengthPx = Math.max(1, axis === 'vertical' ? bottom - top : right - left)

  // Bias depends on which way the control has to move.
  let directionBias = 0
  if (currentValue !== null) {
    const movement = Math.sign(targetValue - currentValue)
    if (movement > 0) {
      directionBias = -step * 0.15
    } else if (movement < 0) {
      directionBias = step * 0.65
    }
  }

  // Margin kept from the track ends: at least half a step, or a ~2px
  // equivalent bounded to 0.5%..3% of the track.
  const pixelMargin = Math.min(0.03, Math.max(0.005, 2 / lengthPx))
  const endpointMargin = Math.max(step * 0.5, pixelMargin)

  // Extra push when the target sits close to either end of the range.
  const edgeWindow = Math.max(3, Math.floor(span * 0.1))
  let edgeBias = 0
  if (targetValue - min <= edgeWindow) {
    edgeBias = endpointMargin
  } else if (max - targetValue <= edgeWindow) {
    edgeBias = Math.max(step * 0.4, endpointMargin * 0.75)
  }

  const biasedRatio = (targetValue - min) / span + step / 2 + directionBias + edgeBias
  const upperLimit = 1 - (endpointMargin * 0.25)
  const safeRatio = Math.min(upperLimit, Math.max(endpointMargin, biasedRatio))

  return ToolsInteract._buildControlPoint(bounds, safeRatio, axis)
}
412
+
413
/**
 * Decides along which axis a control is adjusted.
 *
 * An explicit "vertical" or "horizontal" in the element's normalized
 * type/class or role wins (vertical is checked first). Otherwise the control
 * is vertical only when it is strictly taller than it is wide.
 *
 * @param el Control element.
 * @param bounds Control rectangle as `[left, top, right, bottom]`.
 */
private static _controlAxis(el: UiElement, bounds: [number, number, number, number]): 'horizontal' | 'vertical' {
  const descriptors = [
    ToolsInteract._normalize(el.type ?? el.class ?? ''),
    ToolsInteract._normalize(el.role ?? '')
  ]

  if (descriptors.some((descriptor) => /vertical/.test(descriptor))) return 'vertical'
  if (descriptors.some((descriptor) => /horizontal/.test(descriptor))) return 'horizontal'

  const [left, top, right, bottom] = bounds
  const tallerThanWide = (bottom - top) > (right - left)
  return tallerThanWide ? 'vertical' : 'horizontal'
}
420
+
293
421
  private static _actionFailure(
294
422
  actionType: string,
295
423
  selector: Record<string, unknown> | null,
@@ -526,6 +654,507 @@ export class ToolsInteract {
526
654
  })
527
655
  }
528
656
 
657
/**
 * Moves an adjustable control (slider-style element) to a target numeric
 * value and verifies the result.
 *
 * Flow:
 *  1. Resolve the target, either from `element_id` or by waiting for
 *     `selector` to become clickable. If the wait fails, a non-exact
 *     `findElementHandler` search on the selector's text / resource id /
 *     accessibility id is used as a fallback.
 *  2. Up to `maxAttempts` times: read the current value, return success if
 *     it is already within `tolerance`, otherwise tap the computed target
 *     point (falling back to a swipe from the current point when the tap
 *     fails), verify through `expectStateHandler`, and swipe once more when
 *     the value is still off and the starting value was known.
 *  3. Return a TIMEOUT failure when no attempt converges.
 *
 * After the first attempt the resolved element is reused from a local cache
 * (with its state patched from the last observation) rather than re-read
 * from the UI tree.
 *
 * `attempts` in the response counts the tap/swipe actions performed, not
 * loop iterations.
 *
 * @param selector Selector for the control; used when `element_id` is absent.
 * @param element_id Previously resolved element id.
 * @param property State property to drive; only `value` and `raw_value` are supported.
 * @param targetValue Desired value; must be finite and inside the control's `value_range`.
 * @param tolerance Accepted absolute difference; negative or non-finite values are treated as 0.
 * @param maxAttempts Maximum adjustment iterations; at least 1.
 * @param platform Target platform; defaults to `android`.
 * @param deviceId Optional device id.
 * @returns The action result extended with target/actual state and convergence fields.
 */
static async adjustControlHandler({
  selector,
  element_id,
  property = 'value',
  targetValue,
  tolerance = 0,
  maxAttempts = 3,
  platform,
  deviceId
}: {
  selector?: { text?: string, resource_id?: string, accessibility_id?: string, contains?: boolean },
  element_id?: string,
  property?: string,
  targetValue: number,
  tolerance?: number,
  maxAttempts?: number,
  platform?: 'android' | 'ios',
  deviceId?: string
}): Promise<AdjustControlResponse> {
  type AdjustmentMode = 'semantic' | 'gesture' | 'coordinate'
  type ObservedState = { property: string; value: number | null; raw_value?: number | null }
  type ElementMatch = { el: UiElement, idx: number }
  type ResolvedMatch = {
    tree: any
    device: any
    match: ElementMatch | null
    resolvedTarget: ActionTargetResolved | null
  }

  const actionType = 'adjust_control'
  const targetSelector = selector ?? (element_id ? { elementId: element_id } : null)
  const normalizedTolerance = Number.isFinite(tolerance) ? Math.max(0, tolerance) : 0
  const attemptsLimit = Math.max(1, Math.floor(Number(maxAttempts) || 1))
  const sourcePlatform: 'android' | 'ios' = platform || 'android'
  let resolvedPlatform = sourcePlatform
  let resolvedDeviceId = deviceId
  const fingerprintBefore = await ToolsInteract._captureFingerprint(resolvedPlatform, resolvedDeviceId)
  let semanticFallbackElement: FindElementResponse['element'] | null = null

  // Builds the failure response; convergence fields are always false here.
  const buildFailure = (
    failureCode: ActionFailureCode,
    reason: string,
    resolved: ActionTargetResolved | null,
    device: any,
    actualState: ObservedState | null,
    attempts: number,
    adjustmentMode: AdjustmentMode = 'gesture',
    retryable = false,
    uiFingerprintAfter: string | null = null
  ): AdjustControlResponse => {
    const base = buildActionExecutionResult({
      actionType,
      sourceModule: 'interact',
      device,
      selector: targetSelector,
      resolved,
      success: false,
      uiFingerprintBefore: fingerprintBefore,
      uiFingerprintAfter,
      failure: { failureCode, retryable },
      details: {
        target_value: targetValue,
        tolerance: normalizedTolerance,
        property,
        attempts,
        adjustment_mode: adjustmentMode,
        actual_state: actualState,
        converged: false,
        within_tolerance: false,
        reason
      }
    }) as AdjustControlResponse

    return {
      ...base,
      target_state: {
        property,
        target_value: targetValue,
        tolerance: normalizedTolerance
      },
      actual_state: actualState,
      within_tolerance: false,
      converged: false,
      attempts,
      adjustment_mode: adjustmentMode
    }
  }

  // Builds the success response; captures the post-action fingerprint first.
  const buildSuccess = async (
    device: any,
    resolved: ActionTargetResolved | null,
    actualState: ObservedState | null,
    attempts: number,
    adjustmentMode: AdjustmentMode,
    reason: string
  ): Promise<AdjustControlResponse> => {
    const uiFingerprintAfter = await ToolsInteract._captureFingerprint(resolvedPlatform, resolvedDeviceId)
    const base = buildActionExecutionResult({
      actionType,
      sourceModule: 'interact',
      device,
      selector: targetSelector,
      resolved,
      success: true,
      uiFingerprintBefore: fingerprintBefore,
      uiFingerprintAfter,
      details: {
        target_value: targetValue,
        tolerance: normalizedTolerance,
        property,
        attempts,
        adjustment_mode: adjustmentMode,
        actual_state: actualState,
        converged: true,
        within_tolerance: true,
        reason
      }
    }) as AdjustControlResponse

    return {
      ...base,
      target_state: {
        property,
        target_value: targetValue,
        tolerance: normalizedTolerance
      },
      actual_state: actualState,
      within_tolerance: true,
      converged: true,
      attempts,
      adjustment_mode: adjustmentMode
    }
  }

  // Derives the resolved-target descriptor for a matched element.
  const describeTarget = (match: ElementMatch): ActionTargetResolved | null =>
    ToolsInteract._resolvedTargetFromElement(
      ToolsInteract._computeElementId(resolvedPlatform, resolvedDeviceId, match.el, match.idx),
      match.el,
      match.idx
    )

  // Re-reads the UI tree and locates the control in it.
  const resolveCurrentMatch = async (): Promise<ResolvedMatch | null> => {
    const tree = await ToolsObserve.getUITreeHandler({ platform: resolvedPlatform, deviceId: resolvedDeviceId }) as any
    resolvedPlatform = tree?.device?.platform === 'ios' ? 'ios' : resolvedPlatform
    resolvedDeviceId = tree?.device?.id || resolvedDeviceId
    const elements = Array.isArray(tree?.elements) ? tree.elements as UiElement[] : []

    const asResolved = (match: ElementMatch): ResolvedMatch => ({
      tree,
      device: tree?.device,
      match,
      resolvedTarget: describeTarget(match)
    })

    if (element_id) {
      const stored = ToolsInteract._resolvedUiElements.get(element_id)
      if (!stored) {
        return null
      }
      const current = ToolsInteract._findCurrentResolvedElement(elements, resolvedPlatform, resolvedDeviceId, stored)
      if (!current) {
        return null
      }
      return asResolved({ el: current.el, idx: current.index })
    }

    if (semanticFallbackElement) {
      // The fallback element may carry bounds as an array or as a
      // { left, top, right, bottom } object; normalize either form.
      const fallbackBounds = ToolsInteract._normalizeBounds(
        Array.isArray(semanticFallbackElement.bounds)
          ? semanticFallbackElement.bounds
          : semanticFallbackElement.bounds && typeof semanticFallbackElement.bounds === 'object'
            ? [
                Number((semanticFallbackElement.bounds as any).left),
                Number((semanticFallbackElement.bounds as any).top),
                Number((semanticFallbackElement.bounds as any).right),
                Number((semanticFallbackElement.bounds as any).bottom)
              ]
            : null
      )

      // First choice: an element with exactly the same bounds.
      let matchedIndex = -1
      if (fallbackBounds) {
        matchedIndex = elements.findIndex((el) => {
          const bounds = ToolsInteract._normalizeBounds(el.bounds)
          return !!bounds && bounds[0] === fallbackBounds[0] && bounds[1] === fallbackBounds[1] && bounds[2] === fallbackBounds[2] && bounds[3] === fallbackBounds[3]
        })
      }

      // Second choice: the adjustable control whose centre is nearest
      // (Manhattan distance) to the fallback element's centre.
      if (matchedIndex === -1 && fallbackBounds) {
        const fallbackCenterX = Math.floor((fallbackBounds[0] + fallbackBounds[2]) / 2)
        const fallbackCenterY = Math.floor((fallbackBounds[1] + fallbackBounds[3]) / 2)
        let bestDistance = Infinity
        for (let i = 0; i < elements.length; i++) {
          const el = elements[i]
          if (!ToolsInteract._isAdjustableControl(el)) continue
          const bounds = ToolsInteract._normalizeBounds(el.bounds)
          if (!bounds) continue
          const centerX = Math.floor((bounds[0] + bounds[2]) / 2)
          const centerY = Math.floor((bounds[1] + bounds[3]) / 2)
          const distance = Math.abs(centerX - fallbackCenterX) + Math.abs(centerY - fallbackCenterY)
          if (distance < bestDistance) {
            bestDistance = distance
            matchedIndex = i
          }
        }
      }

      if (matchedIndex >= 0 && elements[matchedIndex]) {
        return asResolved({ el: elements[matchedIndex], idx: matchedIndex })
      }
    }

    if (selector) {
      const matched = ToolsInteract._findFirstMatchingElement(elements, selector)
      if (!matched) {
        return null
      }
      return asResolved(matched)
    }

    return null
  }

  if (!selector && !element_id) {
    return buildFailure('ELEMENT_NOT_FOUND', 'selector or element_id is required', null, undefined, null, 0, 'gesture', false)
  }

  // A NaN/undefined target passes every `<`/`>` range comparison below and
  // would end up as NaN tap coordinates, so reject it before touching the device.
  if (!Number.isFinite(Number(targetValue))) {
    return buildFailure('UNKNOWN', `targetValue ${targetValue} is not a finite number`, null, undefined, null, 0, 'gesture', false)
  }

  if (selector && !element_id) {
    const waitResult = await ToolsInteract.waitForUIHandler({
      selector,
      condition: 'clickable',
      timeout_ms: 5000,
      poll_interval_ms: 300,
      platform: resolvedPlatform,
      deviceId: resolvedDeviceId
    }) as any

    if (waitResult?.status !== 'success' || !waitResult?.element?.elementId) {
      const semanticQuery = selector.text ?? selector.resource_id ?? selector.accessibility_id ?? ''
      if (!semanticQuery) {
        return buildFailure(
          waitResult?.error?.code === 'ELEMENT_NOT_FOUND' ? 'ELEMENT_NOT_FOUND' : 'TIMEOUT',
          waitResult?.error?.message ?? 'adjustable control not found',
          null,
          waitResult?.device,
          null,
          0,
          'gesture',
          waitResult?.error?.code === 'ELEMENT_NOT_FOUND'
        )
      }

      const fallback = await ToolsInteract.findElementHandler({
        query: semanticQuery,
        exact: false,
        timeoutMs: 3000,
        platform: resolvedPlatform,
        deviceId: resolvedDeviceId
      })

      if (!fallback.found || !fallback.element) {
        return buildFailure(
          'ELEMENT_NOT_FOUND',
          waitResult?.error?.message ?? 'adjustable control not found',
          null,
          waitResult?.device,
          null,
          0,
          'gesture',
          true
        )
      }

      semanticFallbackElement = fallback.element
    } else {
      element_id = waitResult.element.elementId
      semanticFallbackElement = null
    }
  }

  let lastObservedState: ObservedState | null = null
  let lastAdjustmentMode: AdjustmentMode = 'gesture'
  let resolvedTarget: ActionTargetResolved | null = null
  let currentDevice: any = undefined
  let attemptCount = 0
  let cachedResolvedMatch: ElementMatch | null = null

  for (let attempt = 0; attempt < attemptsLimit; attempt++) {
    // Reuse the element from the previous attempt when there is one.
    const resolved: ResolvedMatch | null = cachedResolvedMatch
      ? {
          tree: null,
          device: currentDevice,
          match: cachedResolvedMatch,
          resolvedTarget: describeTarget(cachedResolvedMatch)
        }
      : await resolveCurrentMatch()
    if (!resolved || !resolved.match || !resolved.resolvedTarget) {
      return buildFailure('STALE_REFERENCE', 'adjustable control could not be resolved', resolvedTarget, currentDevice, lastObservedState, attemptCount, lastAdjustmentMode, true)
    }

    currentDevice = resolved.device
    resolvedTarget = resolved.resolvedTarget
    const currentEl: UiElement = resolved.match.el
    cachedResolvedMatch = resolved.match
    const bounds = ToolsInteract._normalizeBounds(currentEl.bounds)
    const valueRange = currentEl.state?.value_range ?? null
    const currentValue = ToolsInteract._readNumericControlValue(currentEl, property)
    const actualState: ObservedState | null = currentValue !== null
      ? { property, value: currentValue, raw_value: typeof currentEl.state?.raw_value === 'number' ? currentEl.state.raw_value : undefined }
      : null

    lastObservedState = actualState

    if (property !== 'value' && property !== 'raw_value') {
      return buildFailure('ELEMENT_NOT_INTERACTABLE', 'adjust_control currently supports numeric value and raw_value properties only', resolvedTarget, currentDevice, actualState, attemptCount, lastAdjustmentMode, false)
    }

    if (currentValue !== null && Math.abs(currentValue - targetValue) <= normalizedTolerance) {
      return buildSuccess(currentDevice, resolvedTarget, actualState, attemptCount, 'semantic', 'control already within tolerance')
    }

    if (!ToolsInteract._isAdjustableControl(currentEl)) {
      return buildFailure('ELEMENT_NOT_INTERACTABLE', 'target is not an adjustable control', resolvedTarget, currentDevice, actualState, attemptCount, lastAdjustmentMode, false)
    }

    if (!bounds) {
      return buildFailure('ELEMENT_NOT_INTERACTABLE', 'adjustable control has no bounds', resolvedTarget, currentDevice, actualState, attemptCount, lastAdjustmentMode, false)
    }

    const min = typeof valueRange?.min === 'number' ? valueRange.min : null
    const max = typeof valueRange?.max === 'number' ? valueRange.max : null
    if (min === null || max === null || !Number.isFinite(min) || !Number.isFinite(max) || max <= min) {
      return buildFailure('ELEMENT_NOT_INTERACTABLE', 'value_range unavailable', resolvedTarget, currentDevice, actualState, attemptCount, lastAdjustmentMode, false)
    }

    if (targetValue < min || targetValue > max) {
      return buildFailure('UNKNOWN', `targetValue ${targetValue} is outside the control range ${min}..${max}`, resolvedTarget, currentDevice, actualState, attemptCount, lastAdjustmentMode, false)
    }

    const axis = ToolsInteract._controlAxis(currentEl, bounds)
    const targetPoint = ToolsInteract._buildConservativeControlPoint(bounds, targetValue, currentValue, min, max, axis)
    // Swipes start from where the value currently sits, or from the middle
    // of the control when the current value is unknown.
    const currentPoint = currentValue !== null
      ? ToolsInteract._buildControlPoint(bounds, (currentValue - min) / (max - min), axis)
      : ToolsInteract._buildControlPoint(bounds, 0.5, axis)

    const swipeToTarget = () => ToolsInteract.swipeHandler({
      platform: resolvedPlatform,
      x1: currentPoint.x,
      y1: currentPoint.y,
      x2: targetPoint.x,
      y2: targetPoint.y,
      duration: 220,
      deviceId: resolvedDeviceId
    })

    // Re-checks the control state and reports whether it reached the target.
    const runVerification = async (): Promise<{
      verification: any
      observedState: ObservedState | null
      withinTolerance: boolean
    }> => {
      const verification = await ToolsInteract.expectStateHandler({
        element_id: resolvedTarget?.elementId ?? element_id,
        selector: selector ?? undefined,
        property,
        expected: targetValue,
        platform: resolvedPlatform,
        deviceId: resolvedDeviceId
      }) as any

      const observedValue = typeof verification?.observed_state?.value === 'number'
        ? verification.observed_state.value
        : typeof verification?.observed_state?.raw_value === 'number'
          ? verification.observed_state.raw_value
          : null
      // Without a numeric observation, keep reporting the pre-action state.
      const observedState: ObservedState | null = observedValue !== null
        ? {
            property,
            value: observedValue,
            raw_value: typeof verification?.observed_state?.raw_value === 'number' ? verification.observed_state.raw_value : undefined
          }
        : actualState

      return {
        verification,
        observedState,
        withinTolerance: observedValue !== null && Math.abs(observedValue - targetValue) <= normalizedTolerance
      }
    }

    // Primary action: tap the computed target point.
    lastAdjustmentMode = 'coordinate'
    const primaryActionResult = await ToolsInteract.tapHandler({
      platform: resolvedPlatform,
      x: targetPoint.x,
      y: targetPoint.y,
      deviceId: resolvedDeviceId
    })
    let actionDevice = primaryActionResult.device ?? currentDevice
    attemptCount++

    if (!primaryActionResult.success) {
      // The tap itself failed: fall back to a swipe.
      lastAdjustmentMode = 'gesture'
      const fallbackActionResult = await swipeToTarget()
      attemptCount++

      if (!fallbackActionResult.success) {
        return buildFailure('UNKNOWN', fallbackActionResult.error ?? primaryActionResult.error ?? 'adjustment gesture failed', resolvedTarget, fallbackActionResult.device ?? primaryActionResult.device, actualState, attemptCount, lastAdjustmentMode, false)
      }

      actionDevice = fallbackActionResult.device ?? actionDevice
    }

    let verificationResult = await runVerification()
    let observedState = verificationResult.observedState
    lastObservedState = observedState

    if (!verificationResult.withinTolerance && currentValue !== null) {
      // The action went through but the value is still off: swipe and re-verify.
      lastAdjustmentMode = 'gesture'
      const fallbackActionResult = await swipeToTarget()
      attemptCount++
      if (!fallbackActionResult.success) {
        return buildFailure('UNKNOWN', fallbackActionResult.error ?? 'adjustment gesture failed', resolvedTarget, fallbackActionResult.device, observedState ?? actualState, attemptCount, lastAdjustmentMode, false)
      }

      verificationResult = await runVerification()
      observedState = verificationResult.observedState
    }

    const verification = verificationResult.verification
    lastObservedState = observedState

    if (verificationResult.withinTolerance) {
      return buildSuccess(
        actionDevice ?? currentDevice,
        resolvedTarget,
        observedState,
        attemptCount,
        lastAdjustmentMode,
        verification?.reason ?? 'control converged to target value'
      )
    }

    // Carry the last observation into the cached element so the next attempt
    // computes its gesture from the value the control actually reached.
    cachedResolvedMatch = {
      el: {
        ...currentEl,
        state: {
          ...(currentEl.state ?? null),
          ...(observedState ? {
            [observedState.property]: observedState.value,
            raw_value: observedState.raw_value ?? observedState.value
          } : {})
        }
      },
      idx: resolved.match.idx
    }
  }

  const uiFingerprintAfter = await ToolsInteract._captureFingerprint(resolvedPlatform, resolvedDeviceId)
  return buildFailure('TIMEOUT', 'control did not converge within the allotted attempts', resolvedTarget, currentDevice, lastObservedState, attemptCount, lastAdjustmentMode, true, uiFingerprintAfter)
}
1157
+
529
1158
  static async swipeHandler({ platform = 'android', x1, y1, x2, y2, duration, deviceId }: { platform?: 'android' | 'ios', x1: number, y1: number, x2: number, y2: number, duration: number, deviceId?: string }) {
530
1159
  const { interact, resolved } = await ToolsInteract.getInteractionService(platform, deviceId)
531
1160
  return await interact.swipe(x1, y1, x2, y2, duration, resolved.id)
@@ -546,7 +1175,7 @@ export class ToolsInteract {
546
1175
  return await interact.scrollToElement(selector, direction, maxScrolls, scrollAmount, resolved.id)
547
1176
  }
548
1177
 
549
- static async findElementHandler({ query, exact = false, timeoutMs = 3000, platform, deviceId }: { query: string, exact?: boolean, timeoutMs?: number, platform?: 'android' | 'ios', deviceId?: string }) {
1178
+ static async findElementHandler({ query, exact = false, timeoutMs = 3000, platform, deviceId }: { query: string, exact?: boolean, timeoutMs?: number, platform?: 'android' | 'ios', deviceId?: string }): Promise<FindElementResponse> {
550
1179
  // Try to use observe layer to fetch the current UI tree and perform a fast semantic search
551
1180
  const start = Date.now()
552
1181
  const deadline = start + timeoutMs
@@ -555,16 +1184,17 @@ export class ToolsInteract {
555
1184
  const q = normalize(query)
556
1185
  if (!q) return { found: false, error: 'Empty query' }
557
1186
 
558
- let best: UiElement | null = null
559
- let bestScore = 0
560
- let lastTree: any = null
1187
+ let best: RankedResolutionCandidate | null = null
1188
+ let bestTree: any = null
1189
+ let bestIterationCandidates: RankedResolutionCandidate[] = []
1190
+ let shouldStop = false
561
1191
 
562
- const scoreElement = (el: UiElement | null) => {
563
- if (!el || !el.visible) return 0
1192
+ const scoreElement = (el: UiElement | null, idx: number): RankedResolutionCandidate | null => {
1193
+ if (!el || !el.visible) return null
564
1194
  const bounds = el.bounds || [0,0,0,0]
565
- if (!Array.isArray(bounds) || bounds.length < 4) return 0
1195
+ if (!Array.isArray(bounds) || bounds.length < 4) return null
566
1196
  const [l,t,r,b] = bounds
567
- if (r <= l || b <= t) return 0
1197
+ if (r <= l || b <= t) return null
568
1198
  // Do not early-return on non-interactable elements — score them so we can locate their clickable ancestor later
569
1199
  const interactable = !!(el.clickable || el.enabled || el.focusable)
570
1200
 
@@ -574,44 +1204,80 @@ export class ToolsInteract {
574
1204
  const className = normalize(el.type ?? el.class ?? '')
575
1205
 
576
1206
  let score = 0
1207
+ let reason = 'best_scoring_candidate'
577
1208
  if (exact) {
578
- if (text && text === q) score = 1.0
579
- else if (content && content === q) score = 0.95
1209
+ if (text && text === q) {
1210
+ score = 1.0
1211
+ reason = 'exact_text_match'
1212
+ } else if (content && content === q) {
1213
+ score = 0.95
1214
+ reason = 'exact_content_desc_match'
1215
+ } else if (resourceId && resourceId === q) {
1216
+ score = 0.92
1217
+ reason = 'exact_resource_id_match'
1218
+ } else if (className && className === q) {
1219
+ score = 0.3
1220
+ reason = 'exact_class_match'
1221
+ }
580
1222
  } else {
581
- if (text && text === q) score = 1.0
582
- else if (content && content === q) score = 0.95
583
- else if (text && text.includes(q)) score = 0.6
584
- else if (content && content.includes(q)) score = 0.55
585
- else if (resourceId && resourceId.includes(q)) score = 0.7
586
- else if (className && className.includes(q)) score = 0.3
1223
+ if (text && text === q) {
1224
+ score = 1.0
1225
+ reason = 'exact_text_match'
1226
+ } else if (content && content === q) {
1227
+ score = 0.95
1228
+ reason = 'exact_content_desc_match'
1229
+ } else if (resourceId && resourceId === q) {
1230
+ score = 0.92
1231
+ reason = 'exact_resource_id_match'
1232
+ } else if (text && text.includes(q)) {
1233
+ score = 0.6
1234
+ reason = 'partial_text_match'
1235
+ } else if (content && content.includes(q)) {
1236
+ score = 0.55
1237
+ reason = 'partial_content_desc_match'
1238
+ } else if (resourceId && resourceId.includes(q)) {
1239
+ score = 0.7
1240
+ reason = 'partial_resource_id_match'
1241
+ } else if (className && className.includes(q)) {
1242
+ score = 0.3
1243
+ reason = 'partial_class_match'
1244
+ }
587
1245
  }
588
1246
  if (score > 0 && interactable) score += 0.05
589
- return score
1247
+ if (score <= 0) return null
1248
+ return { el, idx, score, reason, interactable }
590
1249
  }
591
1250
 
592
1251
  while (Date.now() <= deadline) {
593
1252
  try {
594
- const tree = await ToolsObserve.getUITreeHandler({ platform, deviceId })
595
- lastTree = tree
1253
+ const tree = await ToolsObserve.getUITreeHandler({ platform, deviceId })
596
1254
  if (tree && Array.isArray((tree as any).elements)) {
597
1255
  const elements = ((tree as any).elements as UiElement[])
1256
+ const iterationCandidates: RankedResolutionCandidate[] = []
1257
+ let iterationImprovedBest = false
598
1258
  for (let i = 0; i < elements.length; i++) {
599
1259
  const el = elements[i]
600
1260
  try {
601
- const s = scoreElement(el)
602
- const interactable = !!(el.clickable || el.enabled || (el as any).focusable)
603
- if (s > bestScore) {
604
- bestScore = s
605
- best = el as UiElement
606
- if (best) { best._index = i; best._interactable = interactable }
1261
+ const candidate = scoreElement(el, i)
1262
+ if (!candidate) continue
1263
+ iterationCandidates.push(candidate)
1264
+ if (!best || candidate.score > best.score) {
1265
+ best = candidate
1266
+ bestTree = tree
1267
+ iterationImprovedBest = true
1268
+ if (best.score >= 0.95) {
1269
+ shouldStop = true
1270
+ break
1271
+ }
607
1272
  }
608
- if (bestScore >= 0.95) break
609
1273
  } catch (e) { console.error('Error scoring element:', e) }
610
1274
  }
611
- if (bestScore >= 0.95) break
1275
+ if (iterationImprovedBest) {
1276
+ bestIterationCandidates = iterationCandidates.slice()
1277
+ }
612
1278
  }
613
1279
  } catch (e) { console.error('Error fetching UI tree:', e) }
614
- if (Date.now() > deadline) break
1280
+ if (shouldStop || Date.now() > deadline) break
615
1281
  await new Promise(r => setTimeout(r, 100))
616
1282
  }
617
1283
 
@@ -619,31 +1285,32 @@ export class ToolsInteract {
619
1285
 
620
1286
  // If the best match is not interactable, try to resolve an actionable ancestor.
621
1287
  try {
622
- const elements = (lastTree && Array.isArray(lastTree.elements)) ? (lastTree.elements as UiElement[]) : []
623
- const screen = lastTree?.resolution && typeof lastTree.resolution === 'object' ? lastTree.resolution as UiResolution : null
624
- let chosen = best as any
625
- const childBounds = Array.isArray(chosen?.bounds) ? chosen.bounds : null
1288
+ const elements = (bestTree && Array.isArray(bestTree.elements)) ? (bestTree.elements as UiElement[]) : []
1289
+ const screen = bestTree?.resolution && typeof bestTree.resolution === 'object' ? bestTree.resolution as UiResolution : null
1290
+ let chosen = best as { el: UiElement, idx: number }
1291
+ const childBounds = Array.isArray(chosen?.el?.bounds) ? chosen.el.bounds : null
626
1292
 
627
1293
  // Strategy 1: if parentId references an index, climb that chain
628
- let resolvedAncestor: any = null
629
- if (childBounds && (chosen.parentId !== undefined && chosen.parentId !== null)) {
1294
+ let resolvedAncestor: { el: UiElement, idx: number } | null = null
1295
+ if (childBounds && (chosen.el.parentId !== undefined && chosen.el.parentId !== null)) {
630
1296
  let cur = chosen
631
1297
  let safety = 0
632
- while (cur && safety < 20 && !(cur.clickable || cur.focusable) && (cur.parentId !== undefined && cur.parentId !== null)) {
633
- let pid = cur.parentId
1298
+ while (cur && safety < 20 && !(cur.el.clickable || cur.el.focusable) && (cur.el.parentId !== undefined && cur.el.parentId !== null)) {
1299
+ let pid = cur.el.parentId
634
1300
  let idx: number | null = null
635
1301
  if (typeof pid === 'number') idx = pid
636
1302
  else if (typeof pid === 'string' && /^\d+$/.test(pid)) idx = Number(pid)
637
1303
  // If parentId is not an index, try to find by matching resourceId or id field
638
1304
  if (idx !== null && elements[idx]) {
639
- cur = elements[idx]
640
- if (cur && (cur.clickable || cur.enabled || cur.focusable)) { resolvedAncestor = cur; break }
1305
+ cur = { el: elements[idx], idx }
1306
+ if (cur && (cur.el.clickable || cur.el.enabled || cur.el.focusable)) { resolvedAncestor = cur; break }
641
1307
  } else if (typeof pid === 'string') {
642
1308
  // fallback: search elements for matching resourceId or id
643
- const found = elements.find((el: UiElement)=> (el.resourceId === pid || el.id === pid))
1309
+ const foundIndex = elements.findIndex((el: UiElement)=> (el.resourceId === pid || el.id === pid))
1310
+ const found = foundIndex >= 0 ? elements[foundIndex] : null
644
1311
  if (found) {
645
- cur = found
646
- if (cur && (cur.clickable || cur.enabled || cur.focusable)) { resolvedAncestor = cur; break }
1312
+ cur = { el: found, idx: foundIndex }
1313
+ if (cur && (cur.el.clickable || cur.el.enabled || cur.el.focusable)) { resolvedAncestor = cur; break }
647
1314
  // otherwise continue climbing if this found element has its own parentId
648
1315
  } else {
649
1316
  break
@@ -659,62 +1326,77 @@ export class ToolsInteract {
659
1326
  if (!resolvedAncestor && childBounds) {
660
1327
  const [cl,ct,cr,cb] = childBounds
661
1328
  // find candidates that are clickable and contain the child bounds
662
- const candidates = elements.filter((el: UiElement)=> el && (el.clickable || el.focusable) && Array.isArray(el.bounds) && el.bounds!.length>=4).map((el: UiElement)=>({el, bounds: el.bounds! as number[]}))
663
- let bestCandidate: any = null
1329
+ const candidates = elements
1330
+ .map((el: UiElement, idx: number) => ({ el, idx }))
1331
+ .filter(({ el }) => el && (el.clickable || el.focusable) && Array.isArray(el.bounds) && el.bounds!.length >= 4)
1332
+ let bestCandidate: { el: UiElement, idx: number } | null = null
664
1333
  let bestCandidateArea = Infinity
665
1334
  for (const c of candidates) {
666
- const [pl,pt,pr,pb] = c.bounds
1335
+ const bounds = c.el.bounds as number[]
1336
+ const [pl,pt,pr,pb] = bounds
667
1337
  if (pl <= cl && pt <= ct && pr >= cr && pb >= cb) {
668
1338
  const area = (pr-pl) * (pb-pt)
669
- if (area < bestCandidateArea) { bestCandidateArea = area; bestCandidate = c.el }
1339
+ if (area < bestCandidateArea) { bestCandidateArea = area; bestCandidate = c }
670
1340
  }
671
1341
  }
672
1342
  if (bestCandidate) resolvedAncestor = bestCandidate
673
1343
  }
674
1344
 
675
1345
  if (resolvedAncestor) {
676
- best = resolvedAncestor
677
- // small score bump to reflect actionability
678
- bestScore = Math.min(1, bestScore + 0.02)
1346
+ best = {
1347
+ el: resolvedAncestor.el,
1348
+ idx: resolvedAncestor.idx,
1349
+ score: Math.min(1, best.score + 0.02),
1350
+ reason: 'clickable_parent_preferred',
1351
+ interactable: true
1352
+ }
679
1353
  }
680
1354
 
681
- if (best && !(best.clickable || best.focusable)) {
682
- const nearbyActionable = ToolsInteract._resolveNearbyActionableControl(elements, { el: best, idx: best._index ?? elements.indexOf(best) }, screen)
1355
+ if (best && !(best.el.clickable || best.el.focusable)) {
1356
+ const nearbyActionable = ToolsInteract._resolveNearbyActionableControl(elements, { el: best.el, idx: best.idx }, screen)
683
1357
  if (nearbyActionable) {
684
- best = nearbyActionable.el
685
- best._index = nearbyActionable.idx
686
- best._interactable = true
687
- best._sliderLike = nearbyActionable.sliderLike
1358
+ best = {
1359
+ el: nearbyActionable.el,
1360
+ idx: nearbyActionable.idx,
1361
+ score: Math.min(1, best.score + 0.02),
1362
+ reason: nearbyActionable.sliderLike ? 'slider_track_preferred' : 'nearby_actionable_control',
1363
+ interactable: true
1364
+ }
688
1365
  }
689
1366
  }
690
1367
  } catch (e) { console.error('Error resolving ancestor:', e) }
691
1368
 
692
1369
  if (!best) return { found: false, error: 'Element not found' }
693
1370
 
694
- const boundsObj = Array.isArray(best.bounds) ? { left: best.bounds[0], top: best.bounds[1], right: best.bounds[2], bottom: best.bounds[3] } : null
1371
+ const boundsObj = Array.isArray(best.el.bounds) ? { left: best.el.bounds[0], top: best.el.bounds[1], right: best.el.bounds[2], bottom: best.el.bounds[3] } : null
695
1372
  const tapCoordinates = boundsObj ? { x: Math.floor((boundsObj.left + boundsObj.right) / 2), y: Math.floor((boundsObj.top + boundsObj.bottom) / 2) } : null
1373
+ const uniqueRanked = bestIterationCandidates.filter((candidate, index, array) => index === array.findIndex((other) => other.idx === candidate.idx && other.el === candidate.el))
1374
+ const alternateCandidates = uniqueRanked
1375
+ .filter((candidate) => candidate.idx !== best.idx || candidate.el !== best.el)
1376
+ .slice(0, 3)
1377
+ .map((candidate) => ToolsInteract._summarizeResolutionCandidate(candidate))
696
1378
 
697
1379
  const outEl = {
698
- text: best.text ?? null,
699
- resourceId: best.resourceId ?? null,
700
- contentDesc: best.contentDescription ?? best.contentDesc ?? null,
701
- class: best.type ?? best.class ?? null,
1380
+ text: best.el.text ?? null,
1381
+ resourceId: best.el.resourceId ?? null,
1382
+ contentDesc: best.el.contentDescription ?? best.el.contentDesc ?? null,
1383
+ class: best.el.type ?? best.el.class ?? null,
702
1384
  bounds: boundsObj,
703
- clickable: !!best.clickable,
704
- enabled: !!best.enabled,
705
- stable_id: best.stable_id ?? null,
706
- role: best.role ?? null,
707
- test_tag: best.test_tag ?? null,
708
- selector: best.selector ?? null,
709
- semantic: best.semantic ?? null,
1385
+ clickable: !!best.el.clickable,
1386
+ enabled: !!best.el.enabled,
1387
+ stable_id: best.el.stable_id ?? null,
1388
+ role: best.el.role ?? null,
1389
+ test_tag: best.el.test_tag ?? null,
1390
+ selector: best.el.selector ?? null,
1391
+ semantic: best.el.semantic ?? null,
710
1392
  tapCoordinates,
711
1393
  telemetry: {
712
- matchedIndex: best?._index ?? null,
713
- matchedInteractable: !!best?._interactable,
714
- sliderLike: !!best?._sliderLike
1394
+ matchedIndex: best.idx ?? null,
1395
+ matchedInteractable: !!best.interactable,
1396
+ sliderLike: best.reason === 'slider_track_preferred'
715
1397
  }
716
1398
  }
717
- if (best?._sliderLike) {
1399
+ if (best.reason === 'slider_track_preferred') {
718
1400
  const isVertical = !!boundsObj && (boundsObj.bottom - boundsObj.top) > (boundsObj.right - boundsObj.left)
719
1401
  const interactionHint = {
720
1402
  kind: 'slider',
@@ -723,8 +1405,15 @@ export class ToolsInteract {
723
1405
  }
724
1406
  ;(outEl as any).interactionHint = interactionHint
725
1407
  }
726
- const scoreVal = Math.min(1, Number(bestScore.toFixed(3)))
727
- return { found: true, element: outEl, score: scoreVal, confidence: scoreVal }
1408
+ const scoreVal = Math.min(1, Number(best.score.toFixed(3)))
1409
+ const resolution: FindElementResolutionSummary = {
1410
+ confidence: scoreVal,
1411
+ reason: best.reason,
1412
+ fallback_available: alternateCandidates.length > 0,
1413
+ matched_count: uniqueRanked.length,
1414
+ alternates: alternateCandidates
1415
+ }
1416
+ return { found: true, element: outEl, score: scoreVal, confidence: scoreVal, resolution }
728
1417
  }
729
1418
 
730
1419
  static async waitForUIHandler({ selector, condition = 'exists', timeout_ms = 60000, poll_interval_ms = 300, match, retry = { max_attempts: 1, backoff_ms: 0 }, platform, deviceId }: { selector?: { text?: string, resource_id?: string, accessibility_id?: string, contains?: boolean }, condition?: 'exists'|'not_exists'|'visible'|'clickable', timeout_ms?: number, poll_interval_ms?: number, match?: { index?: number }, retry?: { max_attempts?: number, backoff_ms?: number }, platform?: 'android'|'ios', deviceId?: string }) {