mobile-debug-mcp 0.26.3 → 0.26.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,11 +10,13 @@ import { buildActionExecutionResult } from '../server/common.js'
10
10
  import type {
11
11
  ActionFailureCode,
12
12
  ActionTargetResolved,
13
+ AdjustControlResponse,
13
14
  FindElementResponse,
14
15
  ExpectElementVisibleResponse,
15
16
  ExpectStateResponse,
16
17
  ExpectScreenResponse,
17
18
  WaitForUIChangeResponse,
19
+ UIElementSemanticMetadata,
18
20
  UIElementState,
19
21
  TapElementResponse
20
22
  } from '../types.js'
@@ -47,7 +49,7 @@ interface UiElement {
47
49
  role?: string | null
48
50
  test_tag?: string | null
49
51
  selector?: { value: string | null, confidence: { score: number, reason: string } | null } | null
50
- semantic?: { is_clickable: boolean, is_container: boolean } | null
52
+ semantic?: UIElementSemanticMetadata | null
51
53
  }
52
54
 
53
55
  interface ResolvedUiElementContext {
@@ -334,6 +336,95 @@ export class ToolsInteract {
334
336
  }
335
337
  }
336
338
 
339
/**
 * Reports whether an element looks like a control whose numeric value can be adjusted.
 *
 * An element qualifies when its state exposes a `value_range`, or when its normalized
 * type/class or role mentions slider, seekbar, stepper, adjustable or range.
 *
 * @param el Candidate element; a missing element is never adjustable.
 * @returns `true` when any of the signals above is present.
 */
private static _isAdjustableControl(el: UiElement | null): boolean {
  if (!el) return false
  if (el.state?.value_range) return true

  const adjustablePattern = /slider|seekbar|stepper|adjustable|range/
  const descriptors = [el.type ?? el.class ?? '', el.role ?? '']
  return descriptors.some((descriptor) => adjustablePattern.test(ToolsInteract._normalize(descriptor)))
}
345
+
346
/**
 * Reports whether an element's semantic metadata marks it as something that can be acted on.
 *
 * That is the case when the metadata flags the element as adjustable, or when it lists at
 * least one supported action. Elements without semantic metadata are never actionable here.
 *
 * @param el Candidate element (may be null).
 * @returns `true` when the semantic metadata advertises an action or adjustability.
 */
private static _isSemanticActionable(el: UiElement | null): boolean {
  const semantic = el?.semantic
  if (!semantic) return false

  const actions = semantic.supported_actions
  const advertisesActions = Array.isArray(actions) && actions.length > 0
  return Boolean(semantic.adjustable) || advertisesActions
}
351
+
352
/**
 * Reads a finite numeric value for `property` from an element's state.
 *
 * The named property is consulted first. For the `value` and `raw_value` properties the
 * method then falls back to `raw_value` and finally `value`, returning the first one that
 * is a finite number. Each candidate is checked on its own, so a `raw_value` that is
 * present but not a finite number (for example NaN or a string) no longer hides a usable
 * numeric `value`.
 *
 * @param el Element whose state is inspected (may be null).
 * @param property Name of the state property to read.
 * @returns The finite numeric value, or `null` when none is available.
 */
private static _readNumericControlValue(el: UiElement | null, property: string): number | null {
  const state = el?.state
  if (!state) return null

  const isFiniteNumber = (candidate: unknown): candidate is number =>
    typeof candidate === 'number' && Number.isFinite(candidate)

  const direct = state[property as keyof UIElementState]
  if (isFiniteNumber(direct)) return direct

  if (property === 'value' || property === 'raw_value') {
    // Same preference order as before (raw_value, then value), but every candidate is validated.
    for (const candidate of [state.raw_value, state.value]) {
      if (isFiniteNumber(candidate)) return candidate
    }
  }
  return null
}
362
+
363
/**
 * Converts a 0..1 position along a control into screen coordinates inside its bounds.
 *
 * The ratio is clamped to [0, 1]. Along the travel axis the usable track is shrunk by an
 * inset on both ends (8% of the extent, at least 8 units); the other coordinate is the
 * midpoint of the bounds. On a vertical control ratio 0 maps to the bottom end of the track.
 *
 * @param bounds Control bounds as [left, top, right, bottom].
 * @param ratio Desired position along the control; values outside 0..1 are clamped.
 * @param axis Travel direction of the control.
 * @returns Integer `{ x, y }` coordinates (floored).
 */
private static _buildControlPoint(bounds: [number, number, number, number], ratio: number, axis: 'horizontal' | 'vertical') {
  const [left, top, right, bottom] = bounds
  const fraction = Math.max(0, Math.min(1, ratio))
  const insetFor = (extent: number) => Math.max(8, Math.floor(extent * 0.08))
  const usableFor = (extent: number) => Math.max(1, extent - (insetFor(extent) * 2))

  if (axis !== 'vertical') {
    const width = Math.max(1, right - left)
    return {
      x: Math.floor(left + insetFor(width) + (usableFor(width) * fraction)),
      y: Math.floor((top + bottom) / 2)
    }
  }

  const height = Math.max(1, bottom - top)
  return {
    x: Math.floor((left + right) / 2),
    y: Math.floor(bottom - insetFor(height) - (usableFor(height) * fraction))
  }
}
383
+
384
/**
 * Picks the screen point to touch so that a control lands on `targetValue`.
 *
 * The target's position within [min, max] is nudged by several biases before being turned
 * into coordinates: half a step towards the centre of the target step, a direction-dependent
 * bias based on whether the target is above or below the current value, and an extra bias
 * when the target lies within an edge window of either end of the range. The result is kept
 * away from both extremes by an endpoint margin derived from the step size and the control
 * length in pixels.
 *
 * NOTE(review): the span is floored at 1 and one step is taken to be 1 / span, which looks
 * tuned for unit-step ranges; behaviour for sub-unit ranges such as 0..1 should be confirmed.
 *
 * @param bounds Control bounds as [left, top, right, bottom].
 * @param targetValue Value the control should end up on.
 * @param currentValue Value currently shown, or `null` when unknown (no direction bias).
 * @param min Lower end of the value range.
 * @param max Upper end of the value range.
 * @param axis Travel direction of the control.
 * @returns Integer `{ x, y }` coordinates produced by `_buildControlPoint`.
 */
private static _buildConservativeControlPoint(
  bounds: [number, number, number, number],
  targetValue: number,
  currentValue: number | null,
  min: number,
  max: number,
  axis: 'horizontal' | 'vertical'
) {
  const span = Math.max(1, max - min)
  const oneStep = 1 / span
  const lengthPx = axis === 'vertical'
    ? Math.max(1, bounds[3] - bounds[1])
    : Math.max(1, bounds[2] - bounds[0])

  // Approaching from below undershoots slightly; approaching from above overshoots more.
  let directionBias = 0
  if (currentValue !== null) {
    const direction = Math.sign(targetValue - currentValue)
    if (direction > 0) {
      directionBias = -oneStep * 0.15
    } else if (direction < 0) {
      directionBias = oneStep * 0.65
    }
  }

  const pixelMargin = Math.min(0.03, Math.max(0.005, 2 / lengthPx))
  const endpointMargin = Math.max(oneStep * 0.5, pixelMargin)

  // Targets close to either end of the range get an additional push.
  const edgeWindow = Math.max(3, Math.floor(span * 0.1))
  let edgeBias = 0
  if (targetValue - min <= edgeWindow) {
    edgeBias = endpointMargin
  } else if (max - targetValue <= edgeWindow) {
    edgeBias = Math.max(oneStep * 0.4, endpointMargin * 0.75)
  }

  const biasedRatio = ((targetValue - min) / span) + (oneStep / 2) + directionBias + edgeBias
  const upperLimit = 1 - (endpointMargin * 0.25)
  const safeRatio = Math.min(upperLimit, Math.max(endpointMargin, biasedRatio))
  return ToolsInteract._buildControlPoint(bounds, safeRatio, axis)
}
419
+
420
/**
 * Decides whether a control travels horizontally or vertically.
 *
 * An explicit "vertical" or "horizontal" mention in the normalized type/class or role wins
 * (vertical is checked first). Otherwise the control is vertical only when its bounds are
 * strictly taller than they are wide.
 *
 * @param el Control element whose type and role are inspected.
 * @param bounds Control bounds as [left, top, right, bottom].
 * @returns The inferred travel axis.
 */
private static _controlAxis(el: UiElement, bounds: [number, number, number, number]): 'horizontal' | 'vertical' {
  const hints = [
    ToolsInteract._normalize(el.type ?? el.class ?? ''),
    ToolsInteract._normalize(el.role ?? '')
  ]
  const mentions = (word: RegExp) => hints.some((hint) => word.test(hint))

  if (mentions(/vertical/)) return 'vertical'
  if (mentions(/horizontal/)) return 'horizontal'

  const [left, top, right, bottom] = bounds
  const tallerThanWide = (bottom - top) > (right - left)
  return tallerThanWide ? 'vertical' : 'horizontal'
}
427
+
337
428
  private static _actionFailure(
338
429
  actionType: string,
339
430
  selector: Record<string, unknown> | null,
@@ -376,12 +467,12 @@ export class ToolsInteract {
376
467
 
377
468
  private static _resolveActionableAncestor(elements: UiElement[], chosen: { el: UiElement, idx: number } | null): { el: UiElement, idx: number } | null {
378
469
  if (!chosen) return null
379
- if (chosen.el.clickable || chosen.el.focusable) return chosen
470
+ if (chosen.el.clickable || chosen.el.focusable || ToolsInteract._isSemanticActionable(chosen.el)) return chosen
380
471
 
381
472
  let current = chosen
382
473
  let safety = 0
383
474
 
384
- while (safety < 20 && current.el && !(current.el.clickable || current.el.focusable) && current.el.parentId !== undefined && current.el.parentId !== null) {
475
+ while (safety < 20 && current.el && !(current.el.clickable || current.el.focusable || ToolsInteract._isSemanticActionable(current.el)) && current.el.parentId !== undefined && current.el.parentId !== null) {
385
476
  const parentId = current.el.parentId
386
477
  let parentIndex: number | null = null
387
478
 
@@ -390,12 +481,12 @@ export class ToolsInteract {
390
481
 
391
482
  if (parentIndex !== null && elements[parentIndex]) {
392
483
  current = { el: elements[parentIndex], idx: parentIndex }
393
- if (current.el.clickable || current.el.focusable) return current
484
+ if (current.el.clickable || current.el.focusable || ToolsInteract._isSemanticActionable(current.el)) return current
394
485
  } else if (typeof parentId === 'string') {
395
486
  const foundIndex = elements.findIndex((el) => el.resourceId === parentId || el.id === parentId)
396
487
  if (foundIndex === -1) break
397
488
  current = { el: elements[foundIndex], idx: foundIndex }
398
- if (current.el.clickable || current.el.focusable) return current
489
+ if (current.el.clickable || current.el.focusable || ToolsInteract._isSemanticActionable(current.el)) return current
399
490
  } else {
400
491
  break
401
492
  }
@@ -412,7 +503,7 @@ export class ToolsInteract {
412
503
 
413
504
  for (let i = 0; i < elements.length; i++) {
414
505
  const el = elements[i]
415
- if (!el || !(el.clickable || el.focusable)) continue
506
+ if (!el || !(el.clickable || el.focusable || ToolsInteract._isSemanticActionable(el))) continue
416
507
  const bounds = ToolsInteract._normalizeBounds(el.bounds)
417
508
  if (!bounds) continue
418
509
  const [pl, pt, pr, pb] = bounds
@@ -570,6 +661,507 @@ export class ToolsInteract {
570
661
  })
571
662
  }
572
663
 
664
/**
 * Moves an adjustable control (slider-like element) to a numeric target value and verifies it.
 *
 * Flow:
 * 1. With only a `selector`, the control is awaited through `waitForUIHandler` (condition
 *    `clickable`); if that does not yield an element id, `findElementHandler` is tried with
 *    the selector's text / resource id / accessibility id as a non-exact query.
 * 2. The element is resolved from the UI tree once; later attempts reuse the cached match,
 *    patched with the most recently observed state.
 * 3. Each attempt taps the computed target point. If the tap fails a swipe from the current
 *    value's point to the target point is used instead. If verification through
 *    `expectStateHandler` is still outside tolerance and the starting value is known, one
 *    swipe is issued and verification is repeated.
 *
 * `attempts` in the response counts gestures issued (taps and swipes), not loop iterations.
 * Only the `value` and `raw_value` properties are supported. A `targetValue` of NaN is
 * rejected before any gesture is issued, because it would otherwise pass every range
 * comparison and produce NaN touch coordinates.
 *
 * @param selector Selector identifying the control; used when `element_id` is absent.
 * @param element_id Previously resolved element id.
 * @param property State property to drive (default `value`).
 * @param targetValue Desired numeric value; must lie inside the control's value range.
 * @param tolerance Allowed absolute deviation; negative or non-finite input is treated as 0.
 * @param maxAttempts Maximum number of adjustment rounds (at least 1, default 3).
 * @param platform Target platform; defaults to `android`.
 * @param deviceId Optional device identifier.
 * @returns An action result extended with target/actual state, convergence flags, attempt
 *          count and the adjustment mode that was last used.
 */
static async adjustControlHandler({
  selector,
  element_id,
  property = 'value',
  targetValue,
  tolerance = 0,
  maxAttempts = 3,
  platform,
  deviceId
}: {
  selector?: { text?: string, resource_id?: string, accessibility_id?: string, contains?: boolean },
  element_id?: string,
  property?: string,
  targetValue: number,
  tolerance?: number,
  maxAttempts?: number,
  platform?: 'android' | 'ios',
  deviceId?: string
}): Promise<AdjustControlResponse> {
  type AdjustmentMode = 'semantic' | 'gesture' | 'coordinate'
  type ControlState = { property: string; value: number | null; raw_value?: number | null }
  type MatchedElement = { el: UiElement, idx: number }
  type ResolvedMatch = {
    tree: any
    device: any
    match: MatchedElement | null
    resolvedTarget: ActionTargetResolved | null
  }

  const actionType = 'adjust_control'
  const targetSelector = selector ?? (element_id ? { elementId: element_id } : null)
  const normalizedTolerance = Number.isFinite(tolerance) ? Math.max(0, tolerance) : 0
  const attemptsLimit = Math.max(1, Math.floor(Number(maxAttempts) || 1))
  const sourcePlatform: 'android' | 'ios' = platform || 'android'
  let resolvedPlatform = sourcePlatform
  let resolvedDeviceId = deviceId
  // Local copy so the caller-supplied parameter is never reassigned.
  let activeElementId = element_id
  const fingerprintBefore = await ToolsInteract._captureFingerprint(resolvedPlatform, resolvedDeviceId)
  let semanticFallbackElement: FindElementResponse['element'] | null = null

  // Shared `details` payload for success and failure results.
  const describeDetails = (
    within: boolean,
    state: ControlState | null,
    attempts: number,
    mode: AdjustmentMode,
    reason: string
  ) => ({
    target_value: targetValue,
    tolerance: normalizedTolerance,
    property,
    attempts,
    adjustment_mode: mode,
    actual_state: state,
    converged: within,
    within_tolerance: within,
    reason
  })

  // Adds the adjust_control specific top-level fields to a generic action result.
  const withAdjustmentFields = (
    base: AdjustControlResponse,
    within: boolean,
    state: ControlState | null,
    attempts: number,
    mode: AdjustmentMode
  ): AdjustControlResponse => ({
    ...base,
    target_state: {
      property,
      target_value: targetValue,
      tolerance: normalizedTolerance
    },
    actual_state: state,
    within_tolerance: within,
    converged: within,
    attempts,
    adjustment_mode: mode
  })

  const buildFailure = (
    failureCode: ActionFailureCode,
    reason: string,
    resolved: ActionTargetResolved | null,
    device: any,
    actualState: ControlState | null,
    attempts: number,
    adjustmentMode: AdjustmentMode = 'gesture',
    retryable = false,
    uiFingerprintAfter: string | null = null
  ): AdjustControlResponse => {
    const base = buildActionExecutionResult({
      actionType,
      sourceModule: 'interact',
      device,
      selector: targetSelector,
      resolved,
      success: false,
      uiFingerprintBefore: fingerprintBefore,
      uiFingerprintAfter,
      failure: { failureCode, retryable },
      details: describeDetails(false, actualState, attempts, adjustmentMode, reason)
    }) as AdjustControlResponse

    return withAdjustmentFields(base, false, actualState, attempts, adjustmentMode)
  }

  // Captures the post-action fingerprint and assembles a success result.
  const buildSuccess = async (
    reason: string,
    resolved: ActionTargetResolved | null,
    device: any,
    actualState: ControlState | null,
    attempts: number,
    adjustmentMode: AdjustmentMode
  ): Promise<AdjustControlResponse> => {
    const uiFingerprintAfter = await ToolsInteract._captureFingerprint(resolvedPlatform, resolvedDeviceId)
    const base = buildActionExecutionResult({
      actionType,
      sourceModule: 'interact',
      device,
      selector: targetSelector,
      resolved,
      success: true,
      uiFingerprintBefore: fingerprintBefore,
      uiFingerprintAfter,
      details: describeDetails(true, actualState, attempts, adjustmentMode, reason)
    }) as AdjustControlResponse

    return withAdjustmentFields(base, true, actualState, attempts, adjustmentMode)
  }

  // Bundles a matched element with its freshly computed resolved-target descriptor.
  const toResolvedMatch = (tree: any, device: any, match: MatchedElement): ResolvedMatch => ({
    tree,
    device,
    match,
    resolvedTarget: ToolsInteract._resolvedTargetFromElement(
      ToolsInteract._computeElementId(resolvedPlatform, resolvedDeviceId, match.el, match.idx),
      match.el,
      match.idx
    )
  })

  // Fetches the current UI tree and locates the control by id, semantic fallback, or selector.
  const resolveCurrentMatch = async (): Promise<ResolvedMatch | null> => {
    const tree = await ToolsObserve.getUITreeHandler({ platform: resolvedPlatform, deviceId: resolvedDeviceId }) as any
    resolvedPlatform = tree?.device?.platform === 'ios' ? 'ios' : resolvedPlatform
    resolvedDeviceId = tree?.device?.id || resolvedDeviceId
    const elements = Array.isArray(tree?.elements) ? tree.elements as UiElement[] : []

    if (activeElementId) {
      const stored = ToolsInteract._resolvedUiElements.get(activeElementId)
      if (!stored) return null
      const current = ToolsInteract._findCurrentResolvedElement(elements, resolvedPlatform, resolvedDeviceId, stored)
      if (!current) return null
      return toResolvedMatch(tree, tree?.device, { el: current.el, idx: current.index })
    }

    const fallbackElement = semanticFallbackElement
    if (fallbackElement) {
      const rawBounds = fallbackElement.bounds
      const fallbackBounds = ToolsInteract._normalizeBounds(
        Array.isArray(rawBounds)
          ? rawBounds
          : rawBounds && typeof rawBounds === 'object'
            ? [
                Number((rawBounds as any).left),
                Number((rawBounds as any).top),
                Number((rawBounds as any).right),
                Number((rawBounds as any).bottom)
              ]
            : null
      )

      let matchedIndex = -1
      if (fallbackBounds) {
        // First preference: an element with exactly the same bounds as the fallback hit.
        matchedIndex = elements.findIndex((el) => {
          const candidate = ToolsInteract._normalizeBounds(el.bounds)
          return !!candidate
            && candidate[0] === fallbackBounds[0]
            && candidate[1] === fallbackBounds[1]
            && candidate[2] === fallbackBounds[2]
            && candidate[3] === fallbackBounds[3]
        })

        if (matchedIndex === -1) {
          // Otherwise: the adjustable control whose centre is closest (Manhattan distance).
          const anchorX = Math.floor((fallbackBounds[0] + fallbackBounds[2]) / 2)
          const anchorY = Math.floor((fallbackBounds[1] + fallbackBounds[3]) / 2)
          let bestDistance = Infinity
          for (let i = 0; i < elements.length; i++) {
            const el = elements[i]
            if (!ToolsInteract._isAdjustableControl(el)) continue
            const candidate = ToolsInteract._normalizeBounds(el.bounds)
            if (!candidate) continue
            const centerX = Math.floor((candidate[0] + candidate[2]) / 2)
            const centerY = Math.floor((candidate[1] + candidate[3]) / 2)
            const distance = Math.abs(centerX - anchorX) + Math.abs(centerY - anchorY)
            if (distance < bestDistance) {
              bestDistance = distance
              matchedIndex = i
            }
          }
        }
      }

      if (matchedIndex >= 0 && elements[matchedIndex]) {
        return toResolvedMatch(tree, tree?.device, { el: elements[matchedIndex], idx: matchedIndex })
      }
    }

    if (selector) {
      const matched = ToolsInteract._findFirstMatchingElement(elements, selector)
      return matched ? toResolvedMatch(tree, tree?.device, matched) : null
    }

    return null
  }

  if (!selector && !element_id) {
    return buildFailure('ELEMENT_NOT_FOUND', 'selector or element_id is required', null, undefined, null, 0, 'gesture', false)
  }

  if (selector && !element_id) {
    const waitResult = await ToolsInteract.waitForUIHandler({
      selector,
      condition: 'clickable',
      timeout_ms: 5000,
      poll_interval_ms: 300,
      platform: resolvedPlatform,
      deviceId: resolvedDeviceId
    }) as any

    if (waitResult?.status === 'success' && waitResult?.element?.elementId) {
      activeElementId = waitResult.element.elementId
    } else {
      const semanticQuery = selector.text ?? selector.resource_id ?? selector.accessibility_id ?? ''
      const reportedNotFound = waitResult?.error?.code === 'ELEMENT_NOT_FOUND'
      const waitReason = waitResult?.error?.message ?? 'adjustable control not found'

      if (!semanticQuery) {
        return buildFailure(
          reportedNotFound ? 'ELEMENT_NOT_FOUND' : 'TIMEOUT',
          waitReason,
          null,
          waitResult?.device,
          null,
          0,
          'gesture',
          reportedNotFound
        )
      }

      const fallback = await ToolsInteract.findElementHandler({
        query: semanticQuery,
        exact: false,
        timeoutMs: 3000,
        platform: resolvedPlatform,
        deviceId: resolvedDeviceId
      })

      if (!fallback.found || !fallback.element) {
        return buildFailure('ELEMENT_NOT_FOUND', waitReason, null, waitResult?.device, null, 0, 'gesture', true)
      }

      semanticFallbackElement = fallback.element
    }
  }

  let lastObservedState: ControlState | null = null
  let lastAdjustmentMode: AdjustmentMode = 'gesture'
  let resolvedTarget: ActionTargetResolved | null = null
  let currentDevice: any = undefined
  let attemptCount = 0
  let cachedResolvedMatch: MatchedElement | null = null

  // Re-reads the control state and compares it with the target using the local tolerance.
  const verifyAgainstTarget = async (fallbackState: ControlState | null): Promise<{
    verification: any
    observedState: ControlState | null
    withinTolerance: boolean
  }> => {
    const verification = await ToolsInteract.expectStateHandler({
      element_id: resolvedTarget?.elementId ?? activeElementId,
      selector: selector ?? undefined,
      property,
      expected: targetValue,
      platform: resolvedPlatform,
      deviceId: resolvedDeviceId
    }) as any

    const reported = verification?.observed_state
    const observedValue: number | null = typeof reported?.value === 'number'
      ? reported.value
      : typeof reported?.raw_value === 'number'
        ? reported.raw_value
        : null
    const observedState: ControlState | null = observedValue !== null
      ? {
          property,
          value: observedValue,
          raw_value: typeof reported?.raw_value === 'number' ? reported.raw_value : undefined
        }
      : fallbackState

    return {
      verification,
      observedState,
      withinTolerance: observedValue !== null && Math.abs(observedValue - targetValue) <= normalizedTolerance
    }
  }

  for (let attempt = 0; attempt < attemptsLimit; attempt++) {
    // Only the first round reads the UI tree; later rounds reuse the cached, state-patched match.
    const resolved: ResolvedMatch | null = cachedResolvedMatch
      ? toResolvedMatch(null, currentDevice, cachedResolvedMatch)
      : await resolveCurrentMatch()
    if (!resolved || !resolved.match || !resolved.resolvedTarget) {
      return buildFailure('STALE_REFERENCE', 'adjustable control could not be resolved', resolvedTarget, currentDevice, lastObservedState, attemptCount, lastAdjustmentMode, true)
    }

    currentDevice = resolved.device
    resolvedTarget = resolved.resolvedTarget
    cachedResolvedMatch = resolved.match
    const currentEl: UiElement = resolved.match.el
    const bounds = ToolsInteract._normalizeBounds(currentEl.bounds)
    const valueRange = currentEl.state?.value_range ?? null
    const currentValue = ToolsInteract._readNumericControlValue(currentEl, property)
    const actualState: ControlState | null = currentValue !== null
      ? { property, value: currentValue, raw_value: typeof currentEl.state?.raw_value === 'number' ? currentEl.state.raw_value : undefined }
      : null

    lastObservedState = actualState

    if (property !== 'value' && property !== 'raw_value') {
      return buildFailure('ELEMENT_NOT_INTERACTABLE', 'adjust_control currently supports numeric value and raw_value properties only', resolvedTarget, currentDevice, actualState, attemptCount, lastAdjustmentMode, false)
    }

    if (currentValue !== null && Math.abs(currentValue - targetValue) <= normalizedTolerance) {
      return buildSuccess('control already within tolerance', resolvedTarget, currentDevice, actualState, attemptCount, 'semantic')
    }

    if (!ToolsInteract._isAdjustableControl(currentEl)) {
      return buildFailure('ELEMENT_NOT_INTERACTABLE', 'target is not an adjustable control', resolvedTarget, currentDevice, actualState, attemptCount, lastAdjustmentMode, false)
    }

    if (!bounds) {
      return buildFailure('ELEMENT_NOT_INTERACTABLE', 'adjustable control has no bounds', resolvedTarget, currentDevice, actualState, attemptCount, lastAdjustmentMode, false)
    }

    const min = typeof valueRange?.min === 'number' ? valueRange.min : null
    const max = typeof valueRange?.max === 'number' ? valueRange.max : null
    if (min === null || max === null || !Number.isFinite(min) || !Number.isFinite(max) || max <= min) {
      return buildFailure('ELEMENT_NOT_INTERACTABLE', 'value_range unavailable', resolvedTarget, currentDevice, actualState, attemptCount, lastAdjustmentMode, false)
    }

    // NaN is neither below min nor above max, so it must be rejected explicitly;
    // otherwise the gestures below would be issued with NaN coordinates.
    if (Number.isNaN(targetValue)) {
      return buildFailure('UNKNOWN', 'targetValue is not a number', resolvedTarget, currentDevice, actualState, attemptCount, lastAdjustmentMode, false)
    }

    if (targetValue < min || targetValue > max) {
      return buildFailure('UNKNOWN', `targetValue ${targetValue} is outside the control range ${min}..${max}`, resolvedTarget, currentDevice, actualState, attemptCount, lastAdjustmentMode, false)
    }

    const axis = ToolsInteract._controlAxis(currentEl, bounds)
    const targetPoint = ToolsInteract._buildConservativeControlPoint(bounds, targetValue, currentValue, min, max, axis)
    // Without a known current value the drag starts from the middle of the control.
    const currentPoint = currentValue !== null
      ? ToolsInteract._buildControlPoint(bounds, (currentValue - min) / (max - min), axis)
      : ToolsInteract._buildControlPoint(bounds, 0.5, axis)

    const dragToTarget = () => ToolsInteract.swipeHandler({
      platform: resolvedPlatform,
      x1: currentPoint.x,
      y1: currentPoint.y,
      x2: targetPoint.x,
      y2: targetPoint.y,
      duration: 220,
      deviceId: resolvedDeviceId
    })

    lastAdjustmentMode = 'coordinate'
    const tapResult = await ToolsInteract.tapHandler({
      platform: resolvedPlatform,
      x: targetPoint.x,
      y: targetPoint.y,
      deviceId: resolvedDeviceId
    })
    let actionDevice = tapResult.device ?? currentDevice
    attemptCount++

    if (!tapResult.success) {
      // The tap could not be delivered: fall back to dragging towards the target point.
      lastAdjustmentMode = 'gesture'
      const swipeResult = await dragToTarget()
      attemptCount++

      if (!swipeResult.success) {
        return buildFailure('UNKNOWN', swipeResult.error ?? tapResult.error ?? 'adjustment gesture failed', resolvedTarget, swipeResult.device ?? tapResult.device, actualState, attemptCount, lastAdjustmentMode, false)
      }

      actionDevice = swipeResult.device ?? actionDevice
    }

    let verificationResult = await verifyAgainstTarget(actualState)
    let observedState = verificationResult.observedState
    lastObservedState = observedState

    if (!verificationResult.withinTolerance && currentValue !== null) {
      // The value is still off: drag from the original thumb position and verify again.
      lastAdjustmentMode = 'gesture'
      const swipeResult = await dragToTarget()
      attemptCount++
      if (!swipeResult.success) {
        return buildFailure('UNKNOWN', swipeResult.error ?? 'adjustment gesture failed', resolvedTarget, swipeResult.device, observedState ?? actualState, attemptCount, lastAdjustmentMode, false)
      }

      verificationResult = await verifyAgainstTarget(actualState)
      observedState = verificationResult.observedState
    }

    const verification = verificationResult.verification
    lastObservedState = observedState

    if (verificationResult.withinTolerance) {
      return buildSuccess(
        verification?.reason ?? 'control converged to target value',
        resolvedTarget,
        actionDevice ?? currentDevice,
        observedState,
        attemptCount,
        lastAdjustmentMode
      )
    }

    // Carry the freshly observed value into the next round without re-reading the tree.
    cachedResolvedMatch = {
      el: {
        ...currentEl,
        state: {
          ...(currentEl.state ?? null),
          ...(observedState ? {
            [observedState.property]: observedState.value,
            raw_value: observedState.raw_value ?? observedState.value
          } : {})
        }
      },
      idx: resolved.match.idx
    }
  }

  const uiFingerprintAfter = await ToolsInteract._captureFingerprint(resolvedPlatform, resolvedDeviceId)
  return buildFailure('TIMEOUT', 'control did not converge within the allotted attempts', resolvedTarget, currentDevice, lastObservedState, attemptCount, lastAdjustmentMode, true, uiFingerprintAfter)
}
1164
+
573
1165
  static async swipeHandler({ platform = 'android', x1, y1, x2, y2, duration, deviceId }: { platform?: 'android' | 'ios', x1: number, y1: number, x2: number, y2: number, duration: number, deviceId?: string }) {
574
1166
  const { interact, resolved } = await ToolsInteract.getInteractionService(platform, deviceId)
575
1167
  return await interact.swipe(x1, y1, x2, y2, duration, resolved.id)
@@ -611,12 +1203,14 @@ export class ToolsInteract {
611
1203
  const [l,t,r,b] = bounds
612
1204
  if (r <= l || b <= t) return null
613
1205
  // Do not early-return on non-interactable elements — score them so we can locate their clickable ancestor later
614
- const interactable = !!(el.clickable || el.enabled || el.focusable)
1206
+ const interactable = !!(el.clickable || el.enabled || el.focusable || ToolsInteract._isSemanticActionable(el))
615
1207
 
616
1208
  const text = normalize(el.text ?? el.label ?? el.value ?? '')
617
1209
  const content = normalize(el.contentDescription ?? el.contentDesc ?? el.accessibilityLabel ?? '')
618
1210
  const resourceId = normalize(el.resourceId ?? el.resourceID ?? el.id ?? '')
619
1211
  const className = normalize(el.type ?? el.class ?? '')
1212
+ const semanticRole = normalize(el.semantic?.semantic_role ?? '')
1213
+ const semanticActions = Array.isArray(el.semantic?.supported_actions) ? el.semantic.supported_actions.map((action) => normalize(action)).filter(Boolean) : []
620
1214
 
621
1215
  let score = 0
622
1216
  let reason = 'best_scoring_candidate'
@@ -658,6 +1252,29 @@ export class ToolsInteract {
658
1252
  reason = 'partial_class_match'
659
1253
  }
660
1254
  }
1255
+ if (!exact) {
1256
+ if (!score && semanticRole && semanticRole.includes(q)) {
1257
+ score = 0.5
1258
+ reason = 'semantic_role_match'
1259
+ }
1260
+ if (semanticActions.some((action) => action.includes(q))) {
1261
+ score = Math.max(score, score > 0 ? 0.65 : 0.6)
1262
+ reason = 'semantic_action_match'
1263
+ }
1264
+ if (score === 0 && el.semantic?.adjustable && /slider|stepper|dropdown|segment|control|adjust/.test(q)) {
1265
+ score = 0.45
1266
+ reason = 'semantic_control_match'
1267
+ }
1268
+ } else {
1269
+ if (!score && semanticRole && semanticRole === q) {
1270
+ score = 0.5
1271
+ reason = 'semantic_role_match'
1272
+ }
1273
+ if (semanticActions.some((action) => action === q)) {
1274
+ score = Math.max(score, score > 0 ? 0.65 : 0.6)
1275
+ reason = 'semantic_action_match'
1276
+ }
1277
+ }
661
1278
  if (score > 0 && interactable) score += 0.05
662
1279
  if (score <= 0) return null
663
1280
  return { el, idx, score, reason, interactable }
@@ -767,7 +1384,7 @@ export class ToolsInteract {
767
1384
  }
768
1385
  }
769
1386
 
770
- if (best && !(best.el.clickable || best.el.focusable)) {
1387
+ if (best && !(best.el.clickable || best.el.focusable || ToolsInteract._isSemanticActionable(best.el))) {
771
1388
  const nearbyActionable = ToolsInteract._resolveNearbyActionableControl(elements, { el: best.el, idx: best.idx }, screen)
772
1389
  if (nearbyActionable) {
773
1390
  best = {