mobile-debug-mcp 0.26.3 → 0.26.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/interact/index.js +496 -7
- package/dist/observe/ios.js +48 -4
- package/dist/server/tool-definitions.js +56 -0
- package/dist/server/tool-handlers.js +25 -0
- package/dist/server-core.js +1 -1
- package/dist/utils/android/utils.js +37 -5
- package/docs/CHANGELOG.md +6 -0
- package/docs/ROADMAP.md +69 -14
- package/docs/rfcs/008-adjustable-control-support-and-semantic-value-manipulation.md +273 -0
- package/docs/rfcs/009-semantic-control-modeling-for-custom-and-composite-controls.md +238 -0
- package/docs/specs/mcp-tooling-spec-v1.md +23 -1
- package/docs/tools/interact.md +21 -0
- package/package.json +1 -1
- package/src/interact/index.ts +625 -8
- package/src/observe/ios.ts +43 -4
- package/src/server/tool-definitions.ts +56 -0
- package/src/server/tool-handlers.ts +26 -0
- package/src/server-core.ts +1 -1
- package/src/types.ts +21 -0
- package/src/utils/android/utils.ts +32 -5
- package/test/unit/interact/adjust_control.test.ts +365 -0
- package/test/unit/observe/find_element.test.ts +46 -0
- package/test/unit/observe/state_extraction.test.ts +89 -2
- package/test/unit/server/contract.test.ts +8 -0
- package/test/unit/server/response_shapes.test.ts +39 -0
package/src/interact/index.ts
CHANGED
|
@@ -10,11 +10,13 @@ import { buildActionExecutionResult } from '../server/common.js'
|
|
|
10
10
|
import type {
|
|
11
11
|
ActionFailureCode,
|
|
12
12
|
ActionTargetResolved,
|
|
13
|
+
AdjustControlResponse,
|
|
13
14
|
FindElementResponse,
|
|
14
15
|
ExpectElementVisibleResponse,
|
|
15
16
|
ExpectStateResponse,
|
|
16
17
|
ExpectScreenResponse,
|
|
17
18
|
WaitForUIChangeResponse,
|
|
19
|
+
UIElementSemanticMetadata,
|
|
18
20
|
UIElementState,
|
|
19
21
|
TapElementResponse
|
|
20
22
|
} from '../types.js'
|
|
@@ -47,7 +49,7 @@ interface UiElement {
|
|
|
47
49
|
role?: string | null
|
|
48
50
|
test_tag?: string | null
|
|
49
51
|
selector?: { value: string | null, confidence: { score: number, reason: string } | null } | null
|
|
50
|
-
semantic?:
|
|
52
|
+
semantic?: UIElementSemanticMetadata | null
|
|
51
53
|
}
|
|
52
54
|
|
|
53
55
|
interface ResolvedUiElementContext {
|
|
@@ -334,6 +336,95 @@ export class ToolsInteract {
|
|
|
334
336
|
}
|
|
335
337
|
}
|
|
336
338
|
|
|
339
|
+
/**
 * Reports whether an element looks like a value-adjustable control.
 *
 * An element qualifies when its state carries a `value_range`, or when its
 * normalized type/class or role mentions one of the adjustable-control keywords.
 *
 * @param el - Element to inspect; `null` is never adjustable.
 * @returns `true` when any of the signals above is present.
 */
private static _isAdjustableControl(el: UiElement | null): boolean {
  if (!el) return false
  const adjustablePattern = /slider|seekbar|stepper|adjustable|range/
  const descriptors = [
    ToolsInteract._normalize(el.type ?? el.class ?? ''),
    ToolsInteract._normalize(el.role ?? '')
  ]
  if (el.state?.value_range) return true
  return descriptors.some((descriptor) => adjustablePattern.test(descriptor))
}
|
|
345
|
+
|
|
346
|
+
/**
 * Reports whether an element's semantic metadata marks it as something that can be acted on.
 *
 * @param el - Element to inspect; `null` or an element without `semantic` is not actionable.
 * @returns `true` when `semantic.adjustable` is truthy or `semantic.supported_actions`
 *          is a non-empty array.
 */
private static _isSemanticActionable(el: UiElement | null): boolean {
  const semantic = el?.semantic
  if (!semantic) return false
  if (semantic.adjustable) return true
  const actions = semantic.supported_actions
  return Array.isArray(actions) && actions.length > 0
}
|
|
351
|
+
|
|
352
|
+
/**
 * Reads a finite numeric value for `property` from an element's state.
 *
 * The named state property is tried first. For `value` and `raw_value` only,
 * a second lookup uses `raw_value` when it is non-nullish and `value` otherwise.
 *
 * @param el - Element whose `state` is read; `null` or a missing state yields `null`.
 * @param property - Name of the state property to read.
 * @returns The finite number found, or `null` when neither lookup yields one.
 */
private static _readNumericControlValue(el: UiElement | null, property: string): number | null {
  const state = el?.state
  if (!state) return null

  const direct = state[property as keyof UIElementState]
  if (typeof direct === 'number' && Number.isFinite(direct)) return direct

  const allowsFallback = property === 'value' || property === 'raw_value'
  if (!allowsFallback) return null

  const fallback = state.raw_value ?? state.value
  return typeof fallback === 'number' && Number.isFinite(fallback) ? fallback : null
}
|
|
362
|
+
|
|
363
|
+
/**
 * Converts a 0..1 position along a control's track into a screen point.
 *
 * The ratio is clamped to [0, 1]. The track is shortened at both ends by an inset
 * (8% of the track length, at least 8px) so the point avoids the very edge of the
 * control. The inset is capped at half the track length: without the cap, controls
 * narrower/shorter than ~16px produced points outside their own bounds.
 *
 * For a vertical axis, ratio 0 maps to the bottom end and ratio 1 to the top end;
 * for a horizontal axis, ratio 0 maps to the left end and ratio 1 to the right end.
 * The cross-axis coordinate is the midpoint of the bounds.
 *
 * @param bounds - `[left, top, right, bottom]` of the control.
 * @param ratio - Desired position along the track; a NaN ratio yields a NaN coordinate,
 *                so callers must validate their inputs.
 * @param axis - Direction in which the control's value changes.
 * @returns Integer `{ x, y }` coordinates (floored).
 */
private static _buildControlPoint(bounds: [number, number, number, number], ratio: number, axis: 'horizontal' | 'vertical') {
  const clampedRatio = Math.max(0, Math.min(1, ratio))
  const [left, top, right, bottom] = bounds
  const width = Math.max(1, right - left)
  const height = Math.max(1, bottom - top)

  // Preferred inset is max(8px, 8%), but never more than half the track so that
  // inset + usable length always fits inside the bounds.
  const insetFor = (length: number): number =>
    Math.min(Math.max(8, Math.floor(length * 0.08)), Math.floor((length - 1) / 2))

  if (axis === 'vertical') {
    const insetY = insetFor(height)
    const usableHeight = Math.max(1, height - (insetY * 2))
    return {
      x: Math.floor((left + right) / 2),
      y: Math.floor(bottom - insetY - (usableHeight * clampedRatio))
    }
  }

  const insetX = insetFor(width)
  const usableWidth = Math.max(1, width - (insetX * 2))
  return {
    x: Math.floor(left + insetX + (usableWidth * clampedRatio)),
    y: Math.floor((top + bottom) / 2)
  }
}
|
|
383
|
+
|
|
384
|
+
/**
 * Picks a tap/drag destination for `targetValue` on a ranged control, nudged by a few
 * heuristics before being converted to a screen point via `_buildControlPoint`.
 *
 * The raw ratio `(targetValue - min) / range` is shifted by:
 *  - half of one unit step (`1 / range`),
 *  - a direction bias that depends on whether the target is above or below `currentValue`,
 *  - an edge bias when the target lies within the edge window of `min` or `max`.
 * The result is then kept between `endpointMargin` and `1 - endpointMargin * 0.25`.
 *
 * @param bounds - `[left, top, right, bottom]` of the control.
 * @param targetValue - Value the control should reach.
 * @param currentValue - Value currently shown, or `null` when unknown (no direction bias).
 * @param min - Lower end of the control's value range.
 * @param max - Upper end of the control's value range.
 * @param axis - Direction in which the control's value changes.
 * @returns Integer `{ x, y }` coordinates produced by `_buildControlPoint`.
 */
private static _buildConservativeControlPoint(
  bounds: [number, number, number, number],
  targetValue: number,
  currentValue: number | null,
  min: number,
  max: number,
  axis: 'horizontal' | 'vertical'
) {
  // The range is treated as at least 1 wide, and one "step" is one unit of that range.
  const span = Math.max(1, max - min)
  const rawRatio = (targetValue - min) / span
  const unitStep = 1 / span
  const halfStep = unitStep / 2

  // Bias depends on the direction of travel from the current value.
  const travel = currentValue === null ? 0 : Math.sign(targetValue - currentValue)
  let directionBias = 0
  if (travel > 0) {
    directionBias = -unitStep * 0.15
  } else if (travel < 0) {
    directionBias = unitStep * 0.65
  }

  // Margin kept from the ends of the track: at least half a step, and at least
  // roughly two pixels expressed as a fraction of the track (bounded to 0.5%..3%).
  const [left, top, right, bottom] = bounds
  const trackLengthPx = Math.max(1, axis === 'vertical' ? bottom - top : right - left)
  const pixelMargin = Math.min(0.03, Math.max(0.005, 2 / trackLengthPx))
  const endpointMargin = Math.max(unitStep * 0.5, pixelMargin)

  // Extra push when the target sits near either end; the low edge takes precedence.
  const edgeWindow = Math.max(3, Math.floor(span * 0.1))
  let edgeBias = 0
  if (targetValue - min <= edgeWindow) {
    edgeBias = endpointMargin
  } else if (max - targetValue <= edgeWindow) {
    edgeBias = Math.max(unitStep * 0.4, endpointMargin * 0.75)
  }

  const biasedRatio = rawRatio + halfStep + directionBias + edgeBias
  const upperLimit = 1 - (endpointMargin * 0.25)
  const safeRatio = Math.min(upperLimit, Math.max(endpointMargin, biasedRatio))
  return ToolsInteract._buildControlPoint(bounds, safeRatio, axis)
}
|
|
419
|
+
|
|
420
|
+
/**
 * Decides along which axis a control's value changes.
 *
 * An explicit "vertical" or "horizontal" in the normalized type/class or role wins
 * (vertical is checked first). Otherwise the control is vertical only when its
 * bounds are strictly taller than wide.
 *
 * @param el - Element whose type/class and role are inspected.
 * @param bounds - `[left, top, right, bottom]` of the element.
 * @returns `'vertical'` or `'horizontal'`.
 */
private static _controlAxis(el: UiElement, bounds: [number, number, number, number]): 'horizontal' | 'vertical' {
  const descriptors = [
    ToolsInteract._normalize(el.type ?? el.class ?? ''),
    ToolsInteract._normalize(el.role ?? '')
  ]
  if (descriptors.some((descriptor) => /vertical/.test(descriptor))) return 'vertical'
  if (descriptors.some((descriptor) => /horizontal/.test(descriptor))) return 'horizontal'

  const [left, top, right, bottom] = bounds
  const height = bottom - top
  const width = right - left
  return height > width ? 'vertical' : 'horizontal'
}
|
|
427
|
+
|
|
337
428
|
private static _actionFailure(
|
|
338
429
|
actionType: string,
|
|
339
430
|
selector: Record<string, unknown> | null,
|
|
@@ -376,12 +467,12 @@ export class ToolsInteract {
|
|
|
376
467
|
|
|
377
468
|
private static _resolveActionableAncestor(elements: UiElement[], chosen: { el: UiElement, idx: number } | null): { el: UiElement, idx: number } | null {
|
|
378
469
|
if (!chosen) return null
|
|
379
|
-
if (chosen.el.clickable || chosen.el.focusable) return chosen
|
|
470
|
+
if (chosen.el.clickable || chosen.el.focusable || ToolsInteract._isSemanticActionable(chosen.el)) return chosen
|
|
380
471
|
|
|
381
472
|
let current = chosen
|
|
382
473
|
let safety = 0
|
|
383
474
|
|
|
384
|
-
while (safety < 20 && current.el && !(current.el.clickable || current.el.focusable) && current.el.parentId !== undefined && current.el.parentId !== null) {
|
|
475
|
+
while (safety < 20 && current.el && !(current.el.clickable || current.el.focusable || ToolsInteract._isSemanticActionable(current.el)) && current.el.parentId !== undefined && current.el.parentId !== null) {
|
|
385
476
|
const parentId = current.el.parentId
|
|
386
477
|
let parentIndex: number | null = null
|
|
387
478
|
|
|
@@ -390,12 +481,12 @@ export class ToolsInteract {
|
|
|
390
481
|
|
|
391
482
|
if (parentIndex !== null && elements[parentIndex]) {
|
|
392
483
|
current = { el: elements[parentIndex], idx: parentIndex }
|
|
393
|
-
if (current.el.clickable || current.el.focusable) return current
|
|
484
|
+
if (current.el.clickable || current.el.focusable || ToolsInteract._isSemanticActionable(current.el)) return current
|
|
394
485
|
} else if (typeof parentId === 'string') {
|
|
395
486
|
const foundIndex = elements.findIndex((el) => el.resourceId === parentId || el.id === parentId)
|
|
396
487
|
if (foundIndex === -1) break
|
|
397
488
|
current = { el: elements[foundIndex], idx: foundIndex }
|
|
398
|
-
if (current.el.clickable || current.el.focusable) return current
|
|
489
|
+
if (current.el.clickable || current.el.focusable || ToolsInteract._isSemanticActionable(current.el)) return current
|
|
399
490
|
} else {
|
|
400
491
|
break
|
|
401
492
|
}
|
|
@@ -412,7 +503,7 @@ export class ToolsInteract {
|
|
|
412
503
|
|
|
413
504
|
for (let i = 0; i < elements.length; i++) {
|
|
414
505
|
const el = elements[i]
|
|
415
|
-
if (!el || !(el.clickable || el.focusable)) continue
|
|
506
|
+
if (!el || !(el.clickable || el.focusable || ToolsInteract._isSemanticActionable(el))) continue
|
|
416
507
|
const bounds = ToolsInteract._normalizeBounds(el.bounds)
|
|
417
508
|
if (!bounds) continue
|
|
418
509
|
const [pl, pt, pr, pb] = bounds
|
|
@@ -570,6 +661,507 @@ export class ToolsInteract {
|
|
|
570
661
|
})
|
|
571
662
|
}
|
|
572
663
|
|
|
664
|
+
/**
 * Moves a ranged control (slider, seek bar, ...) toward `targetValue` and verifies the result.
 *
 * Flow:
 *  1. Validate inputs (a selector or element id is required; `targetValue` must be finite).
 *  2. When only a selector is given, wait for a clickable match; if that fails, fall back
 *     to a non-exact `findElementHandler` lookup and re-locate the element in the UI tree
 *     by bounds.
 *  3. Up to `maxAttempts` rounds: tap the computed target point (swiping instead if the tap
 *     fails), verify with `expectStateHandler`, and if still out of tolerance swipe from the
 *     thumb's last observed position to the target point and verify again.
 *
 * Only the `value` and `raw_value` properties are supported. `attempts` in the response
 * counts individual gestures (taps and swipes), so it can exceed `maxAttempts`.
 *
 * @param selector - Selector used to locate the control when `element_id` is not given.
 * @param element_id - Previously resolved element id; takes precedence over `selector`.
 * @param property - State property to drive; defaults to `'value'`.
 * @param targetValue - Desired numeric value; must be finite and inside the control's range.
 * @param tolerance - Allowed absolute deviation; negative or non-finite values become 0.
 * @param maxAttempts - Number of adjustment rounds; coerced to an integer of at least 1.
 * @param platform - Target platform; defaults to `'android'`.
 * @param deviceId - Optional device identifier.
 * @returns An `AdjustControlResponse` describing success or the failure reason.
 */
static async adjustControlHandler({
  selector,
  element_id,
  property = 'value',
  targetValue,
  tolerance = 0,
  maxAttempts = 3,
  platform,
  deviceId
}: {
  selector?: { text?: string, resource_id?: string, accessibility_id?: string, contains?: boolean },
  element_id?: string,
  property?: string,
  targetValue: number,
  tolerance?: number,
  maxAttempts?: number,
  platform?: 'android' | 'ios',
  deviceId?: string
}): Promise<AdjustControlResponse> {
  type ObservedControlState = { property: string; value: number | null; raw_value?: number | null }
  type AdjustmentMode = 'semantic' | 'gesture' | 'coordinate'
  type ControlMatch = { el: UiElement, idx: number }
  type ControlResolution = {
    tree: any
    device: any
    match: ControlMatch | null
    resolvedTarget: ActionTargetResolved | null
  }

  const actionType = 'adjust_control'
  // Captured before element_id may be reassigned below, so the response echoes the caller's input.
  const targetSelector = selector ?? (element_id ? { elementId: element_id } : null)
  const normalizedTolerance = Number.isFinite(tolerance) ? Math.max(0, tolerance) : 0
  const attemptsLimit = Math.max(1, Math.floor(Number(maxAttempts) || 1))
  let resolvedPlatform: 'android' | 'ios' = platform || 'android'
  let resolvedDeviceId = deviceId
  const fingerprintBefore = await ToolsInteract._captureFingerprint(resolvedPlatform, resolvedDeviceId)
  let semanticFallbackElement: FindElementResponse['element'] | null = null

  // Single place where the response envelope is assembled, for both outcomes.
  const buildResult = (outcome: {
    success: boolean
    reason: string
    resolved: ActionTargetResolved | null
    device: any
    actualState: ObservedControlState | null
    attempts: number
    adjustmentMode: AdjustmentMode
    uiFingerprintAfter: string | null
    failure?: { failureCode: ActionFailureCode, retryable: boolean }
  }): AdjustControlResponse => {
    const base = buildActionExecutionResult({
      actionType,
      sourceModule: 'interact',
      device: outcome.device,
      selector: targetSelector,
      resolved: outcome.resolved,
      success: outcome.success,
      uiFingerprintBefore: fingerprintBefore,
      uiFingerprintAfter: outcome.uiFingerprintAfter,
      // Only failures carry a `failure` entry; successes omit the key entirely.
      ...(outcome.failure ? { failure: outcome.failure } : {}),
      details: {
        target_value: targetValue,
        tolerance: normalizedTolerance,
        property,
        attempts: outcome.attempts,
        adjustment_mode: outcome.adjustmentMode,
        actual_state: outcome.actualState,
        converged: outcome.success,
        within_tolerance: outcome.success,
        reason: outcome.reason
      }
    }) as AdjustControlResponse

    return {
      ...base,
      target_state: {
        property,
        target_value: targetValue,
        tolerance: normalizedTolerance
      },
      actual_state: outcome.actualState,
      within_tolerance: outcome.success,
      converged: outcome.success,
      attempts: outcome.attempts,
      adjustment_mode: outcome.adjustmentMode
    }
  }

  const buildFailure = (
    failureCode: ActionFailureCode,
    reason: string,
    resolved: ActionTargetResolved | null,
    device: any,
    actualState: ObservedControlState | null,
    attempts: number,
    adjustmentMode: AdjustmentMode = 'gesture',
    retryable = false,
    uiFingerprintAfter: string | null = null
  ): AdjustControlResponse => buildResult({
    success: false,
    reason,
    resolved,
    device,
    actualState,
    attempts,
    adjustmentMode,
    uiFingerprintAfter,
    failure: { failureCode, retryable }
  })

  // Success responses always capture a fresh fingerprint of the UI after the adjustment.
  const buildSuccess = async (
    reason: string,
    resolved: ActionTargetResolved | null,
    device: any,
    actualState: ObservedControlState | null,
    attempts: number,
    adjustmentMode: AdjustmentMode
  ): Promise<AdjustControlResponse> => {
    const uiFingerprintAfter = await ToolsInteract._captureFingerprint(resolvedPlatform, resolvedDeviceId)
    return buildResult({ success: true, reason, resolved, device, actualState, attempts, adjustmentMode, uiFingerprintAfter })
  }

  // Packages a located element together with its tree, device and resolved-target descriptor.
  const toResolution = (tree: any, el: UiElement, idx: number): ControlResolution => ({
    tree,
    device: tree?.device,
    match: { el, idx },
    resolvedTarget: ToolsInteract._resolvedTargetFromElement(
      ToolsInteract._computeElementId(resolvedPlatform, resolvedDeviceId, el, idx),
      el,
      idx
    )
  })

  // Fetches a fresh UI tree and locates the control in it. Lookup order:
  // stored element id, then the semantic-fallback element (by bounds), then the selector.
  const resolveCurrentMatch = async (): Promise<ControlResolution | null> => {
    const tree = await ToolsObserve.getUITreeHandler({ platform: resolvedPlatform, deviceId: resolvedDeviceId }) as any
    resolvedPlatform = tree?.device?.platform === 'ios' ? 'ios' : resolvedPlatform
    resolvedDeviceId = tree?.device?.id || resolvedDeviceId
    const elements = Array.isArray(tree?.elements) ? tree.elements as UiElement[] : []

    if (element_id) {
      const stored = ToolsInteract._resolvedUiElements.get(element_id)
      if (!stored) {
        return null
      }
      const current = ToolsInteract._findCurrentResolvedElement(elements, resolvedPlatform, resolvedDeviceId, stored)
      return current ? toResolution(tree, current.el, current.index) : null
    }

    const fallbackElement = semanticFallbackElement
    if (fallbackElement) {
      // The fallback element's bounds may be an array or a {left, top, right, bottom} object.
      const rawBounds = fallbackElement.bounds as any
      const fallbackBounds = ToolsInteract._normalizeBounds(
        Array.isArray(rawBounds)
          ? rawBounds
          : rawBounds && typeof rawBounds === 'object'
            ? [Number(rawBounds.left), Number(rawBounds.top), Number(rawBounds.right), Number(rawBounds.bottom)]
            : null
      )

      let matchedIndex = -1
      if (fallbackBounds) {
        // First choice: an element with exactly the same bounds.
        matchedIndex = elements.findIndex((el) => {
          const candidate = ToolsInteract._normalizeBounds(el.bounds)
          return !!candidate && candidate.every((edge: number, i: number) => edge === fallbackBounds[i])
        })

        if (matchedIndex === -1) {
          // Second choice: the adjustable control whose centre is closest (Manhattan distance).
          const fallbackCenterX = Math.floor((fallbackBounds[0] + fallbackBounds[2]) / 2)
          const fallbackCenterY = Math.floor((fallbackBounds[1] + fallbackBounds[3]) / 2)
          let bestDistance = Infinity
          for (let i = 0; i < elements.length; i++) {
            const el = elements[i]
            if (!ToolsInteract._isAdjustableControl(el)) continue
            const candidate = ToolsInteract._normalizeBounds(el.bounds)
            if (!candidate) continue
            const centerX = Math.floor((candidate[0] + candidate[2]) / 2)
            const centerY = Math.floor((candidate[1] + candidate[3]) / 2)
            const distance = Math.abs(centerX - fallbackCenterX) + Math.abs(centerY - fallbackCenterY)
            if (distance < bestDistance) {
              bestDistance = distance
              matchedIndex = i
            }
          }
        }
      }

      if (matchedIndex >= 0 && elements[matchedIndex]) {
        return toResolution(tree, elements[matchedIndex], matchedIndex)
      }
    }

    if (selector) {
      const matched = ToolsInteract._findFirstMatchingElement(elements, selector)
      return matched ? toResolution(tree, matched.el, matched.idx) : null
    }

    return null
  }

  if (!selector && !element_id) {
    return buildFailure('ELEMENT_NOT_FOUND', 'selector or element_id is required', null, undefined, null, 0, 'gesture', false)
  }

  // NaN passes both `< min` and `> max` range checks further down and would turn into NaN
  // tap coordinates, so reject non-finite targets before touching the device.
  if (typeof targetValue !== 'number' || !Number.isFinite(targetValue)) {
    return buildFailure('UNKNOWN', `targetValue must be a finite number (received ${String(targetValue)})`, null, undefined, null, 0, 'gesture', false)
  }

  if (selector && !element_id) {
    const waitResult = await ToolsInteract.waitForUIHandler({
      selector,
      condition: 'clickable',
      timeout_ms: 5000,
      poll_interval_ms: 300,
      platform: resolvedPlatform,
      deviceId: resolvedDeviceId
    }) as any

    if (waitResult?.status !== 'success' || !waitResult?.element?.elementId) {
      const semanticQuery = selector.text ?? selector.resource_id ?? selector.accessibility_id ?? ''
      if (!semanticQuery) {
        const notFound = waitResult?.error?.code === 'ELEMENT_NOT_FOUND'
        return buildFailure(
          notFound ? 'ELEMENT_NOT_FOUND' : 'TIMEOUT',
          waitResult?.error?.message ?? 'adjustable control not found',
          null,
          waitResult?.device,
          null,
          0,
          'gesture',
          notFound
        )
      }

      const fallback = await ToolsInteract.findElementHandler({
        query: semanticQuery,
        exact: false,
        timeoutMs: 3000,
        platform: resolvedPlatform,
        deviceId: resolvedDeviceId
      })

      if (!fallback.found || !fallback.element) {
        return buildFailure(
          'ELEMENT_NOT_FOUND',
          waitResult?.error?.message ?? 'adjustable control not found',
          null,
          waitResult?.device,
          null,
          0,
          'gesture',
          true
        )
      }

      semanticFallbackElement = fallback.element
    } else {
      element_id = waitResult.element.elementId
      semanticFallbackElement = null
    }
  }

  let lastObservedState: ObservedControlState | null = null
  let lastAdjustmentMode: AdjustmentMode = 'gesture'
  let resolvedTarget: ActionTargetResolved | null = null
  let currentDevice: any = undefined
  let attemptCount = 0
  let cachedResolvedMatch: ControlMatch | null = null

  for (let attempt = 0; attempt < attemptsLimit; attempt++) {
    // After the first round the element (with its state patched from the last verification)
    // is reused instead of fetching the UI tree again.
    const resolved: ControlResolution | null = cachedResolvedMatch
      ? {
          tree: null,
          device: currentDevice,
          match: cachedResolvedMatch,
          resolvedTarget: ToolsInteract._resolvedTargetFromElement(
            ToolsInteract._computeElementId(resolvedPlatform, resolvedDeviceId, cachedResolvedMatch.el, cachedResolvedMatch.idx),
            cachedResolvedMatch.el,
            cachedResolvedMatch.idx
          )
        }
      : await resolveCurrentMatch()
    if (!resolved || !resolved.match || !resolved.resolvedTarget) {
      return buildFailure('STALE_REFERENCE', 'adjustable control could not be resolved', resolvedTarget, currentDevice, lastObservedState, attemptCount, lastAdjustmentMode, true)
    }

    currentDevice = resolved.device
    resolvedTarget = resolved.resolvedTarget
    const currentEl: UiElement = resolved.match.el
    cachedResolvedMatch = resolved.match
    const bounds = ToolsInteract._normalizeBounds(currentEl.bounds)
    const valueRange = currentEl.state?.value_range ?? null
    const currentValue = ToolsInteract._readNumericControlValue(currentEl, property)
    const actualState: ObservedControlState | null = currentValue !== null
      ? { property, value: currentValue, raw_value: typeof currentEl.state?.raw_value === 'number' ? currentEl.state.raw_value : undefined }
      : null

    lastObservedState = actualState

    if (property !== 'value' && property !== 'raw_value') {
      return buildFailure('ELEMENT_NOT_INTERACTABLE', 'adjust_control currently supports numeric value and raw_value properties only', resolvedTarget, currentDevice, actualState, attemptCount, lastAdjustmentMode, false)
    }

    // Nothing to do when the control already shows an acceptable value.
    if (currentValue !== null && Math.abs(currentValue - targetValue) <= normalizedTolerance) {
      return buildSuccess('control already within tolerance', resolvedTarget, currentDevice, actualState, attemptCount, 'semantic')
    }

    if (!ToolsInteract._isAdjustableControl(currentEl)) {
      return buildFailure('ELEMENT_NOT_INTERACTABLE', 'target is not an adjustable control', resolvedTarget, currentDevice, actualState, attemptCount, lastAdjustmentMode, false)
    }

    if (!bounds) {
      return buildFailure('ELEMENT_NOT_INTERACTABLE', 'adjustable control has no bounds', resolvedTarget, currentDevice, actualState, attemptCount, lastAdjustmentMode, false)
    }

    const min = typeof valueRange?.min === 'number' ? valueRange.min : null
    const max = typeof valueRange?.max === 'number' ? valueRange.max : null
    if (min === null || max === null || !Number.isFinite(min) || !Number.isFinite(max) || max <= min) {
      return buildFailure('ELEMENT_NOT_INTERACTABLE', 'value_range unavailable', resolvedTarget, currentDevice, actualState, attemptCount, lastAdjustmentMode, false)
    }

    if (targetValue < min || targetValue > max) {
      return buildFailure('UNKNOWN', `targetValue ${targetValue} is outside the control range ${min}..${max}`, resolvedTarget, currentDevice, actualState, attemptCount, lastAdjustmentMode, false)
    }

    const axis = ToolsInteract._controlAxis(currentEl, bounds)
    const targetPoint = ToolsInteract._buildConservativeControlPoint(bounds, targetValue, currentValue, min, max, axis)
    // Where the thumb is assumed to be before any gesture; track midpoint when the value is unknown.
    const currentPoint = currentValue !== null
      ? ToolsInteract._buildControlPoint(bounds, (currentValue - min) / (max - min), axis)
      : ToolsInteract._buildControlPoint(bounds, 0.5, axis)

    const swipeToTarget = (from: { x: number, y: number }) => ToolsInteract.swipeHandler({
      platform: resolvedPlatform,
      x1: from.x,
      y1: from.y,
      x2: targetPoint.x,
      y2: targetPoint.y,
      duration: 220,
      deviceId: resolvedDeviceId
    })

    // Re-reads the control through expectStateHandler and compares against the target.
    const runVerification = async (): Promise<{
      verification: any
      observedState: ObservedControlState | null
      withinTolerance: boolean
    }> => {
      const verification = await ToolsInteract.expectStateHandler({
        element_id: resolvedTarget?.elementId ?? element_id,
        selector: selector ?? undefined,
        property,
        expected: targetValue,
        platform: resolvedPlatform,
        deviceId: resolvedDeviceId
      }) as any

      const reported = verification?.observed_state
      const observedValue: number | null = typeof reported?.value === 'number'
        ? reported.value
        : typeof reported?.raw_value === 'number'
          ? reported.raw_value
          : null
      // Without a numeric reading, keep reporting the state seen before the gesture.
      const observedState: ObservedControlState | null = observedValue !== null
        ? {
            property,
            value: observedValue,
            raw_value: typeof reported?.raw_value === 'number' ? reported.raw_value : undefined
          }
        : actualState

      return {
        verification,
        observedState,
        withinTolerance: observedValue !== null && Math.abs(observedValue - targetValue) <= normalizedTolerance
      }
    }

    // Primary strategy: tap directly on the target position.
    lastAdjustmentMode = 'coordinate'
    const primaryActionResult = await ToolsInteract.tapHandler({
      platform: resolvedPlatform,
      x: targetPoint.x,
      y: targetPoint.y,
      deviceId: resolvedDeviceId
    })
    let actionDevice = primaryActionResult.device ?? currentDevice
    attemptCount++

    if (!primaryActionResult.success) {
      // The tap itself failed: drag from the assumed thumb position instead.
      lastAdjustmentMode = 'gesture'
      const fallbackActionResult = await swipeToTarget(currentPoint)
      attemptCount++

      if (!fallbackActionResult.success) {
        return buildFailure('UNKNOWN', fallbackActionResult.error ?? primaryActionResult.error ?? 'adjustment gesture failed', resolvedTarget, fallbackActionResult.device ?? primaryActionResult.device, actualState, attemptCount, lastAdjustmentMode, false)
      }

      actionDevice = fallbackActionResult.device ?? actionDevice
    }

    let verificationResult = await runVerification()
    let observedState = verificationResult.observedState
    lastObservedState = observedState

    if (!verificationResult.withinTolerance && currentValue !== null) {
      lastAdjustmentMode = 'gesture'
      // The previous gesture may have moved the thumb, so start the drag from the value
      // just observed rather than from the pre-gesture position.
      const observedValue = observedState?.value
      const thumbValue = typeof observedValue === 'number' && Number.isFinite(observedValue) ? observedValue : currentValue
      const thumbPoint = ToolsInteract._buildControlPoint(bounds, (thumbValue - min) / (max - min), axis)
      const fallbackActionResult = await swipeToTarget(thumbPoint)
      attemptCount++
      if (!fallbackActionResult.success) {
        return buildFailure('UNKNOWN', fallbackActionResult.error ?? 'adjustment gesture failed', resolvedTarget, fallbackActionResult.device, observedState ?? actualState, attemptCount, lastAdjustmentMode, false)
      }

      verificationResult = await runVerification()
      observedState = verificationResult.observedState
    }

    const verification = verificationResult.verification
    lastObservedState = observedState

    if (verificationResult.withinTolerance) {
      return buildSuccess(
        verification?.reason ?? 'control converged to target value',
        resolvedTarget,
        actionDevice ?? currentDevice,
        observedState,
        attemptCount,
        lastAdjustmentMode
      )
    }

    // Carry the observed value into the next round so direction and start point are current.
    cachedResolvedMatch = {
      el: {
        ...currentEl,
        state: {
          ...(currentEl.state ?? null),
          ...(observedState ? {
            [observedState.property]: observedState.value,
            raw_value: observedState.raw_value ?? observedState.value
          } : {})
        }
      },
      idx: resolved.match.idx
    }
  }

  const uiFingerprintAfter = await ToolsInteract._captureFingerprint(resolvedPlatform, resolvedDeviceId)
  return buildFailure('TIMEOUT', 'control did not converge within the allotted attempts', resolvedTarget, currentDevice, lastObservedState, attemptCount, lastAdjustmentMode, true, uiFingerprintAfter)
}
|
|
1164
|
+
|
|
573
1165
|
static async swipeHandler({ platform = 'android', x1, y1, x2, y2, duration, deviceId }: { platform?: 'android' | 'ios', x1: number, y1: number, x2: number, y2: number, duration: number, deviceId?: string }) {
|
|
574
1166
|
const { interact, resolved } = await ToolsInteract.getInteractionService(platform, deviceId)
|
|
575
1167
|
return await interact.swipe(x1, y1, x2, y2, duration, resolved.id)
|
|
@@ -611,12 +1203,14 @@ export class ToolsInteract {
|
|
|
611
1203
|
const [l,t,r,b] = bounds
|
|
612
1204
|
if (r <= l || b <= t) return null
|
|
613
1205
|
// Do not early-return on non-interactable elements — score them so we can locate their clickable ancestor later
|
|
614
|
-
const interactable = !!(el.clickable || el.enabled || el.focusable)
|
|
1206
|
+
const interactable = !!(el.clickable || el.enabled || el.focusable || ToolsInteract._isSemanticActionable(el))
|
|
615
1207
|
|
|
616
1208
|
const text = normalize(el.text ?? el.label ?? el.value ?? '')
|
|
617
1209
|
const content = normalize(el.contentDescription ?? el.contentDesc ?? el.accessibilityLabel ?? '')
|
|
618
1210
|
const resourceId = normalize(el.resourceId ?? el.resourceID ?? el.id ?? '')
|
|
619
1211
|
const className = normalize(el.type ?? el.class ?? '')
|
|
1212
|
+
const semanticRole = normalize(el.semantic?.semantic_role ?? '')
|
|
1213
|
+
const semanticActions = Array.isArray(el.semantic?.supported_actions) ? el.semantic.supported_actions.map((action) => normalize(action)).filter(Boolean) : []
|
|
620
1214
|
|
|
621
1215
|
let score = 0
|
|
622
1216
|
let reason = 'best_scoring_candidate'
|
|
@@ -658,6 +1252,29 @@ export class ToolsInteract {
|
|
|
658
1252
|
reason = 'partial_class_match'
|
|
659
1253
|
}
|
|
660
1254
|
}
|
|
1255
|
+
if (!exact) {
|
|
1256
|
+
if (!score && semanticRole && semanticRole.includes(q)) {
|
|
1257
|
+
score = 0.5
|
|
1258
|
+
reason = 'semantic_role_match'
|
|
1259
|
+
}
|
|
1260
|
+
if (semanticActions.some((action) => action.includes(q))) {
|
|
1261
|
+
score = Math.max(score, score > 0 ? 0.65 : 0.6)
|
|
1262
|
+
reason = 'semantic_action_match'
|
|
1263
|
+
}
|
|
1264
|
+
if (score === 0 && el.semantic?.adjustable && /slider|stepper|dropdown|segment|control|adjust/.test(q)) {
|
|
1265
|
+
score = 0.45
|
|
1266
|
+
reason = 'semantic_control_match'
|
|
1267
|
+
}
|
|
1268
|
+
} else {
|
|
1269
|
+
if (!score && semanticRole && semanticRole === q) {
|
|
1270
|
+
score = 0.5
|
|
1271
|
+
reason = 'semantic_role_match'
|
|
1272
|
+
}
|
|
1273
|
+
if (semanticActions.some((action) => action === q)) {
|
|
1274
|
+
score = Math.max(score, score > 0 ? 0.65 : 0.6)
|
|
1275
|
+
reason = 'semantic_action_match'
|
|
1276
|
+
}
|
|
1277
|
+
}
|
|
661
1278
|
if (score > 0 && interactable) score += 0.05
|
|
662
1279
|
if (score <= 0) return null
|
|
663
1280
|
return { el, idx, score, reason, interactable }
|
|
@@ -767,7 +1384,7 @@ export class ToolsInteract {
|
|
|
767
1384
|
}
|
|
768
1385
|
}
|
|
769
1386
|
|
|
770
|
-
if (best && !(best.el.clickable || best.el.focusable)) {
|
|
1387
|
+
if (best && !(best.el.clickable || best.el.focusable || ToolsInteract._isSemanticActionable(best.el))) {
|
|
771
1388
|
const nearbyActionable = ToolsInteract._resolveNearbyActionableControl(elements, { el: best.el, idx: best.idx }, screen)
|
|
772
1389
|
if (nearbyActionable) {
|
|
773
1390
|
best = {
|