mobile-debug-mcp 0.24.7 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -6
- package/dist/interact/index.js +302 -6
- package/dist/observe/ios.js +56 -2
- package/dist/server/common.js +2 -1
- package/dist/server/tool-definitions.js +55 -0
- package/dist/server/tool-handlers.js +17 -0
- package/dist/server-core.js +1 -1
- package/dist/utils/android/utils.js +67 -1
- package/docs/CHANGELOG.md +6 -0
- package/docs/ROADMAP.md +388 -0
- package/docs/rfcs/001-state-verification.md +452 -0
- package/docs/specs/mcp-tooling-spec-v1.md +4 -0
- package/docs/tools/interact.md +25 -0
- package/docs/tools/observe.md +2 -1
- package/package.json +1 -1
- package/src/interact/index.ts +352 -7
- package/src/observe/ios.ts +62 -3
- package/src/server/common.ts +2 -1
- package/src/server/tool-definitions.ts +55 -0
- package/src/server/tool-handlers.ts +18 -0
- package/src/server-core.ts +1 -1
- package/src/types.ts +41 -0
- package/src/utils/android/utils.ts +78 -14
- package/test/unit/observe/find_element.test.ts +64 -5
- package/test/unit/observe/state_extraction.test.ts +43 -0
- package/test/unit/server/response_shapes.test.ts +40 -2
package/src/interact/index.ts
CHANGED
|
@@ -10,7 +10,9 @@ import type {
|
|
|
10
10
|
ActionFailureCode,
|
|
11
11
|
ActionTargetResolved,
|
|
12
12
|
ExpectElementVisibleResponse,
|
|
13
|
+
ExpectStateResponse,
|
|
13
14
|
ExpectScreenResponse,
|
|
15
|
+
UIElementState,
|
|
14
16
|
TapElementResponse
|
|
15
17
|
} from '../types.js'
|
|
16
18
|
|
|
@@ -36,6 +38,8 @@ interface UiElement {
|
|
|
36
38
|
parentId?: number | string | null
|
|
37
39
|
_index?: number
|
|
38
40
|
_interactable?: boolean
|
|
41
|
+
_sliderLike?: boolean
|
|
42
|
+
state?: UIElementState | null
|
|
39
43
|
}
|
|
40
44
|
|
|
41
45
|
interface ResolvedUiElementContext {
|
|
@@ -46,9 +50,22 @@ interface ResolvedUiElementContext {
|
|
|
46
50
|
index: number
|
|
47
51
|
}
|
|
48
52
|
|
|
53
|
+
/**
 * Screen dimensions as reported alongside a UI tree snapshot.
 * Both members are optional because a snapshot may not carry a resolution.
 */
interface UiResolution {
  /** Screen width; consumers treat missing or non-positive values as unknown. */
  width?: number
  /** Screen height; consumers treat missing or non-positive values as unknown. */
  height?: number
}
|
|
57
|
+
|
|
49
58
|
|
|
50
59
|
export class ToolsInteract {
|
|
51
60
|
private static readonly _maxResolvedUiElements = 256
|
|
61
|
+
private static readonly _sliderSearchLookahead = 8
|
|
62
|
+
private static readonly _sliderNegativeGapTolerancePx = 32
|
|
63
|
+
private static readonly _sliderPositiveGapLimitPx = 640
|
|
64
|
+
private static readonly _sliderTrackMinLengthPx = 220
|
|
65
|
+
private static readonly _sliderTrackMaxThicknessPx = 180
|
|
66
|
+
private static readonly _sliderTrackLengthRatio = 0.18
|
|
67
|
+
private static readonly _sliderTrackThicknessRatio = 0.08
|
|
68
|
+
private static readonly _sliderLabelWidthRatio = 1.5
|
|
52
69
|
private static _resolvedUiElements = new Map<string, ResolvedUiElementContext>()
|
|
53
70
|
|
|
54
71
|
private static _normalize(s: any): string {
|
|
@@ -63,6 +80,45 @@ export class ToolsInteract {
|
|
|
63
80
|
return normalized as [number, number, number, number]
|
|
64
81
|
}
|
|
65
82
|
|
|
83
|
+
/**
 * Reports whether `el` satisfies every criterion present on `selector`.
 *
 * Each supplied criterion (`text`, `resource_id`, `accessibility_id`) is
 * normalized with `ToolsInteract._normalize` and compared against the matching
 * element field(s); `selector.contains` switches the comparison from exact
 * equality to substring containment.
 *
 * A missing selector, or a selector that supplies none of the three criteria,
 * never matches. Previously such a selector matched every element, so callers
 * that pick "the first match" silently resolved to the first element of the tree.
 *
 * @param el - Element to test.
 * @param selector - Criteria to apply; all supplied criteria must hold.
 * @returns `true` only when at least one criterion was supplied and all of them match.
 */
private static _matchesSelector(el: UiElement, selector?: { text?: string, resource_id?: string, accessibility_id?: string, contains?: boolean }): boolean {
  if (!selector) return false

  const normalize = ToolsInteract._normalize
  const useContains = !!selector.contains

  // Pair each selector query with the element value it is compared against.
  const criteria: Array<[string | null | undefined, unknown]> = [
    [selector.text, el.text ?? el.label ?? el.value ?? ''],
    [selector.resource_id, el.resourceId ?? el.resourceID ?? el.id ?? ''],
    [selector.accessibility_id, el.contentDescription ?? el.contentDesc ?? el.accessibilityLabel ?? el.label ?? '']
  ]

  let appliedCriteria = 0
  for (const [query, candidate] of criteria) {
    if (query === undefined || query === null) continue
    appliedCriteria++
    const wanted = normalize(query)
    const observed = normalize(candidate)
    if (useContains ? !observed.includes(wanted) : observed !== wanted) return false
  }

  // An empty selector must not match everything.
  return appliedCriteria > 0
}
|
|
108
|
+
|
|
109
|
+
/**
 * Returns the first element (in array order) accepted by
 * `ToolsInteract._matchesSelector`, together with its position in `elements`.
 *
 * @param elements - Elements to scan; falsy entries are skipped.
 * @param selector - Criteria forwarded to `_matchesSelector`.
 * @returns The matching element and its index, or `null` when there is no selector or no match.
 */
private static _findFirstMatchingElement(
  elements: UiElement[],
  selector?: { text?: string, resource_id?: string, accessibility_id?: string, contains?: boolean }
): { el: UiElement, idx: number } | null {
  if (!selector) return null

  const position = elements.findIndex((entry) => !!entry && ToolsInteract._matchesSelector(entry, selector))
  if (position === -1) return null

  return { el: elements[position], idx: position }
}
|
|
121
|
+
|
|
66
122
|
private static _isVisibleElement(el: UiElement): boolean {
|
|
67
123
|
const bounds = ToolsInteract._normalizeBounds(el.bounds)
|
|
68
124
|
return !!el.visible && !!bounds && bounds[2] > bounds[0] && bounds[3] > bounds[1]
|
|
@@ -140,7 +196,8 @@ export class ToolsInteract {
|
|
|
140
196
|
accessibility_id: element.contentDescription ?? element.contentDesc ?? element.accessibilityLabel ?? element.label ?? null,
|
|
141
197
|
class: element.type ?? element.class ?? null,
|
|
142
198
|
bounds: ToolsInteract._normalizeBounds(element.bounds),
|
|
143
|
-
index
|
|
199
|
+
index,
|
|
200
|
+
state: element.state ?? null
|
|
144
201
|
}
|
|
145
202
|
}
|
|
146
203
|
|
|
@@ -240,6 +297,78 @@ export class ToolsInteract {
|
|
|
240
297
|
return best
|
|
241
298
|
}
|
|
242
299
|
|
|
300
|
+
/**
 * Starting from a matched (typically non-interactive) element, looks at the
 * elements that follow it in `elements` for a clickable/focusable control
 * positioned below or beside it, preferring unlabeled, track-shaped candidates.
 *
 * Scoring: the base score is the vertical gap between the label's bottom edge
 * and the candidate's top edge; bonuses (negative adjustments) are applied for
 * unlabeled candidates, track-like geometry or class names, container-like
 * class names, and candidates wider than the label; labeled candidates are
 * penalized. The lowest score wins.
 *
 * @param elements - Flat element list the label was taken from.
 * @param chosen - The label element and its index within `elements`.
 * @param screen - Optional screen resolution used to scale the track thresholds.
 * @returns The best nearby control (with `sliderLike` set when it looks like a
 *   slider track), or `null` when there is no usable label, index, or candidate.
 */
private static _resolveNearbyActionableControl(
  elements: UiElement[],
  chosen: { el: UiElement, idx: number } | null,
  screen?: UiResolution | null
): { el: UiElement, idx: number, sliderLike?: boolean } | null {
  if (!chosen) return null

  // A negative index (e.g. the result of a failed indexOf) would make the scan
  // start at the top of the tree and consider controls unrelated to the label.
  if (!Number.isInteger(chosen.idx) || chosen.idx < 0) return null

  const labelBounds = ToolsInteract._normalizeBounds(chosen.el.bounds)
  if (!labelBounds) return null

  const [labelLeft, labelTop, labelRight, labelBottom] = labelBounds
  const labelWidth = labelRight - labelLeft
  const labelHeight = labelBottom - labelTop
  const screenWidth = Number(screen?.width) > 0 ? Number(screen?.width) : 0
  const screenHeight = Number(screen?.height) > 0 ? Number(screen?.height) : 0

  // Thresholds are the larger of a fixed pixel floor and a fraction of either screen dimension.
  const minTrackLengthPx = Math.max(
    ToolsInteract._sliderTrackMinLengthPx,
    screenWidth > 0 ? Math.floor(screenWidth * ToolsInteract._sliderTrackLengthRatio) : 0,
    screenHeight > 0 ? Math.floor(screenHeight * ToolsInteract._sliderTrackLengthRatio) : 0
  )
  const maxTrackThicknessPx = Math.max(
    ToolsInteract._sliderTrackMaxThicknessPx,
    screenWidth > 0 ? Math.floor(screenWidth * ToolsInteract._sliderTrackThicknessRatio) : 0,
    screenHeight > 0 ? Math.floor(screenHeight * ToolsInteract._sliderTrackThicknessRatio) : 0
  )

  let best: { el: UiElement, idx: number, sliderLike?: boolean } | null = null
  let bestScore = Infinity

  // NOTE(review): the exclusive bound `idx + lookahead` inspects lookahead - 1
  // following elements; confirm whether the window is meant to include the label.
  const scanEnd = Math.min(elements.length, chosen.idx + ToolsInteract._sliderSearchLookahead)
  for (let i = chosen.idx + 1; i < scanEnd; i++) {
    const candidate = elements[i]
    if (!candidate || !(candidate.clickable || candidate.focusable) || candidate.visible === false) continue

    const candidateBounds = ToolsInteract._normalizeBounds(candidate.bounds)
    if (!candidateBounds) continue

    const [left, top, right, bottom] = candidateBounds
    const width = right - left
    const height = bottom - top

    // Candidate must start no more than the tolerance above the label's bottom edge and within the positive limit below it.
    const verticalGap = top - labelBottom
    if (verticalGap < -ToolsInteract._sliderNegativeGapTolerancePx || verticalGap > ToolsInteract._sliderPositiveGapLimitPx) continue

    // Negative overlap means a horizontal gap; allow a small one.
    const horizontalOverlap = Math.min(labelRight, right) - Math.max(labelLeft, left)
    if (horizontalOverlap < -ToolsInteract._sliderNegativeGapTolerancePx) continue

    const candidateText = ToolsInteract._normalize(candidate.text ?? candidate.label ?? candidate.value ?? '')
    const candidateContent = ToolsInteract._normalize(candidate.contentDescription ?? candidate.contentDesc ?? candidate.accessibilityLabel ?? '')
    const candidateClass = ToolsInteract._normalize(candidate.type ?? candidate.class ?? '')

    const horizontalTrackLike =
      width >= Math.max(minTrackLengthPx, Math.floor(labelWidth * ToolsInteract._sliderLabelWidthRatio)) &&
      height <= maxTrackThicknessPx
    const verticalTrackLike =
      height >= Math.max(minTrackLengthPx, Math.floor(labelHeight * ToolsInteract._sliderLabelWidthRatio)) &&
      width <= maxTrackThicknessPx
    const trackLike = /slider|seek|range/i.test(candidateClass) || horizontalTrackLike || verticalTrackLike

    let score = verticalGap
    if (!candidateText && !candidateContent) score -= 18
    if (trackLike) score -= 30
    if (/view|layout|group|frame/i.test(candidateClass)) score -= 10
    if (width > labelWidth * ToolsInteract._sliderLabelWidthRatio) score -= 8
    if (candidateText || candidateContent) score += 20

    if (score < bestScore) {
      bestScore = score
      best = { el: candidate, idx: i, sliderLike: trackLike }
    }
  }

  return best
}
|
|
371
|
+
|
|
243
372
|
|
|
244
373
|
private static async getInteractionService(platform?: 'android' | 'ios', deviceId?: string) {
|
|
245
374
|
const effectivePlatform = platform || 'android'
|
|
@@ -347,6 +476,7 @@ export class ToolsInteract {
|
|
|
347
476
|
|
|
348
477
|
let best: UiElement | null = null
|
|
349
478
|
let bestScore = 0
|
|
479
|
+
let lastTree: any = null
|
|
350
480
|
|
|
351
481
|
const scoreElement = (el: UiElement | null) => {
|
|
352
482
|
if (!el || !el.visible) return 0
|
|
@@ -380,7 +510,8 @@ export class ToolsInteract {
|
|
|
380
510
|
|
|
381
511
|
while (Date.now() <= deadline) {
|
|
382
512
|
try {
|
|
383
|
-
|
|
513
|
+
const tree = await ToolsObserve.getUITreeHandler({ platform, deviceId })
|
|
514
|
+
lastTree = tree
|
|
384
515
|
if (tree && Array.isArray((tree as any).elements)) {
|
|
385
516
|
const elements = ((tree as any).elements as UiElement[])
|
|
386
517
|
for (let i = 0; i < elements.length; i++) {
|
|
@@ -407,8 +538,8 @@ export class ToolsInteract {
|
|
|
407
538
|
|
|
408
539
|
// If the best match is not interactable, try to resolve an actionable ancestor.
|
|
409
540
|
try {
|
|
410
|
-
const
|
|
411
|
-
const
|
|
541
|
+
const elements = (lastTree && Array.isArray(lastTree.elements)) ? (lastTree.elements as UiElement[]) : []
|
|
542
|
+
const screen = lastTree?.resolution && typeof lastTree.resolution === 'object' ? lastTree.resolution as UiResolution : null
|
|
412
543
|
let chosen = best as any
|
|
413
544
|
const childBounds = Array.isArray(chosen?.bounds) ? chosen.bounds : null
|
|
414
545
|
|
|
@@ -465,6 +596,16 @@ export class ToolsInteract {
|
|
|
465
596
|
// small score bump to reflect actionability
|
|
466
597
|
bestScore = Math.min(1, bestScore + 0.02)
|
|
467
598
|
}
|
|
599
|
+
|
|
600
|
+
if (best && !(best.clickable || best.focusable)) {
|
|
601
|
+
const nearbyActionable = ToolsInteract._resolveNearbyActionableControl(elements, { el: best, idx: best._index ?? elements.indexOf(best) }, screen)
|
|
602
|
+
if (nearbyActionable) {
|
|
603
|
+
best = nearbyActionable.el
|
|
604
|
+
best._index = nearbyActionable.idx
|
|
605
|
+
best._interactable = true
|
|
606
|
+
best._sliderLike = nearbyActionable.sliderLike
|
|
607
|
+
}
|
|
608
|
+
}
|
|
468
609
|
} catch (e) { console.error('Error resolving ancestor:', e) }
|
|
469
610
|
|
|
470
611
|
if (!best) return { found: false, error: 'Element not found' }
|
|
@@ -483,8 +624,18 @@ export class ToolsInteract {
|
|
|
483
624
|
tapCoordinates,
|
|
484
625
|
telemetry: {
|
|
485
626
|
matchedIndex: best?._index ?? null,
|
|
486
|
-
matchedInteractable: !!best?._interactable
|
|
627
|
+
matchedInteractable: !!best?._interactable,
|
|
628
|
+
sliderLike: !!best?._sliderLike
|
|
629
|
+
}
|
|
630
|
+
}
|
|
631
|
+
if (best?._sliderLike) {
|
|
632
|
+
const isVertical = !!boundsObj && (boundsObj.bottom - boundsObj.top) > (boundsObj.right - boundsObj.left)
|
|
633
|
+
const interactionHint = {
|
|
634
|
+
kind: 'slider',
|
|
635
|
+
axis: isVertical ? 'vertical' : 'horizontal',
|
|
636
|
+
trackBounds: boundsObj
|
|
487
637
|
}
|
|
638
|
+
;(outEl as any).interactionHint = interactionHint
|
|
488
639
|
}
|
|
489
640
|
const scoreVal = Math.min(1, Number(bestScore.toFixed(3)))
|
|
490
641
|
return { found: true, element: outEl, score: scoreVal, confidence: scoreVal }
|
|
@@ -888,7 +1039,8 @@ export class ToolsInteract {
|
|
|
888
1039
|
accessibility_id: result.element.accessibility_id ?? null,
|
|
889
1040
|
class: result.element.class ?? null,
|
|
890
1041
|
bounds: result.element.bounds ?? null,
|
|
891
|
-
index: typeof result.element.index === 'number' ? result.element.index : null
|
|
1042
|
+
index: typeof result.element.index === 'number' ? result.element.index : null,
|
|
1043
|
+
state: (result.element as any).state ?? null
|
|
892
1044
|
},
|
|
893
1045
|
observed: {
|
|
894
1046
|
status: result.status,
|
|
@@ -902,7 +1054,8 @@ export class ToolsInteract {
|
|
|
902
1054
|
accessibility_id: result.element.accessibility_id ?? null,
|
|
903
1055
|
class: result.element.class ?? null,
|
|
904
1056
|
bounds: result.element.bounds ?? null,
|
|
905
|
-
index: typeof result.element.index === 'number' ? result.element.index : null
|
|
1057
|
+
index: typeof result.element.index === 'number' ? result.element.index : null,
|
|
1058
|
+
state: (result.element as any).state ?? null
|
|
906
1059
|
}
|
|
907
1060
|
},
|
|
908
1061
|
reason: 'selector is visible'
|
|
@@ -928,6 +1081,198 @@ export class ToolsInteract {
|
|
|
928
1081
|
}
|
|
929
1082
|
}
|
|
930
1083
|
|
|
1084
|
+
/**
 * Verifies one readable state property of a UI element in a fresh UI tree.
 *
 * Resolution order: a previously resolved `element_id` is tried first; when it
 * does not resolve, `selector` is used to pick the first matching element.
 *
 * Comparison rules by property:
 * - `checked`, `focused`, `expanded`, `enabled`: both sides must be booleans.
 * - `value`, `raw_value`: compared as finite numbers when both sides are
 *   numbers, otherwise as strings when both sides are strings.
 * - `selected`: boolean comparison when `expected` is boolean; otherwise the
 *   observed value (string, or object `id`/`label`) is compared as a trimmed string.
 * - `text_value`: string comparison; empty strings are valid on both sides.
 * - anything else: strict equality against the element's own state property.
 *
 * Only properties the state object itself owns are read, so names such as
 * `toString` or `constructor` are reported as unavailable instead of resolving
 * to inherited members.
 *
 * @returns A structured result; on failure it carries `failure_code` and
 *   `retryable` (`ELEMENT_NOT_FOUND`/retryable when no element resolved,
 *   `UNKNOWN`/non-retryable for any other failure).
 */
static async expectStateHandler({
  selector,
  element_id,
  property,
  expected,
  platform,
  deviceId
}: {
  selector?: { text?: string, resource_id?: string, accessibility_id?: string, contains?: boolean },
  element_id?: string,
  property: string,
  expected: boolean | number | string | Record<string, unknown>,
  platform?: 'android' | 'ios',
  deviceId?: string
}): Promise<ExpectStateResponse> {
  const tree = await ToolsObserve.getUITreeHandler({ platform, deviceId }) as any
  const elements = Array.isArray(tree?.elements) ? tree.elements as UiElement[] : []
  const treePlatform = tree?.device?.platform === 'ios' ? 'ios' : (platform || 'android')
  const treeDeviceId = tree?.device?.id || deviceId

  let matched: { el: UiElement, idx: number } | null = null

  if (element_id) {
    const resolved = ToolsInteract._resolvedUiElements.get(element_id)
    if (resolved) {
      const current = ToolsInteract._findCurrentResolvedElement(elements, treePlatform, treeDeviceId, resolved)
      if (current) matched = { el: current.el, idx: current.index }
    }
  }

  if (!matched && selector) {
    matched = ToolsInteract._findFirstMatchingElement(elements, selector)
  }

  if (!matched) {
    return {
      success: false,
      selector,
      element_id: element_id ?? null,
      expected_state: { property, expected },
      reason: 'element not found',
      failure_code: 'ELEMENT_NOT_FOUND',
      retryable: true
    }
  }

  const resolvedElement = ToolsInteract._resolvedTargetFromElement(
    ToolsInteract._computeElementId(treePlatform, treeDeviceId, matched.el, matched.idx),
    matched.el,
    matched.idx
  )
  const observedState = matched.el.state ?? null

  // Read own properties only; `property` is caller-supplied and must not reach
  // inherited members of the state object.
  const ownsProperty = observedState !== null && Object.prototype.hasOwnProperty.call(observedState, property)
  const actual = ownsProperty ? (observedState?.[property as keyof UIElementState] ?? null) : null

  const compareBoolean = (value: unknown) => typeof value === 'boolean' ? value : null
  const compareString = (value: unknown) => typeof value === 'string' ? value : null
  const compareNumber = (value: unknown) => typeof value === 'number' && Number.isFinite(value) ? value : null

  let success = false
  let reason = ''
  let rawValue: boolean | number | string | null = null
  let observedValue: boolean | number | string | Record<string, unknown> | null = actual as any

  switch (property) {
    case 'checked':
    case 'focused':
    case 'expanded':
    case 'enabled': {
      const expectedBool = compareBoolean(expected)
      const actualBool = compareBoolean(actual)
      if (expectedBool === null) {
        reason = `expected ${property} must be boolean`
      } else if (actualBool === null) {
        reason = `${property} state unavailable`
      } else {
        rawValue = actualBool
        success = actualBool === expectedBool
        reason = success ? `${property} matches expected value` : `expected ${property}=${expectedBool} but observed ${actualBool}`
      }
      observedValue = actualBool
      break
    }
    case 'value':
    case 'raw_value': {
      // Numeric comparison takes precedence; fall back to string comparison.
      const expectedNumber = compareNumber(expected)
      const actualNumber = compareNumber(actual)
      if (expectedNumber !== null && actualNumber !== null) {
        success = actualNumber === expectedNumber
        rawValue = actualNumber
        observedValue = actualNumber
        reason = success ? 'value matches expected value' : `expected value=${expectedNumber} but observed ${actualNumber}`
        break
      }
      const expectedString = typeof expected === 'string' ? expected : null
      const actualString = compareString(actual)
      if (expectedString !== null && actualString !== null) {
        success = actualString === expectedString
        rawValue = actualString
        observedValue = actualString
        reason = success ? 'value matches expected value' : `expected value=${expectedString} but observed ${actualString}`
      } else {
        reason = 'value state unavailable'
      }
      break
    }
    case 'selected': {
      const expectedBool = typeof expected === 'boolean' ? expected : null
      // The observed side is trimmed below, so trim the expected side too;
      // otherwise padded expectations could never match.
      const expectedString = typeof expected === 'string'
        ? expected.trim()
        : expected && typeof expected === 'object'
          ? String((expected as { id?: unknown; label?: unknown }).id ?? (expected as { id?: unknown; label?: unknown }).label ?? '').trim()
          : null
      if (!observedState || observedState.selected === undefined || observedState.selected === null) {
        reason = 'selected state unavailable'
        break
      }
      if (expectedBool !== null) {
        const actualBool = typeof observedState.selected === 'boolean' ? observedState.selected : null
        if (actualBool === null) {
          reason = 'selected state is not boolean'
          break
        }
        rawValue = actualBool
        observedValue = actualBool
        success = actualBool === expectedBool
        reason = success ? 'selected matches expected value' : `expected selected=${expectedBool} but observed ${actualBool}`
        break
      }
      const actualSelected = typeof observedState.selected === 'object' && observedState.selected !== null
        ? String((observedState.selected as { id?: unknown; label?: unknown }).id ?? (observedState.selected as { id?: unknown; label?: unknown }).label ?? '')
        : String(observedState.selected)
      const actualString = actualSelected.trim()
      if (!expectedString) {
        reason = 'expected selected must be boolean, string, or object with id/label'
        break
      }
      rawValue = actualString
      observedValue = actualString
      success = actualString === expectedString
      reason = success ? 'selected matches expected value' : `expected selected=${expectedString} but observed ${actualString}`
      break
    }
    case 'text_value': {
      const expectedString = typeof expected === 'string' ? expected : null
      const actualString = compareString(actual)
      // Explicit null checks: an empty string is a legitimate text value on
      // either side (e.g. asserting that a field has been cleared).
      if (expectedString === null) {
        reason = 'expected text_value must be string'
      } else if (actualString === null) {
        reason = 'text_value state unavailable'
      } else {
        success = actualString === expectedString
        rawValue = actualString
        observedValue = actualString
        reason = success ? 'text_value matches expected value' : `expected text_value=${expectedString} but observed ${actualString}`
      }
      break
    }
    default: {
      if (actual !== null && actual !== undefined) {
        success = actual === expected
        observedValue = actual as any
        rawValue = typeof actual === 'string' || typeof actual === 'number' || typeof actual === 'boolean' ? actual : null
        reason = success ? `${property} matches expected value` : `expected ${property} to match but observed ${String(actual)}`
      } else {
        reason = `unsupported or unavailable state property: ${property}`
      }
    }
  }

  if (!success && !reason) {
    reason = `${property} did not match expected value`
  }

  return {
    success,
    selector,
    element_id: element_id ?? resolvedElement.elementId,
    expected_state: { property, expected },
    element: {
      ...resolvedElement,
      state: observedState
    },
    observed_state: {
      property,
      value: observedValue,
      ...(rawValue !== null ? { raw_value: rawValue } : {})
    },
    reason,
    ...(success ? {} : { failure_code: 'UNKNOWN', retryable: false })
  }
}
|
|
1275
|
+
|
|
931
1276
|
static async waitForUICore({ type = 'ui', query, timeoutMs = 30000, pollIntervalMs = 300, includeSnapshotOnFailure = true, match = 'present', stability_ms = 700, observationDelayMs = 0, platform, deviceId }: { type?: 'ui' | 'log' | 'screen' | 'idle', query?: string, timeoutMs?: number, pollIntervalMs?: number, includeSnapshotOnFailure?: boolean, match?: 'present'|'absent', stability_ms?: number, observationDelayMs?: number, platform?: 'android' | 'ios', deviceId?: string }) {
|
|
932
1277
|
const start = Date.now()
|
|
933
1278
|
const deadline = start + (timeoutMs || 0)
|
package/src/observe/ios.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { spawn } from "child_process"
|
|
2
2
|
import { promises as fs } from "fs"
|
|
3
|
-
import { GetLogsResponse, CaptureIOSScreenshotResponse, GetUITreeResponse, UIElement, DeviceInfo } from "../types.js"
|
|
3
|
+
import { GetLogsResponse, CaptureIOSScreenshotResponse, GetUITreeResponse, UIElement, DeviceInfo, UIElementState } from "../types.js"
|
|
4
4
|
import { execCommand, getIOSDeviceMetadata, validateBundleId, getIdbCmd, getXcrunCmd, isIDBInstalled } from "../utils/ios/utils.js"
|
|
5
5
|
import { createWriteStream, promises as fsPromises } from 'fs'
|
|
6
6
|
import path from 'path'
|
|
@@ -56,7 +56,64 @@ function getCenter(bounds: [number, number, number, number]): [number, number] {
|
|
|
56
56
|
return [Math.floor((x1 + x2) / 2), Math.floor((y1 + y2) / 2)];
|
|
57
57
|
}
|
|
58
58
|
|
|
59
|
-
function
|
|
59
|
+
/**
 * Converts an accessibility value to a finite number.
 *
 * Finite numbers pass through unchanged. Strings are trimmed and converted
 * with `Number()`; blank strings are rejected explicitly because `Number('')`
 * and `Number('   ')` evaluate to `0`, which would otherwise be reported as a
 * real reading of zero.
 *
 * @param value - Raw value of any type.
 * @returns The finite number, or `null` when `value` is not numeric.
 */
function parseIOSNumber(value: unknown): number | null {
  if (typeof value === 'number') return Number.isFinite(value) ? value : null
  if (typeof value !== 'string') return null

  const trimmed = value.trim()
  if (trimmed === '') return null

  const parsed = Number(trimmed)
  return Number.isFinite(parsed) ? parsed : null
}
|
|
65
|
+
|
|
66
|
+
/**
 * Decides whether an element should be treated as an adjustable control,
 * based on its type name or any of its accessibility traits.
 *
 * @param node - Source node (not consulted by the current implementation).
 * @param type - Element type name.
 * @param traits - Accessibility trait names.
 * @returns `true` when the type or any trait matches (case-insensitively).
 */
function isIOSAdjustable(node: IDBElement, type: string, traits: string[]): boolean {
  if (/slider|adjustable|stepper|progress/i.test(type)) return true

  for (const trait of traits) {
    if (/adjustable|slider|progress/i.test(trait)) return true
  }
  return false
}
|
|
69
|
+
|
|
70
|
+
/**
 * Derives a readable state object for an iOS element from its type, label,
 * value and accessibility traits.
 *
 * - `selected`: set when a trait mentions "selected"; carries the label, else
 *   the value, else `true`.
 * - `focused`: set to `true` when a trait mentions "focused".
 * - `enabled`: `false` when a trait mentions "disabled" or "not enabled"
 *   (including `UIAccessibilityTraitNotEnabled`), otherwise `true` when a trait
 *   mentions "enabled". The negative forms are tested first because
 *   "NotEnabled" also contains the substring "enabled".
 * - `text_value`: the value, for text-like element types.
 * - `value` / `raw_value`: numeric when the value parses as a number. For
 *   adjustable controls a reading within [0, 1] is scaled to a 0-100 `value`
 *   while `raw_value` keeps the original number; non-numeric values are kept
 *   as strings.
 *
 * @returns The populated state, or `null` when nothing could be derived.
 */
function extractIOSState(node: IDBElement, type: string, label: string | null, value: string | null, traits: string[]): UIElementState | null {
  const state: UIElementState = {}
  const normalizedTraits = traits.map((trait) => String(trait).toLowerCase())
  const hasTrait = (pattern: RegExp): boolean => normalizedTraits.some((trait) => pattern.test(trait))

  if (hasTrait(/selected/)) {
    state.selected = label || value || true
  }

  if (hasTrait(/focused/)) {
    state.focused = true
  }

  // Negative forms win: "notenabled" would otherwise satisfy /enabled/ and
  // report a disabled control as enabled.
  if (hasTrait(/disabled|not[\s_-]?enabled/)) {
    state.enabled = false
  } else if (hasTrait(/enabled/)) {
    state.enabled = true
  }

  if (value && /textfield|search|text/i.test(type)) {
    state.text_value = value
  }

  if (isIOSAdjustable(node, type, traits)) {
    const rawValue = parseIOSNumber(value)
    if (rawValue !== null) {
      state.raw_value = rawValue
      // Readings within [0, 1] are treated as fractions and exposed as 0-100.
      state.value = rawValue >= 0 && rawValue <= 1 ? Math.round(rawValue * 100) : rawValue
    } else if (value) {
      state.raw_value = value
      state.value = value
    }
  } else if (value) {
    const numericValue = parseIOSNumber(value)
    if (numericValue !== null) {
      state.value = numericValue
      state.raw_value = numericValue
    } else {
      state.value = value
    }
  }

  return Object.keys(state).length > 0 ? state : null
}
|
|
115
|
+
|
|
116
|
+
export function traverseIDBNode(node: IDBElement, elements: UIElement[], parentIndex: number = -1, depth: number = 0): number {
|
|
60
117
|
if (!node) return -1;
|
|
61
118
|
|
|
62
119
|
let currentIndex = -1;
|
|
@@ -66,6 +123,7 @@ function traverseIDBNode(node: IDBElement, elements: UIElement[], parentIndex: n
|
|
|
66
123
|
const value = node.AXValue || null;
|
|
67
124
|
const frame = node.AXFrame || node.frame;
|
|
68
125
|
const traits = node.AXTraits || [];
|
|
126
|
+
const state = extractIOSState(node, type, label, value, traits);
|
|
69
127
|
|
|
70
128
|
const clickable = traits.includes("UIAccessibilityTraitButton") || type === "Button" || type === "Cell";
|
|
71
129
|
|
|
@@ -83,7 +141,8 @@ function traverseIDBNode(node: IDBElement, elements: UIElement[], parentIndex: n
|
|
|
83
141
|
visible: true,
|
|
84
142
|
bounds: bounds,
|
|
85
143
|
center: getCenter(bounds),
|
|
86
|
-
depth: depth
|
|
144
|
+
depth: depth,
|
|
145
|
+
state
|
|
87
146
|
};
|
|
88
147
|
|
|
89
148
|
if (parentIndex !== -1) {
|
package/src/server/common.ts
CHANGED
|
@@ -96,7 +96,8 @@ export function normalizeResolvedTarget(value: Partial<ActionTargetResolved> | n
|
|
|
96
96
|
accessibility_id: value.accessibility_id ?? null,
|
|
97
97
|
class: value.class ?? null,
|
|
98
98
|
bounds: value.bounds ?? null,
|
|
99
|
-
index: value.index ?? null
|
|
99
|
+
index: value.index ?? null,
|
|
100
|
+
state: value.state ?? null
|
|
100
101
|
}
|
|
101
102
|
}
|
|
102
103
|
|
|
@@ -468,6 +468,61 @@ Failure Handling:
|
|
|
468
468
|
required: ['selector']
|
|
469
469
|
}
|
|
470
470
|
},
|
|
471
|
+
{
|
|
472
|
+
name: 'expect_state',
|
|
473
|
+
description: `Purpose:
|
|
474
|
+
Verify a readable UI state property on the currently visible element.
|
|
475
|
+
|
|
476
|
+
Inputs:
|
|
477
|
+
- selector or element_id
|
|
478
|
+
- property
|
|
479
|
+
- expected
|
|
480
|
+
- platform/deviceId (optional)
|
|
481
|
+
|
|
482
|
+
Supported properties:
|
|
483
|
+
- checked, selected, focused, expanded, enabled, text_value, value, raw_value
|
|
484
|
+
|
|
485
|
+
Verification Guidance:
|
|
486
|
+
- Use this when the UI element is visible but its state must also be confirmed
|
|
487
|
+
- Prefer the canonical property names above
|
|
488
|
+
- The tool compares the normalized readable state and returns the observed value when available
|
|
489
|
+
|
|
490
|
+
Constraints:
|
|
491
|
+
- Returns structured success/failure only
|
|
492
|
+
- Does not infer a state when the property is unavailable
|
|
493
|
+
|
|
494
|
+
Failure Handling:
|
|
495
|
+
- ELEMENT_NOT_FOUND → re-resolve the element or wait for UI stabilization
|
|
496
|
+
- UNKNOWN → capture a snapshot and stop`,
|
|
497
|
+
inputSchema: {
|
|
498
|
+
type: 'object',
|
|
499
|
+
properties: {
|
|
500
|
+
selector: {
|
|
501
|
+
type: 'object',
|
|
502
|
+
properties: {
|
|
503
|
+
text: { type: 'string' },
|
|
504
|
+
resource_id: { type: 'string' },
|
|
505
|
+
accessibility_id: { type: 'string' },
|
|
506
|
+
contains: { type: 'boolean', default: false }
|
|
507
|
+
}
|
|
508
|
+
},
|
|
509
|
+
element_id: { type: 'string', description: 'Optional previously resolved element identifier.' },
|
|
510
|
+
property: { type: 'string', description: 'Readable state property to verify.' },
|
|
511
|
+
expected: {
|
|
512
|
+
description: 'Expected normalized state value.',
|
|
513
|
+
oneOf: [
|
|
514
|
+
{ type: 'boolean' },
|
|
515
|
+
{ type: 'number' },
|
|
516
|
+
{ type: 'string' },
|
|
517
|
+
{ type: 'object' }
|
|
518
|
+
]
|
|
519
|
+
},
|
|
520
|
+
platform: { type: 'string', enum: ['android', 'ios'], description: 'Optional platform override' },
|
|
521
|
+
deviceId: { type: 'string', description: 'Optional device serial/udid' }
|
|
522
|
+
},
|
|
523
|
+
required: ['property', 'expected']
|
|
524
|
+
}
|
|
525
|
+
},
|
|
471
526
|
{
|
|
472
527
|
name: 'wait_for_ui',
|
|
473
528
|
description: `Purpose:
|
|
@@ -258,6 +258,23 @@ async function handleExpectElementVisible(args: ToolCallArgs) {
|
|
|
258
258
|
return wrapResponse(res)
|
|
259
259
|
}
|
|
260
260
|
|
|
261
|
+
/**
 * Tool handler for `expect_state`: reads and validates the call arguments,
 * delegates to `ToolsInteract.expectStateHandler`, and wraps the result.
 *
 * @throws Error when neither `selector` nor `element_id` is supplied, or when
 *   the `expected` key is absent from `args`.
 */
async function handleExpectState(args: ToolCallArgs) {
  const selectorArg = getObjectArg<ExpectElementSelectorArg>(args, 'selector')
  const elementIdArg = getStringArg(args, 'element_id')
  const property = requireStringArg(args, 'property')
  const platform = getStringArg(args, 'platform') as PlatformArg | undefined
  const deviceId = getStringArg(args, 'deviceId')

  const hasTarget = Boolean(selectorArg) || Boolean(elementIdArg)
  if (!hasTarget) {
    throw new Error('Missing selector or element_id argument')
  }

  // Presence of the key is what matters here; the value itself is validated downstream.
  const hasExpected = Object.prototype.hasOwnProperty.call(args, 'expected')
  if (!hasExpected) {
    throw new Error('Missing expected argument')
  }
  const expected = args.expected as boolean | number | string | Record<string, unknown>

  const result = await ToolsInteract.expectStateHandler({
    selector: selectorArg ?? undefined,
    element_id: elementIdArg ?? undefined,
    property,
    expected,
    platform,
    deviceId
  })
  return wrapResponse(result)
}
|
|
277
|
+
|
|
261
278
|
async function handleWaitForUI(args: ToolCallArgs) {
|
|
262
279
|
const selector = getObjectArg<ExpectElementSelectorArg>(args, 'selector')
|
|
263
280
|
const condition = (getStringArg(args, 'condition') as 'exists' | 'not_exists' | 'visible' | 'clickable' | undefined) ?? 'exists'
|
|
@@ -458,6 +475,7 @@ export const toolHandlers: Record<string, ToolHandler> = {
|
|
|
458
475
|
wait_for_screen_change: handleWaitForScreenChange,
|
|
459
476
|
expect_screen: handleExpectScreen,
|
|
460
477
|
expect_element_visible: handleExpectElementVisible,
|
|
478
|
+
expect_state: handleExpectState,
|
|
461
479
|
wait_for_ui: handleWaitForUI,
|
|
462
480
|
find_element: handleFindElement,
|
|
463
481
|
tap: handleTap,
|