mobile-debug-mcp 0.24.8 → 0.25.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/interact/index.js +240 -4
- package/dist/observe/ios.js +126 -3
- package/dist/server/common.js +2 -1
- package/dist/server/tool-definitions.js +55 -0
- package/dist/server/tool-handlers.js +17 -0
- package/dist/server-core.js +1 -1
- package/dist/utils/android/utils.js +134 -3
- package/docs/CHANGELOG.md +9 -0
- package/docs/ROADMAP.md +406 -0
- package/docs/rfcs/001-state-verification.md +452 -0
- package/docs/rfcs/002-richer-element-identity +400 -0
- package/docs/rfcs/003-wait-and-synchronization-reliability +232 -0
- package/docs/specs/mcp-tooling-spec-v1.md +5 -0
- package/docs/tools/interact.md +25 -0
- package/docs/tools/observe.md +3 -1
- package/package.json +1 -1
- package/src/interact/index.ts +272 -4
- package/src/observe/index.ts +6 -0
- package/src/observe/ios.ts +129 -4
- package/src/server/common.ts +2 -1
- package/src/server/tool-definitions.ts +55 -0
- package/src/server/tool-handlers.ts +18 -0
- package/src/server-core.ts +1 -1
- package/src/types.ts +67 -1
- package/src/utils/android/utils.ts +126 -4
- package/test/unit/observe/state_extraction.test.ts +90 -0
- package/test/unit/server/response_shapes.test.ts +40 -2
package/docs/tools/observe.md
CHANGED
|
@@ -83,11 +83,13 @@ Input:
|
|
|
83
83
|
Response (example):
|
|
84
84
|
|
|
85
85
|
```json
|
|
86
|
-
{ "device": { "platform": "android", "id": "emulator-5554" }, "screen": "", "resolution": { "width": 1080, "height": 2400 }, "elements": [ { "text": "Sign in", "type": "android.widget.Button", "resourceId": "com.example:id/signin", "clickable": true, "bounds": [0,0,100,50] } ] }
|
|
86
|
+
{ "device": { "platform": "android", "id": "emulator-5554" }, "screen": "", "resolution": { "width": 1080, "height": 2400 }, "elements": [ { "text": "Sign in", "type": "android.widget.Button", "resourceId": "com.example:id/signin", "clickable": true, "bounds": [0,0,100,50], "state": { "enabled": true }, "stable_id": "com.example:id/signin", "role": "button", "test_tag": "com.example:id/signin", "selector": { "value": "com.example:id/signin", "confidence": { "score": 1, "reason": "resource_id" } }, "semantic": { "is_clickable": true, "is_container": false } } ] }
|
|
87
87
|
```
|
|
88
88
|
|
|
89
89
|
Notes:
|
|
90
90
|
- Useful for inspection, selector development, and fallback debugging.
|
|
91
|
+
- Elements may include a normalized `state` object when the platform exposes readable state such as checked, selected, focused, expanded, text input, or slider values.
|
|
92
|
+
- Elements may also include platform-native identity hints such as `stable_id`, `role`, `test_tag`, `selector`, and `semantic`.
|
|
91
93
|
- Prefer `wait_for_ui` for deterministic element resolution in interactive flows.
|
|
92
94
|
|
|
93
95
|
---
|
package/package.json
CHANGED
package/src/interact/index.ts
CHANGED
|
@@ -10,7 +10,9 @@ import type {
|
|
|
10
10
|
ActionFailureCode,
|
|
11
11
|
ActionTargetResolved,
|
|
12
12
|
ExpectElementVisibleResponse,
|
|
13
|
+
ExpectStateResponse,
|
|
13
14
|
ExpectScreenResponse,
|
|
15
|
+
UIElementState,
|
|
14
16
|
TapElementResponse
|
|
15
17
|
} from '../types.js'
|
|
16
18
|
|
|
@@ -37,6 +39,12 @@ interface UiElement {
|
|
|
37
39
|
_index?: number
|
|
38
40
|
_interactable?: boolean
|
|
39
41
|
_sliderLike?: boolean
|
|
42
|
+
state?: UIElementState | null
|
|
43
|
+
stable_id?: string | null
|
|
44
|
+
role?: string | null
|
|
45
|
+
test_tag?: string | null
|
|
46
|
+
selector?: { value: string | null, confidence: { score: number, reason: string } | null } | null
|
|
47
|
+
semantic?: { is_clickable: boolean, is_container: boolean } | null
|
|
40
48
|
}
|
|
41
49
|
|
|
42
50
|
interface ResolvedUiElementContext {
|
|
@@ -77,6 +85,45 @@ export class ToolsInteract {
|
|
|
77
85
|
return normalized as [number, number, number, number]
|
|
78
86
|
}
|
|
79
87
|
|
|
88
|
+
private static _matchesSelector(el: UiElement, selector?: { text?: string, resource_id?: string, accessibility_id?: string, contains?: boolean }): boolean {
|
|
89
|
+
if (!selector) return false
|
|
90
|
+
const normalize = ToolsInteract._normalize
|
|
91
|
+
const containsFlag = !!selector.contains
|
|
92
|
+
const text = normalize(el.text ?? el.label ?? el.value ?? '')
|
|
93
|
+
const resourceId = normalize(el.resourceId ?? el.resourceID ?? el.id ?? '')
|
|
94
|
+
const accessibilityId = normalize(el.contentDescription ?? el.contentDesc ?? el.accessibilityLabel ?? el.label ?? '')
|
|
95
|
+
|
|
96
|
+
if (selector.text !== undefined && selector.text !== null) {
|
|
97
|
+
const q = normalize(selector.text)
|
|
98
|
+
if (containsFlag ? !text.includes(q) : text !== q) return false
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
if (selector.resource_id !== undefined && selector.resource_id !== null) {
|
|
102
|
+
const q = normalize(selector.resource_id)
|
|
103
|
+
if (containsFlag ? !resourceId.includes(q) : resourceId !== q) return false
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
if (selector.accessibility_id !== undefined && selector.accessibility_id !== null) {
|
|
107
|
+
const q = normalize(selector.accessibility_id)
|
|
108
|
+
if (containsFlag ? !accessibilityId.includes(q) : accessibilityId !== q) return false
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
return true
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
private static _findFirstMatchingElement(
|
|
115
|
+
elements: UiElement[],
|
|
116
|
+
selector?: { text?: string, resource_id?: string, accessibility_id?: string, contains?: boolean }
|
|
117
|
+
): { el: UiElement, idx: number } | null {
|
|
118
|
+
if (!selector) return null
|
|
119
|
+
for (let i = 0; i < elements.length; i++) {
|
|
120
|
+
const el = elements[i]
|
|
121
|
+
if (!el) continue
|
|
122
|
+
if (ToolsInteract._matchesSelector(el, selector)) return { el, idx: i }
|
|
123
|
+
}
|
|
124
|
+
return null
|
|
125
|
+
}
|
|
126
|
+
|
|
80
127
|
private static _isVisibleElement(el: UiElement): boolean {
|
|
81
128
|
const bounds = ToolsInteract._normalizeBounds(el.bounds)
|
|
82
129
|
return !!el.visible && !!bounds && bounds[2] > bounds[0] && bounds[3] > bounds[1]
|
|
@@ -115,7 +162,13 @@ export class ToolsInteract {
|
|
|
115
162
|
class: el.type ?? el.class ?? null,
|
|
116
163
|
bounds,
|
|
117
164
|
index,
|
|
118
|
-
elementId
|
|
165
|
+
elementId,
|
|
166
|
+
state: el.state ?? null,
|
|
167
|
+
stable_id: el.stable_id ?? null,
|
|
168
|
+
role: el.role ?? null,
|
|
169
|
+
test_tag: el.test_tag ?? null,
|
|
170
|
+
selector: el.selector ?? null,
|
|
171
|
+
semantic: el.semantic ?? null
|
|
119
172
|
}
|
|
120
173
|
}
|
|
121
174
|
|
|
@@ -154,7 +207,13 @@ export class ToolsInteract {
|
|
|
154
207
|
accessibility_id: element.contentDescription ?? element.contentDesc ?? element.accessibilityLabel ?? element.label ?? null,
|
|
155
208
|
class: element.type ?? element.class ?? null,
|
|
156
209
|
bounds: ToolsInteract._normalizeBounds(element.bounds),
|
|
157
|
-
index
|
|
210
|
+
index,
|
|
211
|
+
state: element.state ?? null,
|
|
212
|
+
stable_id: element.stable_id ?? null,
|
|
213
|
+
role: element.role ?? null,
|
|
214
|
+
test_tag: element.test_tag ?? null,
|
|
215
|
+
selector: element.selector ?? null,
|
|
216
|
+
semantic: element.semantic ?? null
|
|
158
217
|
}
|
|
159
218
|
}
|
|
160
219
|
|
|
@@ -578,6 +637,11 @@ export class ToolsInteract {
|
|
|
578
637
|
bounds: boundsObj,
|
|
579
638
|
clickable: !!best.clickable,
|
|
580
639
|
enabled: !!best.enabled,
|
|
640
|
+
stable_id: best.stable_id ?? null,
|
|
641
|
+
role: best.role ?? null,
|
|
642
|
+
test_tag: best.test_tag ?? null,
|
|
643
|
+
selector: best.selector ?? null,
|
|
644
|
+
semantic: best.semantic ?? null,
|
|
581
645
|
tapCoordinates,
|
|
582
646
|
telemetry: {
|
|
583
647
|
matchedIndex: best?._index ?? null,
|
|
@@ -996,7 +1060,13 @@ export class ToolsInteract {
|
|
|
996
1060
|
accessibility_id: result.element.accessibility_id ?? null,
|
|
997
1061
|
class: result.element.class ?? null,
|
|
998
1062
|
bounds: result.element.bounds ?? null,
|
|
999
|
-
index: typeof result.element.index === 'number' ? result.element.index : null
|
|
1063
|
+
index: typeof result.element.index === 'number' ? result.element.index : null,
|
|
1064
|
+
state: (result.element as any).state ?? null,
|
|
1065
|
+
stable_id: (result.element as any).stable_id ?? null,
|
|
1066
|
+
role: (result.element as any).role ?? null,
|
|
1067
|
+
test_tag: (result.element as any).test_tag ?? null,
|
|
1068
|
+
selector: (result.element as any).selector ?? null,
|
|
1069
|
+
semantic: (result.element as any).semantic ?? null
|
|
1000
1070
|
},
|
|
1001
1071
|
observed: {
|
|
1002
1072
|
status: result.status,
|
|
@@ -1010,7 +1080,13 @@ export class ToolsInteract {
|
|
|
1010
1080
|
accessibility_id: result.element.accessibility_id ?? null,
|
|
1011
1081
|
class: result.element.class ?? null,
|
|
1012
1082
|
bounds: result.element.bounds ?? null,
|
|
1013
|
-
index: typeof result.element.index === 'number' ? result.element.index : null
|
|
1083
|
+
index: typeof result.element.index === 'number' ? result.element.index : null,
|
|
1084
|
+
state: (result.element as any).state ?? null,
|
|
1085
|
+
stable_id: (result.element as any).stable_id ?? null,
|
|
1086
|
+
role: (result.element as any).role ?? null,
|
|
1087
|
+
test_tag: (result.element as any).test_tag ?? null,
|
|
1088
|
+
selector: (result.element as any).selector ?? null,
|
|
1089
|
+
semantic: (result.element as any).semantic ?? null
|
|
1014
1090
|
}
|
|
1015
1091
|
},
|
|
1016
1092
|
reason: 'selector is visible'
|
|
@@ -1036,6 +1112,198 @@ export class ToolsInteract {
|
|
|
1036
1112
|
}
|
|
1037
1113
|
}
|
|
1038
1114
|
|
|
1115
|
+
static async expectStateHandler({
|
|
1116
|
+
selector,
|
|
1117
|
+
element_id,
|
|
1118
|
+
property,
|
|
1119
|
+
expected,
|
|
1120
|
+
platform,
|
|
1121
|
+
deviceId
|
|
1122
|
+
}: {
|
|
1123
|
+
selector?: { text?: string, resource_id?: string, accessibility_id?: string, contains?: boolean },
|
|
1124
|
+
element_id?: string,
|
|
1125
|
+
property: string,
|
|
1126
|
+
expected: boolean | number | string | Record<string, unknown>,
|
|
1127
|
+
platform?: 'android' | 'ios',
|
|
1128
|
+
deviceId?: string
|
|
1129
|
+
}): Promise<ExpectStateResponse> {
|
|
1130
|
+
const tree = await ToolsObserve.getUITreeHandler({ platform, deviceId }) as any
|
|
1131
|
+
const elements = Array.isArray(tree?.elements) ? tree.elements as UiElement[] : []
|
|
1132
|
+
const treePlatform = tree?.device?.platform === 'ios' ? 'ios' : (platform || 'android')
|
|
1133
|
+
const treeDeviceId = tree?.device?.id || deviceId
|
|
1134
|
+
|
|
1135
|
+
let matched: { el: UiElement, idx: number } | null = null
|
|
1136
|
+
|
|
1137
|
+
if (element_id) {
|
|
1138
|
+
const resolved = ToolsInteract._resolvedUiElements.get(element_id)
|
|
1139
|
+
if (resolved) {
|
|
1140
|
+
const current = ToolsInteract._findCurrentResolvedElement(elements, treePlatform, treeDeviceId, resolved)
|
|
1141
|
+
if (current) matched = { el: current.el, idx: current.index }
|
|
1142
|
+
}
|
|
1143
|
+
}
|
|
1144
|
+
|
|
1145
|
+
if (!matched && selector) {
|
|
1146
|
+
matched = ToolsInteract._findFirstMatchingElement(elements, selector)
|
|
1147
|
+
}
|
|
1148
|
+
|
|
1149
|
+
if (!matched) {
|
|
1150
|
+
return {
|
|
1151
|
+
success: false,
|
|
1152
|
+
selector,
|
|
1153
|
+
element_id: element_id ?? null,
|
|
1154
|
+
expected_state: { property, expected },
|
|
1155
|
+
reason: 'element not found',
|
|
1156
|
+
failure_code: 'ELEMENT_NOT_FOUND',
|
|
1157
|
+
retryable: true
|
|
1158
|
+
}
|
|
1159
|
+
}
|
|
1160
|
+
|
|
1161
|
+
const resolvedElement = ToolsInteract._resolvedTargetFromElement(
|
|
1162
|
+
ToolsInteract._computeElementId(treePlatform, treeDeviceId, matched.el, matched.idx),
|
|
1163
|
+
matched.el,
|
|
1164
|
+
matched.idx
|
|
1165
|
+
)
|
|
1166
|
+
const observedState = matched.el.state ?? null
|
|
1167
|
+
const actual = observedState?.[property as keyof UIElementState] ?? null
|
|
1168
|
+
|
|
1169
|
+
const compareBoolean = (value: unknown) => typeof value === 'boolean' ? value : null
|
|
1170
|
+
const compareString = (value: unknown) => typeof value === 'string' ? value : null
|
|
1171
|
+
const compareNumber = (value: unknown) => typeof value === 'number' && Number.isFinite(value) ? value : null
|
|
1172
|
+
|
|
1173
|
+
let success = false
|
|
1174
|
+
let reason = ''
|
|
1175
|
+
let rawValue: boolean | number | string | null = null
|
|
1176
|
+
let observedValue: boolean | number | string | Record<string, unknown> | null = actual as any
|
|
1177
|
+
|
|
1178
|
+
switch (property) {
|
|
1179
|
+
case 'checked':
|
|
1180
|
+
case 'focused':
|
|
1181
|
+
case 'expanded':
|
|
1182
|
+
case 'enabled': {
|
|
1183
|
+
const expectedBool = compareBoolean(expected)
|
|
1184
|
+
const actualBool = compareBoolean(actual)
|
|
1185
|
+
if (expectedBool === null) {
|
|
1186
|
+
reason = `expected ${property} must be boolean`
|
|
1187
|
+
} else if (actualBool === null) {
|
|
1188
|
+
reason = `${property} state unavailable`
|
|
1189
|
+
} else {
|
|
1190
|
+
rawValue = actualBool
|
|
1191
|
+
success = actualBool === expectedBool
|
|
1192
|
+
reason = success ? `${property} matches expected value` : `expected ${property}=${expectedBool} but observed ${actualBool}`
|
|
1193
|
+
}
|
|
1194
|
+
observedValue = actualBool
|
|
1195
|
+
break
|
|
1196
|
+
}
|
|
1197
|
+
case 'value':
|
|
1198
|
+
case 'raw_value': {
|
|
1199
|
+
const expectedNumber = compareNumber(expected)
|
|
1200
|
+
const actualNumber = compareNumber(actual)
|
|
1201
|
+
if (expectedNumber !== null && actualNumber !== null) {
|
|
1202
|
+
success = actualNumber === expectedNumber
|
|
1203
|
+
rawValue = actualNumber
|
|
1204
|
+
observedValue = actualNumber
|
|
1205
|
+
reason = success ? 'value matches expected value' : `expected value=${expectedNumber} but observed ${actualNumber}`
|
|
1206
|
+
break
|
|
1207
|
+
}
|
|
1208
|
+
const expectedString = typeof expected === 'string' ? expected : null
|
|
1209
|
+
const actualString = compareString(actual)
|
|
1210
|
+
if (expectedString !== null && actualString !== null) {
|
|
1211
|
+
success = actualString === expectedString
|
|
1212
|
+
rawValue = actualString
|
|
1213
|
+
observedValue = actualString
|
|
1214
|
+
reason = success ? 'value matches expected value' : `expected value=${expectedString} but observed ${actualString}`
|
|
1215
|
+
} else {
|
|
1216
|
+
reason = 'value state unavailable'
|
|
1217
|
+
}
|
|
1218
|
+
break
|
|
1219
|
+
}
|
|
1220
|
+
case 'selected': {
|
|
1221
|
+
const expectedBool = typeof expected === 'boolean' ? expected : null
|
|
1222
|
+
const expectedString = typeof expected === 'string'
|
|
1223
|
+
? expected
|
|
1224
|
+
: expected && typeof expected === 'object'
|
|
1225
|
+
? String((expected as { id?: unknown; label?: unknown }).id ?? (expected as { id?: unknown; label?: unknown }).label ?? '')
|
|
1226
|
+
: null
|
|
1227
|
+
if (!observedState || observedState.selected === undefined || observedState.selected === null) {
|
|
1228
|
+
reason = 'selected state unavailable'
|
|
1229
|
+
break
|
|
1230
|
+
}
|
|
1231
|
+
if (expectedBool !== null) {
|
|
1232
|
+
const actualBool = typeof observedState.selected === 'boolean' ? observedState.selected : null
|
|
1233
|
+
if (actualBool === null) {
|
|
1234
|
+
reason = 'selected state is not boolean'
|
|
1235
|
+
break
|
|
1236
|
+
}
|
|
1237
|
+
rawValue = actualBool
|
|
1238
|
+
observedValue = actualBool
|
|
1239
|
+
success = actualBool === expectedBool
|
|
1240
|
+
reason = success ? 'selected matches expected value' : `expected selected=${expectedBool} but observed ${actualBool}`
|
|
1241
|
+
break
|
|
1242
|
+
}
|
|
1243
|
+
const actualSelected = typeof observedState.selected === 'object' && observedState.selected !== null
|
|
1244
|
+
? String((observedState.selected as { id?: unknown; label?: unknown }).id ?? (observedState.selected as { id?: unknown; label?: unknown }).label ?? '')
|
|
1245
|
+
: String(observedState.selected)
|
|
1246
|
+
const actualString = actualSelected.trim()
|
|
1247
|
+
if (!expectedString) {
|
|
1248
|
+
reason = 'expected selected must be boolean, string, or object with id/label'
|
|
1249
|
+
break
|
|
1250
|
+
}
|
|
1251
|
+
rawValue = actualString
|
|
1252
|
+
observedValue = actualString
|
|
1253
|
+
success = actualString === expectedString
|
|
1254
|
+
reason = success ? 'selected matches expected value' : `expected selected=${expectedString} but observed ${actualString}`
|
|
1255
|
+
break
|
|
1256
|
+
}
|
|
1257
|
+
case 'text_value': {
|
|
1258
|
+
const expectedString = typeof expected === 'string' ? expected : null
|
|
1259
|
+
const actualString = compareString(actual)
|
|
1260
|
+
if (!expectedString) {
|
|
1261
|
+
reason = 'expected text_value must be string'
|
|
1262
|
+
} else if (!actualString) {
|
|
1263
|
+
reason = 'text_value state unavailable'
|
|
1264
|
+
} else {
|
|
1265
|
+
success = actualString === expectedString
|
|
1266
|
+
rawValue = actualString
|
|
1267
|
+
observedValue = actualString
|
|
1268
|
+
reason = success ? 'text_value matches expected value' : `expected text_value=${expectedString} but observed ${actualString}`
|
|
1269
|
+
}
|
|
1270
|
+
break
|
|
1271
|
+
}
|
|
1272
|
+
default: {
|
|
1273
|
+
if (actual !== null && actual !== undefined) {
|
|
1274
|
+
success = actual === expected
|
|
1275
|
+
observedValue = actual as any
|
|
1276
|
+
rawValue = typeof actual === 'string' || typeof actual === 'number' || typeof actual === 'boolean' ? actual : null
|
|
1277
|
+
reason = success ? `${property} matches expected value` : `expected ${property} to match but observed ${String(actual)}`
|
|
1278
|
+
} else {
|
|
1279
|
+
reason = `unsupported or unavailable state property: ${property}`
|
|
1280
|
+
}
|
|
1281
|
+
}
|
|
1282
|
+
}
|
|
1283
|
+
|
|
1284
|
+
if (!success && !reason) {
|
|
1285
|
+
reason = `${property} did not match expected value`
|
|
1286
|
+
}
|
|
1287
|
+
|
|
1288
|
+
return {
|
|
1289
|
+
success,
|
|
1290
|
+
selector,
|
|
1291
|
+
element_id: element_id ?? resolvedElement.elementId,
|
|
1292
|
+
expected_state: { property, expected },
|
|
1293
|
+
element: {
|
|
1294
|
+
...resolvedElement,
|
|
1295
|
+
state: observedState
|
|
1296
|
+
},
|
|
1297
|
+
observed_state: {
|
|
1298
|
+
property,
|
|
1299
|
+
value: observedValue,
|
|
1300
|
+
...(rawValue !== null ? { raw_value: rawValue } : {})
|
|
1301
|
+
},
|
|
1302
|
+
reason,
|
|
1303
|
+
...(success ? {} : { failure_code: 'UNKNOWN', retryable: false })
|
|
1304
|
+
}
|
|
1305
|
+
}
|
|
1306
|
+
|
|
1039
1307
|
static async waitForUICore({ type = 'ui', query, timeoutMs = 30000, pollIntervalMs = 300, includeSnapshotOnFailure = true, match = 'present', stability_ms = 700, observationDelayMs = 0, platform, deviceId }: { type?: 'ui' | 'log' | 'screen' | 'idle', query?: string, timeoutMs?: number, pollIntervalMs?: number, includeSnapshotOnFailure?: boolean, match?: 'present'|'absent', stability_ms?: number, observationDelayMs?: number, platform?: 'android' | 'ios', deviceId?: string }) {
|
|
1040
1308
|
const start = Date.now()
|
|
1041
1309
|
const deadline = start + (timeoutMs || 0)
|
package/src/observe/index.ts
CHANGED
|
@@ -21,6 +21,12 @@ interface SnapshotTreeElementLike {
|
|
|
21
21
|
clickable?: boolean
|
|
22
22
|
enabled?: boolean
|
|
23
23
|
visible?: boolean
|
|
24
|
+
state?: unknown
|
|
25
|
+
stable_id?: string | null
|
|
26
|
+
role?: string | null
|
|
27
|
+
test_tag?: string | null
|
|
28
|
+
selector?: unknown
|
|
29
|
+
semantic?: unknown
|
|
24
30
|
}
|
|
25
31
|
|
|
26
32
|
interface SnapshotTreeLike {
|
package/src/observe/ios.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { spawn } from "child_process"
|
|
2
2
|
import { promises as fs } from "fs"
|
|
3
|
-
import { GetLogsResponse, CaptureIOSScreenshotResponse, GetUITreeResponse, UIElement, DeviceInfo } from "../types.js"
|
|
3
|
+
import { GetLogsResponse, CaptureIOSScreenshotResponse, GetUITreeResponse, UIElement, DeviceInfo, UIElementSemanticMetadata, UIElementState, UIResolutionSelector, SelectorConfidence } from "../types.js"
|
|
4
4
|
import { execCommand, getIOSDeviceMetadata, validateBundleId, getIdbCmd, getXcrunCmd, isIDBInstalled } from "../utils/ios/utils.js"
|
|
5
5
|
import { createWriteStream, promises as fsPromises } from 'fs'
|
|
6
6
|
import path from 'path'
|
|
@@ -22,6 +22,9 @@ export function _resetIOSExecCommandForTests() {
|
|
|
22
22
|
interface IDBElement {
|
|
23
23
|
AXFrame?: { x: number | string, y: number | string, width: number | string, height: number | string, w?: number | string, h?: number | string };
|
|
24
24
|
frame?: { x: number | string, y: number | string, width: number | string, height: number | string, w?: number | string, h?: number | string };
|
|
25
|
+
AXIdentifier?: string;
|
|
26
|
+
accessibilityIdentifier?: string;
|
|
27
|
+
identifier?: string;
|
|
25
28
|
AXUniqueId?: string;
|
|
26
29
|
AXLabel?: string;
|
|
27
30
|
AXValue?: string;
|
|
@@ -56,7 +59,117 @@ function getCenter(bounds: [number, number, number, number]): [number, number] {
|
|
|
56
59
|
return [Math.floor((x1 + x2) / 2), Math.floor((y1 + y2) / 2)];
|
|
57
60
|
}
|
|
58
61
|
|
|
59
|
-
function
|
|
62
|
+
function parseIOSNumber(value: unknown): number | null {
|
|
63
|
+
if (typeof value === 'number' && Number.isFinite(value)) return value
|
|
64
|
+
if (typeof value !== 'string') return null
|
|
65
|
+
const parsed = Number(value)
|
|
66
|
+
return Number.isFinite(parsed) ? parsed : null
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
function normalizeIOSType(value: unknown): string {
|
|
70
|
+
return typeof value === 'string' ? value.trim().toLowerCase() : ''
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
function inferIOSRole(type: string, traits: string[]): string | null {
|
|
74
|
+
if (/slider|adjustable/.test(type) || traits.some((trait) => /adjustable|slider/.test(trait))) return 'slider'
|
|
75
|
+
if (/button/.test(type) || traits.some((trait) => /button/.test(trait))) return 'button'
|
|
76
|
+
if (/cell/.test(type)) return 'cell'
|
|
77
|
+
if (/switch/.test(type)) return 'switch'
|
|
78
|
+
if (/text field|textfield|search field/.test(type)) return 'text_field'
|
|
79
|
+
if (/image/.test(type)) return 'image'
|
|
80
|
+
if (/window|application|group|scroll view|collection view/.test(type)) return 'container'
|
|
81
|
+
return null
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
function getIOSStableId(node: IDBElement): string | null {
|
|
85
|
+
const candidates = [node.AXIdentifier, node.accessibilityIdentifier, node.identifier, node.AXUniqueId]
|
|
86
|
+
for (const candidate of candidates) {
|
|
87
|
+
if (typeof candidate === 'string' && candidate.trim().length > 0) return candidate
|
|
88
|
+
}
|
|
89
|
+
return null
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
function buildIOSSelectorConfidence(source: 'identifier' | 'label' | 'value' | 'type' | 'none'): SelectorConfidence | null {
|
|
93
|
+
switch (source) {
|
|
94
|
+
case 'identifier':
|
|
95
|
+
return { score: 1, reason: 'accessibility_identifier' }
|
|
96
|
+
case 'label':
|
|
97
|
+
return { score: 0.9, reason: 'label_match' }
|
|
98
|
+
case 'value':
|
|
99
|
+
return { score: 0.75, reason: 'value_match' }
|
|
100
|
+
case 'type':
|
|
101
|
+
return { score: 0.35, reason: 'type_match' }
|
|
102
|
+
default:
|
|
103
|
+
return null
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
function buildIOSSelector(type: string, label: string | null, value: string | null, stableId: string | null): UIResolutionSelector | null {
|
|
108
|
+
if (stableId) return { value: stableId, confidence: buildIOSSelectorConfidence('identifier') }
|
|
109
|
+
if (label) return { value: label, confidence: buildIOSSelectorConfidence('label') }
|
|
110
|
+
if (value) return { value: value, confidence: buildIOSSelectorConfidence('value') }
|
|
111
|
+
if (type) return { value: type, confidence: buildIOSSelectorConfidence('type') }
|
|
112
|
+
return null
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
function buildIOSSemantic(type: string, traits: string[]): UIElementSemanticMetadata {
|
|
116
|
+
return {
|
|
117
|
+
is_clickable: traits.includes("UIAccessibilityTraitButton") || /adjustable|slider/.test(type) || type === "Button" || type === "Cell",
|
|
118
|
+
is_container: /window|application|group|scroll view|collection view/.test(type)
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
function isIOSAdjustable(node: IDBElement, type: string, traits: string[]): boolean {
|
|
123
|
+
return /slider|adjustable|stepper|progress/i.test(type) || traits.some((trait) => /adjustable|slider|progress/i.test(trait))
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
function extractIOSState(node: IDBElement, type: string, label: string | null, value: string | null, traits: string[]): UIElementState | null {
|
|
127
|
+
const state: UIElementState = {}
|
|
128
|
+
const normalizedTraits = traits.map((trait) => String(trait).toLowerCase())
|
|
129
|
+
|
|
130
|
+
if (normalizedTraits.some((trait) => /selected/.test(trait))) {
|
|
131
|
+
state.selected = label || value || true
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
if (normalizedTraits.some((trait) => /focused/.test(trait))) {
|
|
135
|
+
state.focused = true
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
if (normalizedTraits.some((trait) => /enabled/.test(trait))) {
|
|
139
|
+
state.enabled = true
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
if (normalizedTraits.some((trait) => /disabled/.test(trait))) {
|
|
143
|
+
state.enabled = false
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
if (value && /textfield|search|text/i.test(type)) {
|
|
147
|
+
state.text_value = value
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
if (isIOSAdjustable(node, type, traits)) {
|
|
151
|
+
const rawValue = parseIOSNumber(value)
|
|
152
|
+
if (rawValue !== null) {
|
|
153
|
+
state.raw_value = rawValue
|
|
154
|
+
state.value = rawValue >= 0 && rawValue <= 1 ? Math.round(rawValue * 100) : rawValue
|
|
155
|
+
} else if (value) {
|
|
156
|
+
state.raw_value = value
|
|
157
|
+
state.value = value
|
|
158
|
+
}
|
|
159
|
+
} else if (value) {
|
|
160
|
+
const numericValue = parseIOSNumber(value)
|
|
161
|
+
if (numericValue !== null) {
|
|
162
|
+
state.value = numericValue
|
|
163
|
+
state.raw_value = numericValue
|
|
164
|
+
} else {
|
|
165
|
+
state.value = value
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
return Object.keys(state).length > 0 ? state : null
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
export function traverseIDBNode(node: IDBElement, elements: UIElement[], parentIndex: number = -1, depth: number = 0): number {
|
|
60
173
|
if (!node) return -1;
|
|
61
174
|
|
|
62
175
|
let currentIndex = -1;
|
|
@@ -66,6 +179,12 @@ function traverseIDBNode(node: IDBElement, elements: UIElement[], parentIndex: n
|
|
|
66
179
|
const value = node.AXValue || null;
|
|
67
180
|
const frame = node.AXFrame || node.frame;
|
|
68
181
|
const traits = node.AXTraits || [];
|
|
182
|
+
const state = extractIOSState(node, type, label, value, traits);
|
|
183
|
+
const normalizedType = normalizeIOSType(type)
|
|
184
|
+
const stableId = getIOSStableId(node)
|
|
185
|
+
const selector = buildIOSSelector(type, label, value, stableId)
|
|
186
|
+
const semantic = buildIOSSemantic(normalizedType, traits)
|
|
187
|
+
const role = inferIOSRole(normalizedType, traits)
|
|
69
188
|
|
|
70
189
|
const clickable = traits.includes("UIAccessibilityTraitButton") || type === "Button" || type === "Cell";
|
|
71
190
|
|
|
@@ -77,13 +196,19 @@ function traverseIDBNode(node: IDBElement, elements: UIElement[], parentIndex: n
|
|
|
77
196
|
text: label,
|
|
78
197
|
contentDescription: value,
|
|
79
198
|
type: type,
|
|
80
|
-
resourceId:
|
|
199
|
+
resourceId: stableId,
|
|
81
200
|
clickable: clickable,
|
|
82
201
|
enabled: true,
|
|
83
202
|
visible: true,
|
|
84
203
|
bounds: bounds,
|
|
85
204
|
center: getCenter(bounds),
|
|
86
|
-
depth: depth
|
|
205
|
+
depth: depth,
|
|
206
|
+
state,
|
|
207
|
+
stable_id: stableId,
|
|
208
|
+
role,
|
|
209
|
+
test_tag: stableId,
|
|
210
|
+
selector,
|
|
211
|
+
semantic
|
|
87
212
|
};
|
|
88
213
|
|
|
89
214
|
if (parentIndex !== -1) {
|
package/src/server/common.ts
CHANGED
|
@@ -96,7 +96,8 @@ export function normalizeResolvedTarget(value: Partial<ActionTargetResolved> | n
|
|
|
96
96
|
accessibility_id: value.accessibility_id ?? null,
|
|
97
97
|
class: value.class ?? null,
|
|
98
98
|
bounds: value.bounds ?? null,
|
|
99
|
-
index: value.index ?? null
|
|
99
|
+
index: value.index ?? null,
|
|
100
|
+
state: value.state ?? null
|
|
100
101
|
}
|
|
101
102
|
}
|
|
102
103
|
|
|
@@ -468,6 +468,61 @@ Failure Handling:
|
|
|
468
468
|
required: ['selector']
|
|
469
469
|
}
|
|
470
470
|
},
|
|
471
|
+
{
|
|
472
|
+
name: 'expect_state',
|
|
473
|
+
description: `Purpose:
|
|
474
|
+
Verify a readable UI state property on the currently visible element.
|
|
475
|
+
|
|
476
|
+
Inputs:
|
|
477
|
+
- selector or element_id
|
|
478
|
+
- property
|
|
479
|
+
- expected
|
|
480
|
+
- platform/deviceId (optional)
|
|
481
|
+
|
|
482
|
+
Supported properties:
|
|
483
|
+
- checked, selected, focused, expanded, enabled, text_value, value, raw_value
|
|
484
|
+
|
|
485
|
+
Verification Guidance:
|
|
486
|
+
- Use this when the UI element is visible but its state must also be confirmed
|
|
487
|
+
- Prefer the canonical property names above
|
|
488
|
+
- The tool compares the normalized readable state and returns the observed value when available
|
|
489
|
+
|
|
490
|
+
Constraints:
|
|
491
|
+
- Returns structured success/failure only
|
|
492
|
+
- Does not infer a state when the property is unavailable
|
|
493
|
+
|
|
494
|
+
Failure Handling:
|
|
495
|
+
- ELEMENT_NOT_FOUND → re-resolve the element or wait for UI stabilization
|
|
496
|
+
- UNKNOWN → capture a snapshot and stop`,
|
|
497
|
+
inputSchema: {
|
|
498
|
+
type: 'object',
|
|
499
|
+
properties: {
|
|
500
|
+
selector: {
|
|
501
|
+
type: 'object',
|
|
502
|
+
properties: {
|
|
503
|
+
text: { type: 'string' },
|
|
504
|
+
resource_id: { type: 'string' },
|
|
505
|
+
accessibility_id: { type: 'string' },
|
|
506
|
+
contains: { type: 'boolean', default: false }
|
|
507
|
+
}
|
|
508
|
+
},
|
|
509
|
+
element_id: { type: 'string', description: 'Optional previously resolved element identifier.' },
|
|
510
|
+
property: { type: 'string', description: 'Readable state property to verify.' },
|
|
511
|
+
expected: {
|
|
512
|
+
description: 'Expected normalized state value.',
|
|
513
|
+
oneOf: [
|
|
514
|
+
{ type: 'boolean' },
|
|
515
|
+
{ type: 'number' },
|
|
516
|
+
{ type: 'string' },
|
|
517
|
+
{ type: 'object' }
|
|
518
|
+
]
|
|
519
|
+
},
|
|
520
|
+
platform: { type: 'string', enum: ['android', 'ios'], description: 'Optional platform override' },
|
|
521
|
+
deviceId: { type: 'string', description: 'Optional device serial/udid' }
|
|
522
|
+
},
|
|
523
|
+
required: ['property', 'expected']
|
|
524
|
+
}
|
|
525
|
+
},
|
|
471
526
|
{
|
|
472
527
|
name: 'wait_for_ui',
|
|
473
528
|
description: `Purpose:
|
|
@@ -258,6 +258,23 @@ async function handleExpectElementVisible(args: ToolCallArgs) {
|
|
|
258
258
|
return wrapResponse(res)
|
|
259
259
|
}
|
|
260
260
|
|
|
261
|
+
async function handleExpectState(args: ToolCallArgs) {
|
|
262
|
+
const selector = getObjectArg<ExpectElementSelectorArg>(args, 'selector')
|
|
263
|
+
const element_id = getStringArg(args, 'element_id')
|
|
264
|
+
const property = requireStringArg(args, 'property')
|
|
265
|
+
const platform = getStringArg(args, 'platform') as PlatformArg | undefined
|
|
266
|
+
const deviceId = getStringArg(args, 'deviceId')
|
|
267
|
+
if (!selector && !element_id) {
|
|
268
|
+
throw new Error('Missing selector or element_id argument')
|
|
269
|
+
}
|
|
270
|
+
if (!Object.prototype.hasOwnProperty.call(args, 'expected')) {
|
|
271
|
+
throw new Error('Missing expected argument')
|
|
272
|
+
}
|
|
273
|
+
const expected = args.expected as boolean | number | string | Record<string, unknown>
|
|
274
|
+
const res = await ToolsInteract.expectStateHandler({ selector: selector ?? undefined, element_id: element_id ?? undefined, property, expected, platform, deviceId })
|
|
275
|
+
return wrapResponse(res)
|
|
276
|
+
}
|
|
277
|
+
|
|
261
278
|
async function handleWaitForUI(args: ToolCallArgs) {
|
|
262
279
|
const selector = getObjectArg<ExpectElementSelectorArg>(args, 'selector')
|
|
263
280
|
const condition = (getStringArg(args, 'condition') as 'exists' | 'not_exists' | 'visible' | 'clickable' | undefined) ?? 'exists'
|
|
@@ -458,6 +475,7 @@ export const toolHandlers: Record<string, ToolHandler> = {
|
|
|
458
475
|
wait_for_screen_change: handleWaitForScreenChange,
|
|
459
476
|
expect_screen: handleExpectScreen,
|
|
460
477
|
expect_element_visible: handleExpectElementVisible,
|
|
478
|
+
expect_state: handleExpectState,
|
|
461
479
|
wait_for_ui: handleWaitForUI,
|
|
462
480
|
find_element: handleFindElement,
|
|
463
481
|
tap: handleTap,
|