mobile-debug-mcp 0.26.4 → 0.26.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/interact/index.js +40 -7
- package/dist/observe/ios.js +47 -3
- package/dist/server-core.js +1 -1
- package/dist/utils/android/utils.js +35 -3
- package/docs/CHANGELOG.md +3 -0
- package/docs/ROADMAP.md +5 -5
- package/docs/rfcs/009-semantic-control-modeling-for-custom-and-composite-controls.md +238 -0
- package/docs/specs/mcp-tooling-spec-v1.md +22 -0
- package/package.json +1 -1
- package/src/interact/index.ts +40 -8
- package/src/observe/ios.ts +42 -3
- package/src/server-core.ts +1 -1
- package/src/types.ts +4 -0
- package/src/utils/android/utils.ts +30 -3
- package/test/unit/observe/find_element.test.ts +46 -0
- package/test/unit/observe/state_extraction.test.ts +65 -2
package/dist/interact/index.js
CHANGED
|
@@ -226,6 +226,13 @@ export class ToolsInteract {
|
|
|
226
226
|
const role = ToolsInteract._normalize(el.role ?? '');
|
|
227
227
|
return !!el.state?.value_range || /slider|seekbar|stepper|adjustable|range/.test(type) || /slider|seekbar|stepper|adjustable|range/.test(role);
|
|
228
228
|
}
|
|
229
|
+
static _isSemanticActionable(el) {
|
|
230
|
+
if (!el?.semantic)
|
|
231
|
+
return false;
|
|
232
|
+
if (el.semantic.adjustable)
|
|
233
|
+
return true;
|
|
234
|
+
return Array.isArray(el.semantic.supported_actions) && el.semantic.supported_actions.length > 0;
|
|
235
|
+
}
|
|
229
236
|
static _readNumericControlValue(el, property) {
|
|
230
237
|
if (!el?.state)
|
|
231
238
|
return null;
|
|
@@ -318,11 +325,11 @@ export class ToolsInteract {
|
|
|
318
325
|
static _resolveActionableAncestor(elements, chosen) {
|
|
319
326
|
if (!chosen)
|
|
320
327
|
return null;
|
|
321
|
-
if (chosen.el.clickable || chosen.el.focusable)
|
|
328
|
+
if (chosen.el.clickable || chosen.el.focusable || ToolsInteract._isSemanticActionable(chosen.el))
|
|
322
329
|
return chosen;
|
|
323
330
|
let current = chosen;
|
|
324
331
|
let safety = 0;
|
|
325
|
-
while (safety < 20 && current.el && !(current.el.clickable || current.el.focusable) && current.el.parentId !== undefined && current.el.parentId !== null) {
|
|
332
|
+
while (safety < 20 && current.el && !(current.el.clickable || current.el.focusable || ToolsInteract._isSemanticActionable(current.el)) && current.el.parentId !== undefined && current.el.parentId !== null) {
|
|
326
333
|
const parentId = current.el.parentId;
|
|
327
334
|
let parentIndex = null;
|
|
328
335
|
if (typeof parentId === 'number')
|
|
@@ -331,7 +338,7 @@ export class ToolsInteract {
|
|
|
331
338
|
parentIndex = Number(parentId);
|
|
332
339
|
if (parentIndex !== null && elements[parentIndex]) {
|
|
333
340
|
current = { el: elements[parentIndex], idx: parentIndex };
|
|
334
|
-
if (current.el.clickable || current.el.focusable)
|
|
341
|
+
if (current.el.clickable || current.el.focusable || ToolsInteract._isSemanticActionable(current.el))
|
|
335
342
|
return current;
|
|
336
343
|
}
|
|
337
344
|
else if (typeof parentId === 'string') {
|
|
@@ -339,7 +346,7 @@ export class ToolsInteract {
|
|
|
339
346
|
if (foundIndex === -1)
|
|
340
347
|
break;
|
|
341
348
|
current = { el: elements[foundIndex], idx: foundIndex };
|
|
342
|
-
if (current.el.clickable || current.el.focusable)
|
|
349
|
+
if (current.el.clickable || current.el.focusable || ToolsInteract._isSemanticActionable(current.el))
|
|
343
350
|
return current;
|
|
344
351
|
}
|
|
345
352
|
else {
|
|
@@ -355,7 +362,7 @@ export class ToolsInteract {
|
|
|
355
362
|
let bestArea = Infinity;
|
|
356
363
|
for (let i = 0; i < elements.length; i++) {
|
|
357
364
|
const el = elements[i];
|
|
358
|
-
if (!el || !(el.clickable || el.focusable))
|
|
365
|
+
if (!el || !(el.clickable || el.focusable || ToolsInteract._isSemanticActionable(el)))
|
|
359
366
|
continue;
|
|
360
367
|
const bounds = ToolsInteract._normalizeBounds(el.bounds);
|
|
361
368
|
if (!bounds)
|
|
@@ -904,11 +911,13 @@ export class ToolsInteract {
|
|
|
904
911
|
if (r <= l || b <= t)
|
|
905
912
|
return null;
|
|
906
913
|
// Do not early-return on non-interactable elements — score them so we can locate their clickable ancestor later
|
|
907
|
-
const interactable = !!(el.clickable || el.enabled || el.focusable);
|
|
914
|
+
const interactable = !!(el.clickable || el.enabled || el.focusable || ToolsInteract._isSemanticActionable(el));
|
|
908
915
|
const text = normalize(el.text ?? el.label ?? el.value ?? '');
|
|
909
916
|
const content = normalize(el.contentDescription ?? el.contentDesc ?? el.accessibilityLabel ?? '');
|
|
910
917
|
const resourceId = normalize(el.resourceId ?? el.resourceID ?? el.id ?? '');
|
|
911
918
|
const className = normalize(el.type ?? el.class ?? '');
|
|
919
|
+
const semanticRole = normalize(el.semantic?.semantic_role ?? '');
|
|
920
|
+
const semanticActions = Array.isArray(el.semantic?.supported_actions) ? el.semantic.supported_actions.map((action) => normalize(action)).filter(Boolean) : [];
|
|
912
921
|
let score = 0;
|
|
913
922
|
let reason = 'best_scoring_candidate';
|
|
914
923
|
if (exact) {
|
|
@@ -959,6 +968,30 @@ export class ToolsInteract {
|
|
|
959
968
|
reason = 'partial_class_match';
|
|
960
969
|
}
|
|
961
970
|
}
|
|
971
|
+
if (!exact) {
|
|
972
|
+
if (!score && semanticRole && semanticRole.includes(q)) {
|
|
973
|
+
score = 0.5;
|
|
974
|
+
reason = 'semantic_role_match';
|
|
975
|
+
}
|
|
976
|
+
if (semanticActions.some((action) => action.includes(q))) {
|
|
977
|
+
score = Math.max(score, score > 0 ? 0.65 : 0.6);
|
|
978
|
+
reason = 'semantic_action_match';
|
|
979
|
+
}
|
|
980
|
+
if (score === 0 && el.semantic?.adjustable && /slider|stepper|dropdown|segment|control|adjust/.test(q)) {
|
|
981
|
+
score = 0.45;
|
|
982
|
+
reason = 'semantic_control_match';
|
|
983
|
+
}
|
|
984
|
+
}
|
|
985
|
+
else {
|
|
986
|
+
if (!score && semanticRole && semanticRole === q) {
|
|
987
|
+
score = 0.5;
|
|
988
|
+
reason = 'semantic_role_match';
|
|
989
|
+
}
|
|
990
|
+
if (semanticActions.some((action) => action === q)) {
|
|
991
|
+
score = Math.max(score, score > 0 ? 0.65 : 0.6);
|
|
992
|
+
reason = 'semantic_action_match';
|
|
993
|
+
}
|
|
994
|
+
}
|
|
962
995
|
if (score > 0 && interactable)
|
|
963
996
|
score += 0.05;
|
|
964
997
|
if (score <= 0)
|
|
@@ -1087,7 +1120,7 @@ export class ToolsInteract {
|
|
|
1087
1120
|
interactable: true
|
|
1088
1121
|
};
|
|
1089
1122
|
}
|
|
1090
|
-
if (best && !(best.el.clickable || best.el.focusable)) {
|
|
1123
|
+
if (best && !(best.el.clickable || best.el.focusable || ToolsInteract._isSemanticActionable(best.el))) {
|
|
1091
1124
|
const nearbyActionable = ToolsInteract._resolveNearbyActionableControl(elements, { el: best.el, idx: best.idx }, screen);
|
|
1092
1125
|
if (nearbyActionable) {
|
|
1093
1126
|
best = {
|
package/dist/observe/ios.js
CHANGED
|
@@ -52,6 +52,12 @@ function normalizeIOSType(value) {
|
|
|
52
52
|
function inferIOSRole(type, traits) {
|
|
53
53
|
if (/slider|adjustable/.test(type) || traits.some((trait) => /adjustable|slider/.test(trait)))
|
|
54
54
|
return 'slider';
|
|
55
|
+
if (/stepper/.test(type))
|
|
56
|
+
return 'stepper';
|
|
57
|
+
if (/picker|pop up button|dropdown/.test(type))
|
|
58
|
+
return 'dropdown';
|
|
59
|
+
if (/segmented control/.test(type))
|
|
60
|
+
return 'segmented_control';
|
|
55
61
|
if (/button/.test(type) || traits.some((trait) => /button/.test(trait)))
|
|
56
62
|
return 'button';
|
|
57
63
|
if (/cell/.test(type))
|
|
@@ -99,11 +105,49 @@ function buildIOSSelector(type, label, value, stableId) {
|
|
|
99
105
|
return { value: type, confidence: buildIOSSelectorConfidence('type') };
|
|
100
106
|
return null;
|
|
101
107
|
}
|
|
102
|
-
function buildIOSSemantic(type, traits) {
|
|
103
|
-
|
|
108
|
+
function buildIOSSemantic(type, traits, role, value) {
|
|
109
|
+
const semantic = {
|
|
104
110
|
is_clickable: traits.includes("UIAccessibilityTraitButton") || /adjustable|slider/.test(type) || type === "Button" || type === "Cell",
|
|
105
111
|
is_container: /window|application|group|scroll view|collection view/.test(type)
|
|
106
112
|
};
|
|
113
|
+
if (role === 'slider') {
|
|
114
|
+
semantic.semantic_role = 'slider';
|
|
115
|
+
semantic.adjustable = true;
|
|
116
|
+
semantic.supported_actions = ['adjust'];
|
|
117
|
+
semantic.state_shape = 'continuous';
|
|
118
|
+
}
|
|
119
|
+
else if (role === 'stepper') {
|
|
120
|
+
semantic.semantic_role = 'stepper';
|
|
121
|
+
semantic.adjustable = true;
|
|
122
|
+
semantic.supported_actions = ['increment', 'decrement'];
|
|
123
|
+
semantic.state_shape = 'discrete';
|
|
124
|
+
}
|
|
125
|
+
else if (role === 'dropdown') {
|
|
126
|
+
semantic.semantic_role = 'dropdown';
|
|
127
|
+
semantic.supported_actions = ['tap', 'expand'];
|
|
128
|
+
semantic.state_shape = 'semantic';
|
|
129
|
+
}
|
|
130
|
+
else if (role === 'segmented_control') {
|
|
131
|
+
semantic.semantic_role = 'segmented_control';
|
|
132
|
+
semantic.supported_actions = ['tap'];
|
|
133
|
+
semantic.state_shape = 'discrete';
|
|
134
|
+
}
|
|
135
|
+
else if (traits.some((trait) => /adjustable|slider/i.test(trait)) || /adjustable|slider/.test(type)) {
|
|
136
|
+
semantic.semantic_role = 'custom_adjustable';
|
|
137
|
+
semantic.adjustable = true;
|
|
138
|
+
semantic.supported_actions = ['adjust'];
|
|
139
|
+
semantic.state_shape = 'continuous';
|
|
140
|
+
}
|
|
141
|
+
else if (semantic.is_clickable) {
|
|
142
|
+
semantic.supported_actions = ['tap'];
|
|
143
|
+
}
|
|
144
|
+
if (semantic.state_shape === undefined && semantic.adjustable && value !== null) {
|
|
145
|
+
const numericValue = parseIOSNumber(value);
|
|
146
|
+
if (numericValue !== null && numericValue >= 0 && numericValue <= 1) {
|
|
147
|
+
semantic.state_shape = 'continuous';
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
return semantic;
|
|
107
151
|
}
|
|
108
152
|
function isIOSAdjustable(node, type, traits) {
|
|
109
153
|
return /slider|adjustable|stepper/i.test(type) || traits.some((trait) => /adjustable|slider/i.test(trait));
|
|
@@ -162,8 +206,8 @@ export function traverseIDBNode(node, elements, parentIndex = -1, depth = 0) {
|
|
|
162
206
|
const normalizedType = normalizeIOSType(type);
|
|
163
207
|
const stableId = getIOSStableId(node);
|
|
164
208
|
const selector = buildIOSSelector(type, label, value, stableId);
|
|
165
|
-
const semantic = buildIOSSemantic(normalizedType, traits);
|
|
166
209
|
const role = inferIOSRole(normalizedType, traits);
|
|
210
|
+
const semantic = buildIOSSemantic(normalizedType, traits, role, value);
|
|
167
211
|
const clickable = traits.includes("UIAccessibilityTraitButton") || type === "Button" || type === "Cell";
|
|
168
212
|
const isUseful = clickable || (label && label.length > 0) || (value && value.length > 0) || type === "Application" || type === "Window";
|
|
169
213
|
if (isUseful) {
|
package/dist/server-core.js
CHANGED
|
@@ -6,7 +6,7 @@ import { handleToolCall } from './server/tool-handlers.js';
|
|
|
6
6
|
export { wrapResponse, toolDefinitions, handleToolCall };
|
|
7
7
|
export const serverInfo = {
|
|
8
8
|
name: 'mobile-debug-mcp',
|
|
9
|
-
version: '0.26.4'
|
|
9
|
+
version: '0.26.5'
|
|
10
10
|
};
|
|
11
11
|
export function createServer() {
|
|
12
12
|
const server = new Server(serverInfo, {
|
package/dist/utils/android/utils.js
CHANGED
|
@@ -362,6 +362,12 @@ function normalizeClassName(value) {
|
|
|
362
362
|
function inferAndroidRole(className) {
|
|
363
363
|
if (/seekbar|slider/.test(className))
|
|
364
364
|
return 'slider';
|
|
365
|
+
if (/stepper|numberpicker/.test(className))
|
|
366
|
+
return 'stepper';
|
|
367
|
+
if (/spinner|dropdown/.test(className))
|
|
368
|
+
return 'dropdown';
|
|
369
|
+
if (/segment|tablayout/.test(className))
|
|
370
|
+
return 'segmented_control';
|
|
365
371
|
if (/switch|toggle/.test(className))
|
|
366
372
|
return 'switch';
|
|
367
373
|
if (/checkbox/.test(className))
|
|
@@ -403,11 +409,37 @@ function buildAndroidSelector(text, contentDescription, resourceId, className) {
|
|
|
403
409
|
return { value: className, confidence: buildAndroidSelectorConfidence('class') };
|
|
404
410
|
return null;
|
|
405
411
|
}
|
|
406
|
-
function buildAndroidSemantic(clickable, className) {
|
|
407
|
-
|
|
412
|
+
function buildAndroidSemantic(clickable, className, role) {
|
|
413
|
+
const semantic = {
|
|
408
414
|
is_clickable: clickable,
|
|
409
415
|
is_container: /recyclerview|scroll|layout|viewgroup|frame/.test(className)
|
|
410
416
|
};
|
|
417
|
+
if (role === 'slider') {
|
|
418
|
+
semantic.semantic_role = 'slider';
|
|
419
|
+
semantic.adjustable = true;
|
|
420
|
+
semantic.supported_actions = ['adjust'];
|
|
421
|
+
semantic.state_shape = 'continuous';
|
|
422
|
+
}
|
|
423
|
+
else if (role === 'stepper') {
|
|
424
|
+
semantic.semantic_role = 'stepper';
|
|
425
|
+
semantic.adjustable = true;
|
|
426
|
+
semantic.supported_actions = ['increment', 'decrement'];
|
|
427
|
+
semantic.state_shape = 'discrete';
|
|
428
|
+
}
|
|
429
|
+
else if (role === 'dropdown') {
|
|
430
|
+
semantic.semantic_role = 'dropdown';
|
|
431
|
+
semantic.supported_actions = ['tap', 'expand'];
|
|
432
|
+
semantic.state_shape = 'semantic';
|
|
433
|
+
}
|
|
434
|
+
else if (role === 'segmented_control') {
|
|
435
|
+
semantic.semantic_role = 'segmented_control';
|
|
436
|
+
semantic.supported_actions = ['tap'];
|
|
437
|
+
semantic.state_shape = 'discrete';
|
|
438
|
+
}
|
|
439
|
+
else if (clickable) {
|
|
440
|
+
semantic.supported_actions = ['tap'];
|
|
441
|
+
}
|
|
442
|
+
return semantic;
|
|
411
443
|
}
|
|
412
444
|
function isSliderLikeAndroid(node) {
|
|
413
445
|
const className = String(node['@_class'] || '').toLowerCase();
|
|
@@ -487,7 +519,7 @@ export function traverseNode(node, elements, parentIndex = -1, depth = 0) {
|
|
|
487
519
|
const stableId = resourceId ?? (typeof contentDescription === 'string' && contentDescription.trim().length > 0 ? contentDescription : null);
|
|
488
520
|
const testTag = stableId;
|
|
489
521
|
const selector = buildAndroidSelector(text, contentDescription, resourceId, normalizeClassName(className));
|
|
490
|
-
const semantic = buildAndroidSemantic(clickable, normalizeClassName(className));
|
|
522
|
+
const semantic = buildAndroidSemantic(clickable, normalizeClassName(className), role);
|
|
491
523
|
const isUseful = clickable || (text && text.length > 0) || (contentDescription && contentDescription.length > 0);
|
|
492
524
|
if (isUseful) {
|
|
493
525
|
const element = {
|
package/docs/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to the **Mobile Debug MCP** project will be documented in this file.
|
|
4
4
|
|
|
5
|
+
## [0.26.5]
|
|
6
|
+
- Introduces a semantic control model to improve the identification and interaction with custom and composite UI controls.
|
|
7
|
+
|
|
5
8
|
## [0.26.4]
|
|
6
9
|
- Improved slider accuracy
|
|
7
10
|
|
package/docs/ROADMAP.md
CHANGED
|
@@ -44,6 +44,7 @@ Higher task success with fewer retries.
|
|
|
44
44
|
|
|
45
45
|
- Stronger State Verification — Complete (Foundational verification layer shipped)
|
|
46
46
|
- Richer Element Identity — Complete (Identity and selector confidence foundations shipped)
|
|
47
|
+
- Better Compose / Custom Control Semantics — Complete (Semantic role enrichment and custom-adjustable inference shipped)
|
|
47
48
|
|
|
48
49
|
## Current Focus
|
|
49
50
|
|
|
@@ -54,7 +55,6 @@ Higher task success with fewer retries.
|
|
|
54
55
|
|
|
55
56
|
- Environment Auto-Configuration and Toolchain Discovery
|
|
56
57
|
- Adjustable Control Support
|
|
57
|
-
- Better Compose / Custom Control Semantics
|
|
58
58
|
- Signal-Oriented Diagnostic Filtering
|
|
59
59
|
- Long Press Gesture
|
|
60
60
|
# Stronger State Verification
|
|
@@ -435,7 +435,7 @@ Strengthens:
|
|
|
435
435
|
## Rationale
|
|
436
436
|
Higher priority after agent feedback exposed custom control semantics as a core reliability gap, not a later optimization.
|
|
437
437
|
|
|
438
|
-
**Status:**
|
|
438
|
+
**Status:** Completed
|
|
439
439
|
|
|
440
440
|
Semantics become more useful once:
|
|
441
441
|
- identity is stronger
|
|
@@ -446,8 +446,8 @@ Semantics become more useful once:
|
|
|
446
446
|
|
|
447
447
|
## Scope
|
|
448
448
|
- Composite control traits
|
|
449
|
-
- Control role enrichment (
|
|
450
|
-
- Interaction
|
|
449
|
+
- Control role enrichment (`slider`, `stepper`, `dropdown`, `segmented_control`, `custom_adjustable`)
|
|
450
|
+
- Interaction contract metadata (`supported_actions`, `adjustable`, `state_shape`)
|
|
451
451
|
- Custom widget gesture affordance hints
|
|
452
452
|
- Semantic confidence annotations
|
|
453
453
|
- Compose-aware selectors for waits (merged semantics and element relationships)
|
|
@@ -457,7 +457,7 @@ High.
|
|
|
457
457
|
|
|
458
458
|
## Exit Criteria
|
|
459
459
|
- Semantic traits implemented for major custom control classes
|
|
460
|
-
- Interaction contracts surfaced in
|
|
460
|
+
- Interaction contracts surfaced in observation and resolution paths
|
|
461
461
|
- Confidence model defined for derived semantics
|
|
462
462
|
- Custom control manipulation success validated in benchmark flows
|
|
463
463
|
|
package/docs/rfcs/009-semantic-control-modeling-for-custom-and-composite-controls.md
CHANGED
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
# RFC 009 — Semantic Control Modeling for Custom and Composite Controls
|
|
2
|
+
|
|
3
|
+
## 1. Summary
|
|
4
|
+
|
|
5
|
+
This RFC defines a semantic control model for identifying, exposing, and interacting with custom and composite controls that are poorly represented through raw accessibility or platform UI trees.
|
|
6
|
+
|
|
7
|
+
It introduces semantic enrichment for controls such as:
|
|
8
|
+
|
|
9
|
+
- sliders
|
|
10
|
+
- steppers
|
|
11
|
+
- segmented controls
|
|
12
|
+
- dropdowns
|
|
13
|
+
- Compose/SwiftUI custom widgets
|
|
14
|
+
- composite gesture-driven controls
|
|
15
|
+
|
|
16
|
+
The goal is to improve target resolution, control interaction, and verification reliability for controls whose actionable semantics are not fully captured by raw snapshots.
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
## 2. Problem Statement
|
|
21
|
+
|
|
22
|
+
Current interaction logic works well when platform semantics are explicit.
|
|
23
|
+
|
|
24
|
+
It is weaker when controls appear as:
|
|
25
|
+
|
|
26
|
+
- generic container views
|
|
27
|
+
- unlabeled clickable wrappers
|
|
28
|
+
- nested composite controls
|
|
29
|
+
- custom Compose/SwiftUI components with weak accessibility exposure
|
|
30
|
+
|
|
31
|
+
Observed problems include:
|
|
32
|
+
|
|
33
|
+
- controls resolving as parent containers rather than actionable targets
|
|
34
|
+
- missing slider-like controls in snapshots
|
|
35
|
+
- weak distinction between discrete vs continuous controls
|
|
36
|
+
- inability to infer supported interactions from control structure
|
|
37
|
+
- unreliable verification of control state
|
|
38
|
+
|
|
39
|
+
This causes brittle automation and coordinate fallback behavior.
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
## 3. Goals
|
|
44
|
+
|
|
45
|
+
This RFC introduces a semantic layer that MUST:
|
|
46
|
+
|
|
47
|
+
- infer higher-level control semantics from raw UI structures
|
|
48
|
+
- enrich snapshots with semantic control metadata
|
|
49
|
+
- improve actionable target selection (RFC 007)
|
|
50
|
+
- improve adjustable control handling (RFC 008)
|
|
51
|
+
- improve verification for semantic control state
|
|
52
|
+
- reduce coordinate fallback usage
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
## 4. Non-Goals
|
|
57
|
+
|
|
58
|
+
This RFC does NOT define:
|
|
59
|
+
|
|
60
|
+
- replacement of raw accessibility trees
|
|
61
|
+
- ML-based semantic inference
|
|
62
|
+
- probabilistic control classification
|
|
63
|
+
- new gesture primitives
|
|
64
|
+
- autonomous planning behavior
|
|
65
|
+
|
|
66
|
+
Semantic modeling is deterministic enrichment layered over raw signals.
|
|
67
|
+
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
## 5. Runtime Surfaces
|
|
71
|
+
|
|
72
|
+
This RFC applies to existing runtime surfaces:
|
|
73
|
+
|
|
74
|
+
- findElementHandler
|
|
75
|
+
- _resolveActionableAncestor
|
|
76
|
+
- _buildResolvedElement
|
|
77
|
+
- tapElementHandler
|
|
78
|
+
- scrollToElementHandler
|
|
79
|
+
|
|
80
|
+
Semantic modeling augments these surfaces; it does not replace them.
|
|
81
|
+
|
|
82
|
+
---
|
|
83
|
+
|
|
84
|
+
## 6. Semantic Control Model
|
|
85
|
+
|
|
86
|
+
Controls MAY progressively expose semantic metadata such as:
|
|
87
|
+
|
|
88
|
+
```ts
|
|
89
|
+
interface SemanticControl {
|
|
90
|
+
semantic_role:
|
|
91
|
+
| "slider"
|
|
92
|
+
| "stepper"
|
|
93
|
+
| "dropdown"
|
|
94
|
+
| "segmented_control"
|
|
95
|
+
| "custom_adjustable"
|
|
96
|
+
| "composite_control";
|
|
97
|
+
|
|
98
|
+
supported_actions: string[];
|
|
99
|
+
|
|
100
|
+
adjustable: boolean;
|
|
101
|
+
|
|
102
|
+
state_shape:
|
|
103
|
+
| "continuous"
|
|
104
|
+
| "discrete"
|
|
105
|
+
| "semantic";
|
|
106
|
+
}
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
The control roles above represent an expected semantic model, not a claim that all such control classes are equally surfaced in the current runtime.
|
|
110
|
+
|
|
111
|
+
Current runtime support may initially expose simpler semantic signals such as:
|
|
112
|
+
- role hints
|
|
113
|
+
- semantic labels
|
|
114
|
+
- value_range metadata
|
|
115
|
+
- selector confidence or related resolution signals
|
|
116
|
+
|
|
117
|
+
Richer control roles are progressive extensions over time.
|
|
118
|
+
|
|
119
|
+
---
|
|
120
|
+
|
|
121
|
+
## 7. Semantic Inference Rules
|
|
122
|
+
|
|
123
|
+
Inference MAY use signals such as:
|
|
124
|
+
|
|
125
|
+
- accessibility role hints
|
|
126
|
+
- value_range metadata
|
|
127
|
+
- child composition patterns
|
|
128
|
+
- repeated selectable child structures
|
|
129
|
+
- platform traits (adjustable, selected, expanded)
|
|
130
|
+
- known control heuristics
|
|
131
|
+
|
|
132
|
+
Inference MUST be deterministic and explainable.
|
|
133
|
+
|
|
134
|
+
Raw signals always win on conflict.
|
|
135
|
+
|
|
136
|
+
Semantic inference confidence, where present, is advisory only and MUST NOT be treated as executable truth.
|
|
137
|
+
|
|
138
|
+
---
|
|
139
|
+
|
|
140
|
+
## 8. Resolution Integration (RFC 007)
|
|
141
|
+
|
|
142
|
+
Semantic metadata SHOULD improve target resolution by:
|
|
143
|
+
|
|
144
|
+
- preferring actionable child controls over generic containers
|
|
145
|
+
- promoting semantically actionable descendants
|
|
146
|
+
- disambiguating among multiple candidate matches
|
|
147
|
+
|
|
148
|
+
Semantic signals are advisory enrichment, not executable truth.
|
|
149
|
+
|
|
150
|
+
---
|
|
151
|
+
|
|
152
|
+
## 9. Adjustable Control Integration (RFC 008)
|
|
153
|
+
|
|
154
|
+
Where adjustable=true:
|
|
155
|
+
|
|
156
|
+
Semantic metadata MAY expose:
|
|
157
|
+
|
|
158
|
+
- supported adjustment mode
|
|
159
|
+
- discrete vs continuous state model
|
|
160
|
+
- expected verification strategy
|
|
161
|
+
|
|
162
|
+
This improves convergence for value-setting workflows.
|
|
163
|
+
|
|
164
|
+
---
|
|
165
|
+
|
|
166
|
+
## 10. Verification Integration
|
|
167
|
+
|
|
168
|
+
Verification MAY use semantic control metadata to improve:
|
|
169
|
+
|
|
170
|
+
- value-state verification
|
|
171
|
+
- discrete selection verification
|
|
172
|
+
- semantic-state checks
|
|
173
|
+
|
|
174
|
+
Formal verification still remains governed by RFC 005.
|
|
175
|
+
|
|
176
|
+
---
|
|
177
|
+
|
|
178
|
+
## 11. Output Contract (Progressive Extension)
|
|
179
|
+
|
|
180
|
+
Current runtime may expose partial semantic outputs.
|
|
181
|
+
|
|
182
|
+
Expected progressive shape (future extension model):
|
|
183
|
+
|
|
184
|
+
```ts
|
|
185
|
+
interface SemanticResolutionMetadata {
|
|
186
|
+
semantic_role?: string;
|
|
187
|
+
supported_actions?: string[];
|
|
188
|
+
adjustable?: boolean;
|
|
189
|
+
state_shape?: string;
|
|
190
|
+
confidence?: "low" | "medium" | "high";
|
|
191
|
+
}
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
These fields are progressive enrichment and MUST NOT be assumed universally present.
|
|
195
|
+
|
|
196
|
+
Implementations MAY expose only a subset of this model initially. Presence of a richer semantic role does not imply universal runtime support for all control classes.
|
|
197
|
+
|
|
198
|
+
---
|
|
199
|
+
|
|
200
|
+
## 12. Failure Modes
|
|
201
|
+
|
|
202
|
+
Semantic modeling MAY fail due to:
|
|
203
|
+
|
|
204
|
+
- insufficient raw signals
|
|
205
|
+
- ambiguous composite structures
|
|
206
|
+
- conflicting heuristics
|
|
207
|
+
|
|
208
|
+
When semantic inference confidence is insufficient:
|
|
209
|
+
|
|
210
|
+
- raw resolution flow MUST continue
|
|
211
|
+
- semantic fields MAY be omitted
|
|
212
|
+
- no semantic guessing should be forced
|
|
213
|
+
|
|
214
|
+
---
|
|
215
|
+
|
|
216
|
+
## 13. Success Metrics
|
|
217
|
+
|
|
218
|
+
- fewer coordinate fallbacks
|
|
219
|
+
- improved control discovery
|
|
220
|
+
- improved actionable-target precision
|
|
221
|
+
- improved slider/custom-control automation success
|
|
222
|
+
- reduced semantic mismatch failures (RFC 010)
|
|
223
|
+
|
|
224
|
+
---
|
|
225
|
+
|
|
226
|
+
## 14. Relationship to Other RFCs
|
|
227
|
+
|
|
228
|
+
RFC 005 — verification correctness model
|
|
229
|
+
RFC 006 — runtime action execution
|
|
230
|
+
RFC 007 — target resolution
|
|
231
|
+
RFC 008 — adjustable control support
|
|
232
|
+
RFC 010 — recovery uses semantic mismatch failures defined here
|
|
233
|
+
|
|
234
|
+
---
|
|
235
|
+
|
|
236
|
+
## 15. Summary
|
|
237
|
+
|
|
238
|
+
This RFC adds deterministic semantic control enrichment for custom and composite controls, improving resolution, interaction reliability, and verification while remaining layered over existing runtime signals.
|
package/docs/specs/mcp-tooling-spec-v1.md
CHANGED
|
@@ -244,6 +244,7 @@ Raw layer contents include:
|
|
|
244
244
|
- UI hierarchy or accessibility tree
|
|
245
245
|
- normalized readable element state where exposed by the platform
|
|
246
246
|
- platform-native identity hints such as stable identifiers, roles, and test tags
|
|
247
|
+
- semantic control metadata when derivable from the raw tree, including `semantic_role`, `supported_actions`, `adjustable`, and `state_shape`
|
|
247
248
|
- snapshot metadata such as `snapshot_revision` and `captured_at_ms`
|
|
248
249
|
- `loading_state` when a reliable loading signal is detectable
|
|
249
250
|
- screenshot when available
|
|
@@ -292,6 +293,27 @@ Semantic output MUST NOT replace classification or verification.
|
|
|
292
293
|
|
|
293
294
|
Classification remains a supplementary, post-action interpretation mechanism.
|
|
294
295
|
|
|
296
|
+
### 9.4 Semantic Control Metadata
|
|
297
|
+
|
|
298
|
+
When present, semantic control metadata MAY include:
|
|
299
|
+
|
|
300
|
+
```ts
|
|
301
|
+
{
|
|
302
|
+
semantic_role?: 'slider' | 'stepper' | 'dropdown' | 'segmented_control' | 'custom_adjustable' | 'composite_control' | null,
|
|
303
|
+
supported_actions?: string[] | null,
|
|
304
|
+
adjustable?: boolean | null,
|
|
305
|
+
state_shape?: 'continuous' | 'discrete' | 'semantic' | null
|
|
306
|
+
}
|
|
307
|
+
```
|
|
308
|
+
|
|
309
|
+
Rules:
|
|
310
|
+
|
|
311
|
+
- semantic control metadata is derived and best-effort
|
|
312
|
+
- raw platform roles and state remain authoritative on conflict
|
|
313
|
+
- `adjustable` MAY be inferred from platform traits when no known role matches
|
|
314
|
+
- `state_shape` MUST respect known control roles before value-based heuristics
|
|
315
|
+
- `supported_actions` are hints only and MUST NOT be treated as guaranteed executable actions
|
|
316
|
+
|
|
295
317
|
## 10. Classification
|
|
296
318
|
|
|
297
319
|
Tool: `classify_action_outcome`
|
package/package.json
CHANGED
package/src/interact/index.ts
CHANGED
|
@@ -16,6 +16,7 @@ import type {
|
|
|
16
16
|
ExpectStateResponse,
|
|
17
17
|
ExpectScreenResponse,
|
|
18
18
|
WaitForUIChangeResponse,
|
|
19
|
+
UIElementSemanticMetadata,
|
|
19
20
|
UIElementState,
|
|
20
21
|
TapElementResponse
|
|
21
22
|
} from '../types.js'
|
|
@@ -48,7 +49,7 @@ interface UiElement {
|
|
|
48
49
|
role?: string | null
|
|
49
50
|
test_tag?: string | null
|
|
50
51
|
selector?: { value: string | null, confidence: { score: number, reason: string } | null } | null
|
|
51
|
-
semantic?:
|
|
52
|
+
semantic?: UIElementSemanticMetadata | null
|
|
52
53
|
}
|
|
53
54
|
|
|
54
55
|
interface ResolvedUiElementContext {
|
|
@@ -342,6 +343,12 @@ export class ToolsInteract {
|
|
|
342
343
|
return !!el.state?.value_range || /slider|seekbar|stepper|adjustable|range/.test(type) || /slider|seekbar|stepper|adjustable|range/.test(role)
|
|
343
344
|
}
|
|
344
345
|
|
|
346
|
+
private static _isSemanticActionable(el: UiElement | null): boolean {
|
|
347
|
+
if (!el?.semantic) return false
|
|
348
|
+
if (el.semantic.adjustable) return true
|
|
349
|
+
return Array.isArray(el.semantic.supported_actions) && el.semantic.supported_actions.length > 0
|
|
350
|
+
}
|
|
351
|
+
|
|
345
352
|
private static _readNumericControlValue(el: UiElement | null, property: string): number | null {
|
|
346
353
|
if (!el?.state) return null
|
|
347
354
|
const stateValue = el.state[property as keyof UIElementState]
|
|
@@ -460,12 +467,12 @@ export class ToolsInteract {
|
|
|
460
467
|
|
|
461
468
|
private static _resolveActionableAncestor(elements: UiElement[], chosen: { el: UiElement, idx: number } | null): { el: UiElement, idx: number } | null {
|
|
462
469
|
if (!chosen) return null
|
|
463
|
-
if (chosen.el.clickable || chosen.el.focusable) return chosen
|
|
470
|
+
if (chosen.el.clickable || chosen.el.focusable || ToolsInteract._isSemanticActionable(chosen.el)) return chosen
|
|
464
471
|
|
|
465
472
|
let current = chosen
|
|
466
473
|
let safety = 0
|
|
467
474
|
|
|
468
|
-
while (safety < 20 && current.el && !(current.el.clickable || current.el.focusable) && current.el.parentId !== undefined && current.el.parentId !== null) {
|
|
475
|
+
while (safety < 20 && current.el && !(current.el.clickable || current.el.focusable || ToolsInteract._isSemanticActionable(current.el)) && current.el.parentId !== undefined && current.el.parentId !== null) {
|
|
469
476
|
const parentId = current.el.parentId
|
|
470
477
|
let parentIndex: number | null = null
|
|
471
478
|
|
|
@@ -474,12 +481,12 @@ export class ToolsInteract {
|
|
|
474
481
|
|
|
475
482
|
if (parentIndex !== null && elements[parentIndex]) {
|
|
476
483
|
current = { el: elements[parentIndex], idx: parentIndex }
|
|
477
|
-
if (current.el.clickable || current.el.focusable) return current
|
|
484
|
+
if (current.el.clickable || current.el.focusable || ToolsInteract._isSemanticActionable(current.el)) return current
|
|
478
485
|
} else if (typeof parentId === 'string') {
|
|
479
486
|
const foundIndex = elements.findIndex((el) => el.resourceId === parentId || el.id === parentId)
|
|
480
487
|
if (foundIndex === -1) break
|
|
481
488
|
current = { el: elements[foundIndex], idx: foundIndex }
|
|
482
|
-
if (current.el.clickable || current.el.focusable) return current
|
|
489
|
+
if (current.el.clickable || current.el.focusable || ToolsInteract._isSemanticActionable(current.el)) return current
|
|
483
490
|
} else {
|
|
484
491
|
break
|
|
485
492
|
}
|
|
@@ -496,7 +503,7 @@ export class ToolsInteract {
|
|
|
496
503
|
|
|
497
504
|
for (let i = 0; i < elements.length; i++) {
|
|
498
505
|
const el = elements[i]
|
|
499
|
-
if (!el || !(el.clickable || el.focusable)) continue
|
|
506
|
+
if (!el || !(el.clickable || el.focusable || ToolsInteract._isSemanticActionable(el))) continue
|
|
500
507
|
const bounds = ToolsInteract._normalizeBounds(el.bounds)
|
|
501
508
|
if (!bounds) continue
|
|
502
509
|
const [pl, pt, pr, pb] = bounds
|
|
@@ -1196,12 +1203,14 @@ export class ToolsInteract {
|
|
|
1196
1203
|
const [l,t,r,b] = bounds
|
|
1197
1204
|
if (r <= l || b <= t) return null
|
|
1198
1205
|
// Do not early-return on non-interactable elements — score them so we can locate their clickable ancestor later
|
|
1199
|
-
const interactable = !!(el.clickable || el.enabled || el.focusable)
|
|
1206
|
+
const interactable = !!(el.clickable || el.enabled || el.focusable || ToolsInteract._isSemanticActionable(el))
|
|
1200
1207
|
|
|
1201
1208
|
const text = normalize(el.text ?? el.label ?? el.value ?? '')
|
|
1202
1209
|
const content = normalize(el.contentDescription ?? el.contentDesc ?? el.accessibilityLabel ?? '')
|
|
1203
1210
|
const resourceId = normalize(el.resourceId ?? el.resourceID ?? el.id ?? '')
|
|
1204
1211
|
const className = normalize(el.type ?? el.class ?? '')
|
|
1212
|
+
const semanticRole = normalize(el.semantic?.semantic_role ?? '')
|
|
1213
|
+
const semanticActions = Array.isArray(el.semantic?.supported_actions) ? el.semantic.supported_actions.map((action) => normalize(action)).filter(Boolean) : []
|
|
1205
1214
|
|
|
1206
1215
|
let score = 0
|
|
1207
1216
|
let reason = 'best_scoring_candidate'
|
|
@@ -1243,6 +1252,29 @@ export class ToolsInteract {
|
|
|
1243
1252
|
reason = 'partial_class_match'
|
|
1244
1253
|
}
|
|
1245
1254
|
}
|
|
1255
|
+
if (!exact) {
|
|
1256
|
+
if (!score && semanticRole && semanticRole.includes(q)) {
|
|
1257
|
+
score = 0.5
|
|
1258
|
+
reason = 'semantic_role_match'
|
|
1259
|
+
}
|
|
1260
|
+
if (semanticActions.some((action) => action.includes(q))) {
|
|
1261
|
+
score = Math.max(score, score > 0 ? 0.65 : 0.6)
|
|
1262
|
+
reason = 'semantic_action_match'
|
|
1263
|
+
}
|
|
1264
|
+
if (score === 0 && el.semantic?.adjustable && /slider|stepper|dropdown|segment|control|adjust/.test(q)) {
|
|
1265
|
+
score = 0.45
|
|
1266
|
+
reason = 'semantic_control_match'
|
|
1267
|
+
}
|
|
1268
|
+
} else {
|
|
1269
|
+
if (!score && semanticRole && semanticRole === q) {
|
|
1270
|
+
score = 0.5
|
|
1271
|
+
reason = 'semantic_role_match'
|
|
1272
|
+
}
|
|
1273
|
+
if (semanticActions.some((action) => action === q)) {
|
|
1274
|
+
score = Math.max(score, score > 0 ? 0.65 : 0.6)
|
|
1275
|
+
reason = 'semantic_action_match'
|
|
1276
|
+
}
|
|
1277
|
+
}
|
|
1246
1278
|
if (score > 0 && interactable) score += 0.05
|
|
1247
1279
|
if (score <= 0) return null
|
|
1248
1280
|
return { el, idx, score, reason, interactable }
|
|
@@ -1352,7 +1384,7 @@ export class ToolsInteract {
|
|
|
1352
1384
|
}
|
|
1353
1385
|
}
|
|
1354
1386
|
|
|
1355
|
-
if (best && !(best.el.clickable || best.el.focusable)) {
|
|
1387
|
+
if (best && !(best.el.clickable || best.el.focusable || ToolsInteract._isSemanticActionable(best.el))) {
|
|
1356
1388
|
const nearbyActionable = ToolsInteract._resolveNearbyActionableControl(elements, { el: best.el, idx: best.idx }, screen)
|
|
1357
1389
|
if (nearbyActionable) {
|
|
1358
1390
|
best = {
|
package/src/observe/ios.ts
CHANGED
|
@@ -73,6 +73,9 @@ function normalizeIOSType(value: unknown): string {
|
|
|
73
73
|
|
|
74
74
|
function inferIOSRole(type: string, traits: string[]): string | null {
|
|
75
75
|
if (/slider|adjustable/.test(type) || traits.some((trait) => /adjustable|slider/.test(trait))) return 'slider'
|
|
76
|
+
if (/stepper/.test(type)) return 'stepper'
|
|
77
|
+
if (/picker|pop up button|dropdown/.test(type)) return 'dropdown'
|
|
78
|
+
if (/segmented control/.test(type)) return 'segmented_control'
|
|
76
79
|
if (/button/.test(type) || traits.some((trait) => /button/.test(trait))) return 'button'
|
|
77
80
|
if (/cell/.test(type)) return 'cell'
|
|
78
81
|
if (/switch/.test(type)) return 'switch'
|
|
@@ -113,11 +116,47 @@ function buildIOSSelector(type: string, label: string | null, value: string | nu
|
|
|
113
116
|
return null
|
|
114
117
|
}
|
|
115
118
|
|
|
116
|
-
function buildIOSSemantic(type: string, traits: string[]): UIElementSemanticMetadata {
|
|
117
|
-
|
|
119
|
+
function buildIOSSemantic(type: string, traits: string[], role: string | null, value: string | null): UIElementSemanticMetadata {
|
|
120
|
+
const semantic: UIElementSemanticMetadata = {
|
|
118
121
|
is_clickable: traits.includes("UIAccessibilityTraitButton") || /adjustable|slider/.test(type) || type === "Button" || type === "Cell",
|
|
119
122
|
is_container: /window|application|group|scroll view|collection view/.test(type)
|
|
120
123
|
}
|
|
124
|
+
|
|
125
|
+
if (role === 'slider') {
|
|
126
|
+
semantic.semantic_role = 'slider'
|
|
127
|
+
semantic.adjustable = true
|
|
128
|
+
semantic.supported_actions = ['adjust']
|
|
129
|
+
semantic.state_shape = 'continuous'
|
|
130
|
+
} else if (role === 'stepper') {
|
|
131
|
+
semantic.semantic_role = 'stepper'
|
|
132
|
+
semantic.adjustable = true
|
|
133
|
+
semantic.supported_actions = ['increment', 'decrement']
|
|
134
|
+
semantic.state_shape = 'discrete'
|
|
135
|
+
} else if (role === 'dropdown') {
|
|
136
|
+
semantic.semantic_role = 'dropdown'
|
|
137
|
+
semantic.supported_actions = ['tap', 'expand']
|
|
138
|
+
semantic.state_shape = 'semantic'
|
|
139
|
+
} else if (role === 'segmented_control') {
|
|
140
|
+
semantic.semantic_role = 'segmented_control'
|
|
141
|
+
semantic.supported_actions = ['tap']
|
|
142
|
+
semantic.state_shape = 'discrete'
|
|
143
|
+
} else if (traits.some((trait) => /adjustable|slider/i.test(trait)) || /adjustable|slider/.test(type)) {
|
|
144
|
+
semantic.semantic_role = 'custom_adjustable'
|
|
145
|
+
semantic.adjustable = true
|
|
146
|
+
semantic.supported_actions = ['adjust']
|
|
147
|
+
semantic.state_shape = 'continuous'
|
|
148
|
+
} else if (semantic.is_clickable) {
|
|
149
|
+
semantic.supported_actions = ['tap']
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
if (semantic.state_shape === undefined && semantic.adjustable && value !== null) {
|
|
153
|
+
const numericValue = parseIOSNumber(value)
|
|
154
|
+
if (numericValue !== null && numericValue >= 0 && numericValue <= 1) {
|
|
155
|
+
semantic.state_shape = 'continuous'
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
return semantic
|
|
121
160
|
}
|
|
122
161
|
|
|
123
162
|
function isIOSAdjustable(node: IDBElement, type: string, traits: string[]): boolean {
|
|
@@ -184,8 +223,8 @@ export function traverseIDBNode(node: IDBElement, elements: UIElement[], parentI
|
|
|
184
223
|
const normalizedType = normalizeIOSType(type)
|
|
185
224
|
const stableId = getIOSStableId(node)
|
|
186
225
|
const selector = buildIOSSelector(type, label, value, stableId)
|
|
187
|
-
const semantic = buildIOSSemantic(normalizedType, traits)
|
|
188
226
|
const role = inferIOSRole(normalizedType, traits)
|
|
227
|
+
const semantic = buildIOSSemantic(normalizedType, traits, role, value)
|
|
189
228
|
|
|
190
229
|
const clickable = traits.includes("UIAccessibilityTraitButton") || type === "Button" || type === "Cell";
|
|
191
230
|
|
package/src/server-core.ts
CHANGED
package/src/types.ts
CHANGED
|
@@ -107,6 +107,10 @@ export interface UIResolutionSelector {
|
|
|
107
107
|
export interface UIElementSemanticMetadata {
|
|
108
108
|
is_clickable: boolean;
|
|
109
109
|
is_container: boolean;
|
|
110
|
+
semantic_role?: 'slider' | 'stepper' | 'dropdown' | 'segmented_control' | 'custom_adjustable' | 'composite_control' | null;
|
|
111
|
+
supported_actions?: string[] | null;
|
|
112
|
+
adjustable?: boolean | null;
|
|
113
|
+
state_shape?: 'continuous' | 'discrete' | 'semantic' | null;
|
|
110
114
|
}
|
|
111
115
|
|
|
112
116
|
export interface LoadingState {
|
|
@@ -342,6 +342,9 @@ function normalizeClassName(value: unknown): string {
|
|
|
342
342
|
|
|
343
343
|
function inferAndroidRole(className: string): string | null {
|
|
344
344
|
if (/seekbar|slider/.test(className)) return 'slider'
|
|
345
|
+
if (/stepper|numberpicker/.test(className)) return 'stepper'
|
|
346
|
+
if (/spinner|dropdown/.test(className)) return 'dropdown'
|
|
347
|
+
if (/segment|tablayout/.test(className)) return 'segmented_control'
|
|
345
348
|
if (/switch|toggle/.test(className)) return 'switch'
|
|
346
349
|
if (/checkbox/.test(className)) return 'checkbox'
|
|
347
350
|
if (/radiobutton|radio/.test(className)) return 'radio'
|
|
@@ -375,11 +378,35 @@ function buildAndroidSelector(text: string | null, contentDescription: string |
|
|
|
375
378
|
return null
|
|
376
379
|
}
|
|
377
380
|
|
|
378
|
-
function buildAndroidSemantic(clickable: boolean, className: string): UIElementSemanticMetadata {
|
|
379
|
-
|
|
381
|
+
function buildAndroidSemantic(clickable: boolean, className: string, role: string | null): UIElementSemanticMetadata {
|
|
382
|
+
const semantic: UIElementSemanticMetadata = {
|
|
380
383
|
is_clickable: clickable,
|
|
381
384
|
is_container: /recyclerview|scroll|layout|viewgroup|frame/.test(className)
|
|
382
385
|
}
|
|
386
|
+
|
|
387
|
+
if (role === 'slider') {
|
|
388
|
+
semantic.semantic_role = 'slider'
|
|
389
|
+
semantic.adjustable = true
|
|
390
|
+
semantic.supported_actions = ['adjust']
|
|
391
|
+
semantic.state_shape = 'continuous'
|
|
392
|
+
} else if (role === 'stepper') {
|
|
393
|
+
semantic.semantic_role = 'stepper'
|
|
394
|
+
semantic.adjustable = true
|
|
395
|
+
semantic.supported_actions = ['increment', 'decrement']
|
|
396
|
+
semantic.state_shape = 'discrete'
|
|
397
|
+
} else if (role === 'dropdown') {
|
|
398
|
+
semantic.semantic_role = 'dropdown'
|
|
399
|
+
semantic.supported_actions = ['tap', 'expand']
|
|
400
|
+
semantic.state_shape = 'semantic'
|
|
401
|
+
} else if (role === 'segmented_control') {
|
|
402
|
+
semantic.semantic_role = 'segmented_control'
|
|
403
|
+
semantic.supported_actions = ['tap']
|
|
404
|
+
semantic.state_shape = 'discrete'
|
|
405
|
+
} else if (clickable) {
|
|
406
|
+
semantic.supported_actions = ['tap']
|
|
407
|
+
}
|
|
408
|
+
|
|
409
|
+
return semantic
|
|
383
410
|
}
|
|
384
411
|
|
|
385
412
|
function isSliderLikeAndroid(node: any): boolean {
|
|
@@ -459,7 +486,7 @@ export function traverseNode(node: any, elements: UIElement[], parentIndex: numb
|
|
|
459
486
|
const stableId = resourceId ?? (typeof contentDescription === 'string' && contentDescription.trim().length > 0 ? contentDescription : null)
|
|
460
487
|
const testTag = stableId
|
|
461
488
|
const selector = buildAndroidSelector(text, contentDescription, resourceId, normalizeClassName(className))
|
|
462
|
-
const semantic = buildAndroidSemantic(clickable, normalizeClassName(className))
|
|
489
|
+
const semantic = buildAndroidSemantic(clickable, normalizeClassName(className), role)
|
|
463
490
|
|
|
464
491
|
const isUseful = clickable || (text && text.length > 0) || (contentDescription && contentDescription.length > 0);
|
|
465
492
|
|
|
@@ -78,6 +78,52 @@ async function run() {
|
|
|
78
78
|
assert.ok((res4.resolution?.alternates || []).length >= 1, 'Parent promotion should preserve alternates')
|
|
79
79
|
process.stdout.write('Test 4: ' + (pass4 ? 'PASS' : 'FAIL') + '\n');
|
|
80
80
|
|
|
81
|
+
// Test 4b: semantic-only stepper should be discoverable by supported action
|
|
82
|
+
;(ToolsObserve as any).getUITreeHandler = async () => ({
|
|
83
|
+
device: { platform: 'android', id: 'mock' },
|
|
84
|
+
screen: '',
|
|
85
|
+
resolution: { width: 1080, height: 1920 },
|
|
86
|
+
elements: [
|
|
87
|
+
{
|
|
88
|
+
text: null,
|
|
89
|
+
contentDescription: 'Quantity stepper',
|
|
90
|
+
type: 'android.widget.NumberPicker',
|
|
91
|
+
clickable: false,
|
|
92
|
+
enabled: true,
|
|
93
|
+
visible: true,
|
|
94
|
+
bounds: [10,10,200,80],
|
|
95
|
+
resourceId: 'picker_quantity',
|
|
96
|
+
semantic: {
|
|
97
|
+
is_clickable: false,
|
|
98
|
+
is_container: true,
|
|
99
|
+
semantic_role: 'stepper',
|
|
100
|
+
supported_actions: ['increment', 'decrement'],
|
|
101
|
+
adjustable: true,
|
|
102
|
+
state_shape: 'discrete'
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
]
|
|
106
|
+
})
|
|
107
|
+
|
|
108
|
+
const res4b: any = await ToolsInteract.findElementHandler({ query: 'increment', exact: false, platform: 'android', timeoutMs: 300 })
|
|
109
|
+
process.stdout.write('res4b ' + JSON.stringify(res4b, null, 2) + '\n');
|
|
110
|
+
const pass4b = res4b.found === true && res4b.element && res4b.element.resourceId === 'picker_quantity' && res4b.element.semantic?.semantic_role === 'stepper'
|
|
111
|
+
assert.ok(pass4b, 'Semantic-only steppers should be discoverable by supported actions')
|
|
112
|
+
assert.strictEqual(res4b.resolution?.reason, 'semantic_action_match')
|
|
113
|
+
process.stdout.write('Test 4b: ' + (pass4b ? 'PASS' : 'FAIL') + '\n');
|
|
114
|
+
|
|
115
|
+
const res4bb: any = await ToolsInteract.findElementHandler({ query: 'increment', exact: true, platform: 'android', timeoutMs: 300 })
|
|
116
|
+
process.stdout.write('res4bb ' + JSON.stringify(res4bb, null, 2) + '\n');
|
|
117
|
+
const pass4bb = res4bb.found === true && res4bb.element && res4bb.element.resourceId === 'picker_quantity' && res4bb.resolution?.reason === 'semantic_action_match'
|
|
118
|
+
assert.ok(pass4bb, 'Exact searches should still match exact semantic actions')
|
|
119
|
+
process.stdout.write('Test 4bb: ' + (pass4bb ? 'PASS' : 'FAIL') + '\n');
|
|
120
|
+
|
|
121
|
+
const res4c: any = await ToolsInteract.findElementHandler({ query: 'control', exact: true, platform: 'android', timeoutMs: 300 })
|
|
122
|
+
process.stdout.write('res4c ' + JSON.stringify(res4c, null, 2) + '\n');
|
|
123
|
+
const pass4c = res4c.found === false
|
|
124
|
+
assert.ok(pass4c, 'Exact searches should not fall back to broad semantic keywords')
|
|
125
|
+
process.stdout.write('Test 4c: ' + (pass4c ? 'PASS' : 'FAIL') + '\n');
|
|
126
|
+
|
|
81
127
|
// Test 5: duration label should resolve to the nearby slider control
|
|
82
128
|
;(ToolsObserve as any).getUITreeHandler = async () => ({
|
|
83
129
|
device: { platform: 'android', id: 'mock' },
|
|
@@ -29,7 +29,14 @@ async function run() {
|
|
|
29
29
|
value: 'com.example:id/duration',
|
|
30
30
|
confidence: { score: 1, reason: 'resource_id' }
|
|
31
31
|
})
|
|
32
|
-
assert.deepStrictEqual(androidElements[0].semantic, {
|
|
32
|
+
assert.deepStrictEqual(androidElements[0].semantic, {
|
|
33
|
+
is_clickable: true,
|
|
34
|
+
is_container: false,
|
|
35
|
+
semantic_role: 'slider',
|
|
36
|
+
supported_actions: ['adjust'],
|
|
37
|
+
adjustable: true,
|
|
38
|
+
state_shape: 'continuous'
|
|
39
|
+
})
|
|
33
40
|
|
|
34
41
|
const androidProgressElements: any[] = []
|
|
35
42
|
traverseNode({
|
|
@@ -44,6 +51,21 @@ async function run() {
|
|
|
44
51
|
|
|
45
52
|
assert.notStrictEqual(androidProgressElements[0]?.role, 'slider')
|
|
46
53
|
assert.notStrictEqual(androidProgressElements[0]?.state?.value, 40)
|
|
54
|
+
assert.notStrictEqual(androidProgressElements[0]?.semantic?.adjustable, true)
|
|
55
|
+
|
|
56
|
+
const androidStepperElements: any[] = []
|
|
57
|
+
traverseNode({
|
|
58
|
+
'@_class': 'android.widget.NumberPicker',
|
|
59
|
+
'@_text': 'Quantity',
|
|
60
|
+
'@_content-desc': 'Quantity stepper',
|
|
61
|
+
'@_clickable': 'false',
|
|
62
|
+
'@_enabled': 'true',
|
|
63
|
+
'@_bounds': '[0,0][200,80]'
|
|
64
|
+
}, androidStepperElements)
|
|
65
|
+
assert.strictEqual(androidStepperElements[0].role, 'stepper')
|
|
66
|
+
assert.deepStrictEqual(androidStepperElements[0].semantic?.semantic_role, 'stepper')
|
|
67
|
+
assert.deepStrictEqual(androidStepperElements[0].semantic?.supported_actions, ['increment', 'decrement'])
|
|
68
|
+
assert.strictEqual(androidStepperElements[0].semantic?.adjustable, true)
|
|
47
69
|
|
|
48
70
|
const androidFallbackElements: any[] = []
|
|
49
71
|
traverseNode({
|
|
@@ -82,7 +104,14 @@ async function run() {
|
|
|
82
104
|
value: 'playback_speed_slider',
|
|
83
105
|
confidence: { score: 1, reason: 'accessibility_identifier' }
|
|
84
106
|
})
|
|
85
|
-
assert.deepStrictEqual(iosElements[0].semantic, {
|
|
107
|
+
assert.deepStrictEqual(iosElements[0].semantic, {
|
|
108
|
+
is_clickable: true,
|
|
109
|
+
is_container: false,
|
|
110
|
+
semantic_role: 'slider',
|
|
111
|
+
supported_actions: ['adjust'],
|
|
112
|
+
adjustable: true,
|
|
113
|
+
state_shape: 'continuous'
|
|
114
|
+
})
|
|
86
115
|
|
|
87
116
|
const iosProgressElements: any[] = []
|
|
88
117
|
traverseIDBNode({
|
|
@@ -94,6 +123,40 @@ async function run() {
|
|
|
94
123
|
|
|
95
124
|
assert.notStrictEqual(iosProgressElements[0]?.role, 'slider')
|
|
96
125
|
|
|
126
|
+
const iosStepperElements: any[] = []
|
|
127
|
+
traverseIDBNode({
|
|
128
|
+
AXElementType: 'Stepper',
|
|
129
|
+
AXLabel: 'Quantity',
|
|
130
|
+
AXValue: '1',
|
|
131
|
+
AXTraits: ['UIAccessibilityTraitAdjustable']
|
|
132
|
+
}, iosStepperElements)
|
|
133
|
+
assert.strictEqual(iosStepperElements[0].role, 'stepper')
|
|
134
|
+
assert.strictEqual(iosStepperElements[0].semantic?.semantic_role, 'stepper')
|
|
135
|
+
assert.deepStrictEqual(iosStepperElements[0].semantic?.supported_actions, ['increment', 'decrement'])
|
|
136
|
+
assert.strictEqual(iosStepperElements[0].semantic?.state_shape, 'discrete')
|
|
137
|
+
|
|
138
|
+
const iosSegmentedElements: any[] = []
|
|
139
|
+
traverseIDBNode({
|
|
140
|
+
AXElementType: 'Segmented Control',
|
|
141
|
+
AXLabel: 'Playback mode',
|
|
142
|
+
AXTraits: ['UIAccessibilityTraitButton']
|
|
143
|
+
}, iosSegmentedElements)
|
|
144
|
+
assert.strictEqual(iosSegmentedElements[0].role, 'segmented_control')
|
|
145
|
+
assert.strictEqual(iosSegmentedElements[0].semantic?.semantic_role, 'segmented_control')
|
|
146
|
+
assert.deepStrictEqual(iosSegmentedElements[0].semantic?.supported_actions, ['tap'])
|
|
147
|
+
|
|
148
|
+
const iosCustomAdjustableElements: any[] = []
|
|
149
|
+
traverseIDBNode({
|
|
150
|
+
AXElementType: 'CustomControl',
|
|
151
|
+
AXLabel: 'Intensity',
|
|
152
|
+
AXValue: '0.25',
|
|
153
|
+
AXTraits: ['UIAccessibilityTraitAdjustable']
|
|
154
|
+
}, iosCustomAdjustableElements)
|
|
155
|
+
assert.strictEqual(iosCustomAdjustableElements[0].semantic?.semantic_role, 'custom_adjustable')
|
|
156
|
+
assert.strictEqual(iosCustomAdjustableElements[0].semantic?.adjustable, true)
|
|
157
|
+
assert.deepStrictEqual(iosCustomAdjustableElements[0].semantic?.supported_actions, ['adjust'])
|
|
158
|
+
assert.strictEqual(iosCustomAdjustableElements[0].semantic?.state_shape, 'continuous')
|
|
159
|
+
|
|
97
160
|
const iosFallbackElements: any[] = []
|
|
98
161
|
traverseIDBNode({
|
|
99
162
|
AXElementType: 'Button',
|