mobile-debug-mcp 0.26.4 → 0.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/interact/index.js +392 -192
- package/dist/observe/ios.js +47 -3
- package/dist/server/common.js +39 -0
- package/dist/server-core.js +1 -1
- package/dist/utils/android/utils.js +35 -3
- package/docs/CHANGELOG.md +6 -0
- package/docs/ROADMAP.md +114 -16
- package/docs/rfcs/009-semantic-control-modeling-for-custom-and-composite-controls.md +238 -0
- package/docs/rfcs/010-verification-stabilization-and-temporal-convergence.md +265 -0
- package/docs/rfcs/011-recovery-and-replanning-for-failed-or-ambiguous-interaction-flows.md +321 -0
- package/docs/rfcs/011.1-recovery-contract-types-and-runtime-wiring-spec.md +253 -0
- package/docs/rfcs/012.md +203 -0
- package/docs/specs/mcp-tooling-spec-v1.md +34 -0
- package/docs/tools/interact.md +10 -0
- package/package.json +1 -1
- package/src/interact/index.ts +433 -194
- package/src/observe/ios.ts +42 -3
- package/src/server/common.ts +44 -1
- package/src/server-core.ts +1 -1
- package/src/types.ts +41 -1
- package/src/utils/android/utils.ts +30 -3
- package/test/unit/interact/adjust_control.test.ts +77 -1
- package/test/unit/interact/verification_stabilization.test.ts +94 -0
- package/test/unit/observe/find_element.test.ts +46 -0
- package/test/unit/observe/state_extraction.test.ts +65 -2
- package/test/unit/server/common.test.ts +36 -1
package/dist/observe/ios.js
CHANGED
|
@@ -52,6 +52,12 @@ function normalizeIOSType(value) {
|
|
|
52
52
|
function inferIOSRole(type, traits) {
|
|
53
53
|
if (/slider|adjustable/.test(type) || traits.some((trait) => /adjustable|slider/.test(trait)))
|
|
54
54
|
return 'slider';
|
|
55
|
+
if (/stepper/.test(type))
|
|
56
|
+
return 'stepper';
|
|
57
|
+
if (/picker|pop up button|dropdown/.test(type))
|
|
58
|
+
return 'dropdown';
|
|
59
|
+
if (/segmented control/.test(type))
|
|
60
|
+
return 'segmented_control';
|
|
55
61
|
if (/button/.test(type) || traits.some((trait) => /button/.test(trait)))
|
|
56
62
|
return 'button';
|
|
57
63
|
if (/cell/.test(type))
|
|
@@ -99,11 +105,49 @@ function buildIOSSelector(type, label, value, stableId) {
|
|
|
99
105
|
return { value: type, confidence: buildIOSSelectorConfidence('type') };
|
|
100
106
|
return null;
|
|
101
107
|
}
|
|
102
|
-
function buildIOSSemantic(type, traits) {
|
|
103
|
-
|
|
108
|
+
function buildIOSSemantic(type, traits, role, value) {
|
|
109
|
+
const semantic = {
|
|
104
110
|
is_clickable: traits.includes("UIAccessibilityTraitButton") || /adjustable|slider/.test(type) || type === "Button" || type === "Cell",
|
|
105
111
|
is_container: /window|application|group|scroll view|collection view/.test(type)
|
|
106
112
|
};
|
|
113
|
+
if (role === 'slider') {
|
|
114
|
+
semantic.semantic_role = 'slider';
|
|
115
|
+
semantic.adjustable = true;
|
|
116
|
+
semantic.supported_actions = ['adjust'];
|
|
117
|
+
semantic.state_shape = 'continuous';
|
|
118
|
+
}
|
|
119
|
+
else if (role === 'stepper') {
|
|
120
|
+
semantic.semantic_role = 'stepper';
|
|
121
|
+
semantic.adjustable = true;
|
|
122
|
+
semantic.supported_actions = ['increment', 'decrement'];
|
|
123
|
+
semantic.state_shape = 'discrete';
|
|
124
|
+
}
|
|
125
|
+
else if (role === 'dropdown') {
|
|
126
|
+
semantic.semantic_role = 'dropdown';
|
|
127
|
+
semantic.supported_actions = ['tap', 'expand'];
|
|
128
|
+
semantic.state_shape = 'semantic';
|
|
129
|
+
}
|
|
130
|
+
else if (role === 'segmented_control') {
|
|
131
|
+
semantic.semantic_role = 'segmented_control';
|
|
132
|
+
semantic.supported_actions = ['tap'];
|
|
133
|
+
semantic.state_shape = 'discrete';
|
|
134
|
+
}
|
|
135
|
+
else if (traits.some((trait) => /adjustable|slider/i.test(trait)) || /adjustable|slider/.test(type)) {
|
|
136
|
+
semantic.semantic_role = 'custom_adjustable';
|
|
137
|
+
semantic.adjustable = true;
|
|
138
|
+
semantic.supported_actions = ['adjust'];
|
|
139
|
+
semantic.state_shape = 'continuous';
|
|
140
|
+
}
|
|
141
|
+
else if (semantic.is_clickable) {
|
|
142
|
+
semantic.supported_actions = ['tap'];
|
|
143
|
+
}
|
|
144
|
+
if (semantic.state_shape === undefined && semantic.adjustable && value !== null) {
|
|
145
|
+
const numericValue = parseIOSNumber(value);
|
|
146
|
+
if (numericValue !== null && numericValue >= 0 && numericValue <= 1) {
|
|
147
|
+
semantic.state_shape = 'continuous';
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
return semantic;
|
|
107
151
|
}
|
|
108
152
|
function isIOSAdjustable(node, type, traits) {
|
|
109
153
|
return /slider|adjustable|stepper/i.test(type) || traits.some((trait) => /adjustable|slider/i.test(trait));
|
|
@@ -162,8 +206,8 @@ export function traverseIDBNode(node, elements, parentIndex = -1, depth = 0) {
|
|
|
162
206
|
const normalizedType = normalizeIOSType(type);
|
|
163
207
|
const stableId = getIOSStableId(node);
|
|
164
208
|
const selector = buildIOSSelector(type, label, value, stableId);
|
|
165
|
-
const semantic = buildIOSSemantic(normalizedType, traits);
|
|
166
209
|
const role = inferIOSRole(normalizedType, traits);
|
|
210
|
+
const semantic = buildIOSSemantic(normalizedType, traits, role, value);
|
|
167
211
|
const clickable = traits.includes("UIAccessibilityTraitButton") || type === "Button" || type === "Cell";
|
|
168
212
|
const isUseful = clickable || (label && label.length > 0) || (value && value.length > 0) || type === "Application" || type === "Window";
|
|
169
213
|
if (isUseful) {
|
package/dist/server/common.js
CHANGED
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
import { ToolsObserve } from '../observe/index.js';
|
|
2
|
+
export const DEFAULT_MAX_RECOVERY_ATTEMPTS = 3;
|
|
3
|
+
export const DEFAULT_MAX_RETRY_DEPTH = 3;
|
|
2
4
|
export function wrapResponse(data) {
|
|
3
5
|
return {
|
|
4
6
|
content: [{
|
|
@@ -86,6 +88,8 @@ export function normalizeResolvedTarget(value = null) {
|
|
|
86
88
|
export function inferGenericFailure(message) {
|
|
87
89
|
if (message && /timeout/i.test(message))
|
|
88
90
|
return { failureCode: 'TIMEOUT', retryable: true };
|
|
91
|
+
if (message && /semantic mismatch/i.test(message))
|
|
92
|
+
return { failureCode: 'SEMANTIC_MISMATCH', retryable: false };
|
|
89
93
|
return { failureCode: 'UNKNOWN', retryable: false };
|
|
90
94
|
}
|
|
91
95
|
export function inferScrollFailure(message) {
|
|
@@ -106,6 +110,40 @@ export function determineActionLifecycleState({ success, failure }) {
|
|
|
106
110
|
return ACTION_LIFECYCLE_STATE_BY_OUTCOME.success;
|
|
107
111
|
return ACTION_LIFECYCLE_STATE_BY_OUTCOME.success;
|
|
108
112
|
}
|
|
113
|
+
function mapFailureCodeToFailureClass(code) {
|
|
114
|
+
switch (code) {
|
|
115
|
+
case 'ELEMENT_NOT_FOUND':
|
|
116
|
+
case 'AMBIGUOUS_TARGET':
|
|
117
|
+
case 'STALE_REFERENCE':
|
|
118
|
+
return 'TargetResolutionFailure';
|
|
119
|
+
case 'ELEMENT_NOT_INTERACTABLE':
|
|
120
|
+
return 'ExecutionFailure';
|
|
121
|
+
case 'TIMEOUT':
|
|
122
|
+
case 'ACTION_REJECTED':
|
|
123
|
+
case 'NAVIGATION_NO_CHANGE':
|
|
124
|
+
case 'UNKNOWN':
|
|
125
|
+
return 'ExecutionFailure';
|
|
126
|
+
case 'VERIFICATION_FAILED':
|
|
127
|
+
case 'EXPECT_STATE_MISMATCH':
|
|
128
|
+
return 'VerificationFailure';
|
|
129
|
+
case 'CONTROL_CONVERGENCE_FAILED':
|
|
130
|
+
return 'ControlConvergenceFailure';
|
|
131
|
+
case 'SEMANTIC_MISMATCH':
|
|
132
|
+
return 'SemanticMismatchFailure';
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
function buildRecoveryState(failureCode, retryable) {
|
|
136
|
+
return {
|
|
137
|
+
failure_class: mapFailureCodeToFailureClass(failureCode),
|
|
138
|
+
runtime_code: failureCode,
|
|
139
|
+
recovery_attempts: 0,
|
|
140
|
+
max_recovery_attempts: DEFAULT_MAX_RECOVERY_ATTEMPTS,
|
|
141
|
+
retry_depth: 0,
|
|
142
|
+
max_retry_depth: DEFAULT_MAX_RETRY_DEPTH,
|
|
143
|
+
is_terminal: false,
|
|
144
|
+
retry_allowed: retryable
|
|
145
|
+
};
|
|
146
|
+
}
|
|
109
147
|
export function buildActionExecutionResult({ actionType, device, selector, resolved, success, uiFingerprintBefore, uiFingerprintAfter, failure, details, sourceModule }) {
|
|
110
148
|
const timestampMs = Date.now();
|
|
111
149
|
const timestamp = new Date(timestampMs).toISOString();
|
|
@@ -122,6 +160,7 @@ export function buildActionExecutionResult({ actionType, device, selector, resol
|
|
|
122
160
|
},
|
|
123
161
|
success,
|
|
124
162
|
...(failure ? { failure_code: failure.failureCode, retryable: failure.retryable } : {}),
|
|
163
|
+
...(failure ? { recovery: buildRecoveryState(failure.failureCode, failure.retryable) } : {}),
|
|
125
164
|
ui_fingerprint_before: uiFingerprintBefore,
|
|
126
165
|
ui_fingerprint_after: uiFingerprintAfter,
|
|
127
166
|
...(details ? { details } : {})
|
package/dist/server-core.js
CHANGED
|
@@ -6,7 +6,7 @@ import { handleToolCall } from './server/tool-handlers.js';
|
|
|
6
6
|
export { wrapResponse, toolDefinitions, handleToolCall };
|
|
7
7
|
export const serverInfo = {
|
|
8
8
|
name: 'mobile-debug-mcp',
|
|
9
|
-
version: '0.26.4'
|
|
9
|
+
version: '0.27.0'
|
|
10
10
|
};
|
|
11
11
|
export function createServer() {
|
|
12
12
|
const server = new Server(serverInfo, {
|
|
package/dist/utils/android/utils.js
CHANGED
|
@@ -362,6 +362,12 @@ function normalizeClassName(value) {
|
|
|
362
362
|
function inferAndroidRole(className) {
|
|
363
363
|
if (/seekbar|slider/.test(className))
|
|
364
364
|
return 'slider';
|
|
365
|
+
if (/stepper|numberpicker/.test(className))
|
|
366
|
+
return 'stepper';
|
|
367
|
+
if (/spinner|dropdown/.test(className))
|
|
368
|
+
return 'dropdown';
|
|
369
|
+
if (/segment|tablayout/.test(className))
|
|
370
|
+
return 'segmented_control';
|
|
365
371
|
if (/switch|toggle/.test(className))
|
|
366
372
|
return 'switch';
|
|
367
373
|
if (/checkbox/.test(className))
|
|
@@ -403,11 +409,37 @@ function buildAndroidSelector(text, contentDescription, resourceId, className) {
|
|
|
403
409
|
return { value: className, confidence: buildAndroidSelectorConfidence('class') };
|
|
404
410
|
return null;
|
|
405
411
|
}
|
|
406
|
-
function buildAndroidSemantic(clickable, className) {
|
|
407
|
-
|
|
412
|
+
function buildAndroidSemantic(clickable, className, role) {
|
|
413
|
+
const semantic = {
|
|
408
414
|
is_clickable: clickable,
|
|
409
415
|
is_container: /recyclerview|scroll|layout|viewgroup|frame/.test(className)
|
|
410
416
|
};
|
|
417
|
+
if (role === 'slider') {
|
|
418
|
+
semantic.semantic_role = 'slider';
|
|
419
|
+
semantic.adjustable = true;
|
|
420
|
+
semantic.supported_actions = ['adjust'];
|
|
421
|
+
semantic.state_shape = 'continuous';
|
|
422
|
+
}
|
|
423
|
+
else if (role === 'stepper') {
|
|
424
|
+
semantic.semantic_role = 'stepper';
|
|
425
|
+
semantic.adjustable = true;
|
|
426
|
+
semantic.supported_actions = ['increment', 'decrement'];
|
|
427
|
+
semantic.state_shape = 'discrete';
|
|
428
|
+
}
|
|
429
|
+
else if (role === 'dropdown') {
|
|
430
|
+
semantic.semantic_role = 'dropdown';
|
|
431
|
+
semantic.supported_actions = ['tap', 'expand'];
|
|
432
|
+
semantic.state_shape = 'semantic';
|
|
433
|
+
}
|
|
434
|
+
else if (role === 'segmented_control') {
|
|
435
|
+
semantic.semantic_role = 'segmented_control';
|
|
436
|
+
semantic.supported_actions = ['tap'];
|
|
437
|
+
semantic.state_shape = 'discrete';
|
|
438
|
+
}
|
|
439
|
+
else if (clickable) {
|
|
440
|
+
semantic.supported_actions = ['tap'];
|
|
441
|
+
}
|
|
442
|
+
return semantic;
|
|
411
443
|
}
|
|
412
444
|
function isSliderLikeAndroid(node) {
|
|
413
445
|
const className = String(node['@_class'] || '').toLowerCase();
|
|
@@ -487,7 +519,7 @@ export function traverseNode(node, elements, parentIndex = -1, depth = 0) {
|
|
|
487
519
|
const stableId = resourceId ?? (typeof contentDescription === 'string' && contentDescription.trim().length > 0 ? contentDescription : null);
|
|
488
520
|
const testTag = stableId;
|
|
489
521
|
const selector = buildAndroidSelector(text, contentDescription, resourceId, normalizeClassName(className));
|
|
490
|
-
const semantic = buildAndroidSemantic(clickable, normalizeClassName(className));
|
|
522
|
+
const semantic = buildAndroidSemantic(clickable, normalizeClassName(className), role);
|
|
491
523
|
const isUseful = clickable || (text && text.length > 0) || (contentDescription && contentDescription.length > 0);
|
|
492
524
|
if (isUseful) {
|
|
493
525
|
const element = {
|
package/docs/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to the **Mobile Debug MCP** project will be documented in this file.
|
|
4
4
|
|
|
5
|
+
## [0.27.0]
|
|
6
|
+
- Defines a structured recovery and replanning model for UI interaction failures, enabling the system to respond to execution uncertainty with bounded, deterministic recovery strategies.
|
|
7
|
+
|
|
8
|
+
## [0.26.5]
|
|
9
|
+
- Introduces a semantic control model to improve the identification and interaction with custom and composite UI controls.
|
|
10
|
+
|
|
5
11
|
## [0.26.4]
|
|
6
12
|
- Improved slider accuracy
|
|
7
13
|
|
package/docs/ROADMAP.md
CHANGED
|
@@ -44,17 +44,20 @@ Higher task success with fewer retries.
|
|
|
44
44
|
|
|
45
45
|
- Stronger State Verification — Complete (Foundational verification layer shipped)
|
|
46
46
|
- Richer Element Identity — Complete (Identity and selector confidence foundations shipped)
|
|
47
|
+
- Better Compose / Custom Control Semantics — Complete (Semantic role enrichment and custom-adjustable inference shipped)
|
|
47
48
|
|
|
48
49
|
## Current Focus
|
|
49
50
|
|
|
50
51
|
- Wait and Synchronization Reliability
|
|
51
52
|
- Actionability Resolution
|
|
53
|
+
- Verification Stabilization and Temporal Convergence
|
|
52
54
|
|
|
53
55
|
## Upcoming Work
|
|
54
56
|
|
|
57
|
+
- Adjustable Control Precision Hardening
|
|
55
58
|
- Environment Auto-Configuration and Toolchain Discovery
|
|
56
59
|
- Adjustable Control Support
|
|
57
|
-
-
|
|
60
|
+
- Verification Stabilization and Temporal Convergence
|
|
58
61
|
- Signal-Oriented Diagnostic Filtering
|
|
59
62
|
- Long Press Gesture
|
|
60
63
|
# Stronger State Verification
|
|
@@ -243,6 +246,53 @@ Blocks or strengthens:
|
|
|
243
246
|
|
|
244
247
|
---
|
|
245
248
|
|
|
249
|
+
# Verification Stabilization and Temporal Convergence
|
|
250
|
+
|
|
251
|
+
## Rationale
|
|
252
|
+
Real-world feedback exposed false-negative readiness failures caused by transient UI timing, even when target state had actually converged.
|
|
253
|
+
|
|
254
|
+
**Status:** Planned
|
|
255
|
+
|
|
256
|
+
Addresses friction where agents:
|
|
257
|
+
- fail readiness checks on transient timing races
|
|
258
|
+
- act on stale snapshots
|
|
259
|
+
- misclassify eventual success as timeout failure
|
|
260
|
+
- encounter lag between UI convergence and verification success
|
|
261
|
+
|
|
262
|
+
## Scope
|
|
263
|
+
- Bounded recheck before readiness failure
|
|
264
|
+
- Temporal debounce for transient state mismatches
|
|
265
|
+
- Verify-until-stable semantics for readiness checks
|
|
266
|
+
- Stability confirmation windows
|
|
267
|
+
- Snapshot freshness and convergence heuristics
|
|
268
|
+
|
|
269
|
+
## Expected Impact
|
|
270
|
+
Very high.
|
|
271
|
+
|
|
272
|
+
## Exit Criteria
|
|
273
|
+
- False-negative readiness failures materially reduced
|
|
274
|
+
- Stability confirmation logic implemented
|
|
275
|
+
- Benchmark async flows validate improved convergence detection
|
|
276
|
+
- Verification timing behavior documented in guardrails
|
|
277
|
+
|
|
278
|
+
## Success Metrics
|
|
279
|
+
- Higher first-pass verification success
|
|
280
|
+
- Lower false timeout failures
|
|
281
|
+
- Higher wait success rate
|
|
282
|
+
- Fewer retries caused by premature failure classification
|
|
283
|
+
|
|
284
|
+
## Dependencies
|
|
285
|
+
Depends on:
|
|
286
|
+
- Stronger State Verification
|
|
287
|
+
- Wait and Synchronization Reliability
|
|
288
|
+
|
|
289
|
+
Strengthens:
|
|
290
|
+
- Actionability Resolution
|
|
291
|
+
- Adjustable Control Support
|
|
292
|
+
- Recovery and replanning readiness
|
|
293
|
+
|
|
294
|
+
---
|
|
295
|
+
|
|
246
296
|
# Actionability Resolution
|
|
247
297
|
|
|
248
298
|
## Rationale
|
|
@@ -338,6 +388,48 @@ Blocks or strengthens:
|
|
|
338
388
|
|
|
339
389
|
---
|
|
340
390
|
|
|
391
|
+
# Adjustable Control Precision Hardening
|
|
392
|
+
|
|
393
|
+
## Rationale
|
|
394
|
+
Post-implementation feedback shows semantics exist, but fine-grained adjustable targeting and convergence still need hardening.
|
|
395
|
+
|
|
396
|
+
**Status:** Planned
|
|
397
|
+
|
|
398
|
+
Addresses friction around:
|
|
399
|
+
- slider thumb targeting precision
|
|
400
|
+
- tap vs drag adjustment strategy selection
|
|
401
|
+
- snapping and quantized convergence behavior
|
|
402
|
+
- repeated adjustment retries before landing on target value
|
|
403
|
+
|
|
404
|
+
## Scope
|
|
405
|
+
- Fine-grained slider targeting refinement
|
|
406
|
+
- Drag vs tap adjustment strategy heuristics
|
|
407
|
+
- Improved value snapping convergence
|
|
408
|
+
- Control-specific adjustment fallback policies
|
|
409
|
+
|
|
410
|
+
## Expected Impact
|
|
411
|
+
High.
|
|
412
|
+
|
|
413
|
+
## Exit Criteria
|
|
414
|
+
- Benchmark slider flows reach target values with fewer retries
|
|
415
|
+
- Adjustment strategy selection validated across representative controls
|
|
416
|
+
- Reduced repeated-tap convergence failures
|
|
417
|
+
|
|
418
|
+
## Success Metrics
|
|
419
|
+
- Fewer retries for adjustable controls
|
|
420
|
+
- Higher first-attempt target value success
|
|
421
|
+
- Reduced control convergence failures
|
|
422
|
+
|
|
423
|
+
## Dependencies
|
|
424
|
+
Depends on:
|
|
425
|
+
- Adjustable Control Support
|
|
426
|
+
- Better Compose / Custom Control Semantics
|
|
427
|
+
|
|
428
|
+
Strengthens:
|
|
429
|
+
- Recovery readiness
|
|
430
|
+
|
|
431
|
+
---
|
|
432
|
+
|
|
341
433
|
# Signal-Oriented Diagnostic Filtering
|
|
342
434
|
|
|
343
435
|
## Rationale
|
|
@@ -435,7 +527,7 @@ Strengthens:
|
|
|
435
527
|
## Rationale
|
|
436
528
|
Higher priority after agent feedback exposed custom control semantics as a core reliability gap, not a later optimization.
|
|
437
529
|
|
|
438
|
-
**Status:**
|
|
530
|
+
**Status:** Completed
|
|
439
531
|
|
|
440
532
|
Semantics become more useful once:
|
|
441
533
|
- identity is stronger
|
|
@@ -446,8 +538,8 @@ Semantics become more useful once:
|
|
|
446
538
|
|
|
447
539
|
## Scope
|
|
448
540
|
- Composite control traits
|
|
449
|
-
- Control role enrichment (
|
|
450
|
-
- Interaction
|
|
541
|
+
- Control role enrichment (`slider`, `stepper`, `dropdown`, `segmented_control`, `custom_adjustable`)
|
|
542
|
+
- Interaction contract metadata (`supported_actions`, `adjustable`, `state_shape`)
|
|
451
543
|
- Custom widget gesture affordance hints
|
|
452
544
|
- Semantic confidence annotations
|
|
453
545
|
- Compose-aware selectors for waits (merged semantics and element relationships)
|
|
@@ -457,7 +549,7 @@ High.
|
|
|
457
549
|
|
|
458
550
|
## Exit Criteria
|
|
459
551
|
- Semantic traits implemented for major custom control classes
|
|
460
|
-
- Interaction contracts surfaced in
|
|
552
|
+
- Interaction contracts surfaced in observation and resolution paths
|
|
461
553
|
- Confidence model defined for derived semantics
|
|
462
554
|
- Custom control manipulation success validated in benchmark flows
|
|
463
555
|
|
|
@@ -574,11 +666,13 @@ Synchronization & Actionability
|
|
|
574
666
|
|
|
575
667
|
Control Precision & Observability
|
|
576
668
|
- Adjustable Control Support
|
|
669
|
+
- Adjustable Control Precision Hardening
|
|
670
|
+
- Better Compose / Custom Control Semantics
|
|
577
671
|
- Signal-Oriented Diagnostic Filtering
|
|
672
|
+
- Verification Stabilization and Temporal Convergence
|
|
578
673
|
|
|
579
674
|
Interaction Expansion
|
|
580
675
|
- Long Press Gesture
|
|
581
|
-
- Better Compose / Custom Control Semantics
|
|
582
676
|
- Pinch to Zoom
|
|
583
677
|
|
|
584
678
|
Deep Observability
|
|
@@ -598,11 +692,13 @@ Make core loop reliable and reduce onboarding friction.
|
|
|
598
692
|
|
|
599
693
|
## Wave 2 (Control Precision + Diagnostics)
|
|
600
694
|
- Adjustable Control Support
|
|
695
|
+
- Adjustable Control Precision Hardening
|
|
601
696
|
- Better Compose / Custom Control Semantics
|
|
602
697
|
- Signal-Oriented Diagnostic Filtering
|
|
698
|
+
- Verification Stabilization and Temporal Convergence
|
|
603
699
|
|
|
604
700
|
Focus:
|
|
605
|
-
Improve control precision, custom control
|
|
701
|
+
Improve control precision, verification convergence, custom control reliability, and signal observability.
|
|
606
702
|
|
|
607
703
|
---
|
|
608
704
|
|
|
@@ -629,14 +725,16 @@ Roadmap Ordering:
|
|
|
629
725
|
1. Stronger State Verification
|
|
630
726
|
2. Richer Element Identity
|
|
631
727
|
3. Wait and Synchronization Reliability
|
|
632
|
-
4.
|
|
633
|
-
5.
|
|
634
|
-
6.
|
|
635
|
-
7.
|
|
636
|
-
8.
|
|
637
|
-
9.
|
|
638
|
-
10.
|
|
639
|
-
11.
|
|
728
|
+
4. Verification Stabilization and Temporal Convergence
|
|
729
|
+
5. Environment Auto-Configuration and Toolchain Discovery
|
|
730
|
+
6. Actionability Resolution
|
|
731
|
+
7. Adjustable Control Support
|
|
732
|
+
8. Adjustable Control Precision Hardening
|
|
733
|
+
9. Better Compose / Custom Control Semantics
|
|
734
|
+
10. Signal-Oriented Diagnostic Filtering
|
|
735
|
+
11. Long Press Gesture
|
|
736
|
+
12. Pinch to Zoom
|
|
737
|
+
13. Action Trace Correlation
|
|
640
738
|
|
|
641
739
|
Rationale:
|
|
642
740
|
- Early roadmap items harden state, targeting, synchronization, environment readiness, and action execution.
|
|
@@ -649,7 +747,7 @@ Rationale:
|
|
|
649
747
|
## Future Considerations
|
|
650
748
|
Still out of scope:
|
|
651
749
|
|
|
652
|
-
-
|
|
750
|
+
- Full autonomous recovery planning (deferred until after verification stabilization)
|
|
653
751
|
- Autonomous retry strategy
|
|
654
752
|
- MCP-level agent orchestration
|
|
655
753
|
- Autonomous recovery hinting (future consideration only)
|
|
package/docs/rfcs/009-semantic-control-modeling-for-custom-and-composite-controls.md
ADDED
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
# RFC 009 — Semantic Control Modeling for Custom and Composite Controls
|
|
2
|
+
|
|
3
|
+
## 1. Summary
|
|
4
|
+
|
|
5
|
+
This RFC defines a semantic control model for identifying, exposing, and interacting with custom and composite controls that are poorly represented through raw accessibility or platform UI trees.
|
|
6
|
+
|
|
7
|
+
It introduces semantic enrichment for controls such as:
|
|
8
|
+
|
|
9
|
+
- sliders
|
|
10
|
+
- steppers
|
|
11
|
+
- segmented controls
|
|
12
|
+
- dropdowns
|
|
13
|
+
- Compose/SwiftUI custom widgets
|
|
14
|
+
- composite gesture-driven controls
|
|
15
|
+
|
|
16
|
+
The goal is to improve target resolution, control interaction, and verification reliability for controls whose actionable semantics are not fully captured by raw snapshots.
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
## 2. Problem Statement
|
|
21
|
+
|
|
22
|
+
Current interaction logic works well when platform semantics are explicit.
|
|
23
|
+
|
|
24
|
+
It is weaker when controls appear as:
|
|
25
|
+
|
|
26
|
+
- generic container views
|
|
27
|
+
- unlabeled clickable wrappers
|
|
28
|
+
- nested composite controls
|
|
29
|
+
- custom Compose/SwiftUI components with weak accessibility exposure
|
|
30
|
+
|
|
31
|
+
Observed problems include:
|
|
32
|
+
|
|
33
|
+
- controls resolving as parent containers rather than actionable targets
|
|
34
|
+
- missing slider-like controls in snapshots
|
|
35
|
+
- weak distinction between discrete vs continuous controls
|
|
36
|
+
- inability to infer supported interactions from control structure
|
|
37
|
+
- unreliable verification of control state
|
|
38
|
+
|
|
39
|
+
This causes brittle automation and coordinate fallback behavior.
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
## 3. Goals
|
|
44
|
+
|
|
45
|
+
This RFC introduces a semantic layer that MUST:
|
|
46
|
+
|
|
47
|
+
- infer higher-level control semantics from raw UI structures
|
|
48
|
+
- enrich snapshots with semantic control metadata
|
|
49
|
+
- improve actionable target selection (RFC 007)
|
|
50
|
+
- improve adjustable control handling (RFC 008)
|
|
51
|
+
- improve verification for semantic control state
|
|
52
|
+
- reduce coordinate fallback usage
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
## 4. Non-Goals
|
|
57
|
+
|
|
58
|
+
This RFC does NOT define:
|
|
59
|
+
|
|
60
|
+
- replacement of raw accessibility trees
|
|
61
|
+
- ML-based semantic inference
|
|
62
|
+
- probabilistic control classification
|
|
63
|
+
- new gesture primitives
|
|
64
|
+
- autonomous planning behavior
|
|
65
|
+
|
|
66
|
+
Semantic modeling is deterministic enrichment layered over raw signals.
|
|
67
|
+
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
## 5. Runtime Surfaces
|
|
71
|
+
|
|
72
|
+
This RFC applies to existing runtime surfaces:
|
|
73
|
+
|
|
74
|
+
- findElementHandler
|
|
75
|
+
- _resolveActionableAncestor
|
|
76
|
+
- _buildResolvedElement
|
|
77
|
+
- tapElementHandler
|
|
78
|
+
- scrollToElementHandler
|
|
79
|
+
|
|
80
|
+
Semantic modeling augments these surfaces; it does not replace them.
|
|
81
|
+
|
|
82
|
+
---
|
|
83
|
+
|
|
84
|
+
## 6. Semantic Control Model
|
|
85
|
+
|
|
86
|
+
Controls MAY progressively expose semantic metadata such as:
|
|
87
|
+
|
|
88
|
+
```ts
|
|
89
|
+
interface SemanticControl {
|
|
90
|
+
semantic_role:
|
|
91
|
+
| "slider"
|
|
92
|
+
| "stepper"
|
|
93
|
+
| "dropdown"
|
|
94
|
+
| "segmented_control"
|
|
95
|
+
| "custom_adjustable"
|
|
96
|
+
| "composite_control";
|
|
97
|
+
|
|
98
|
+
supported_actions: string[];
|
|
99
|
+
|
|
100
|
+
adjustable: boolean;
|
|
101
|
+
|
|
102
|
+
state_shape:
|
|
103
|
+
| "continuous"
|
|
104
|
+
| "discrete"
|
|
105
|
+
| "semantic";
|
|
106
|
+
}
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
The control roles above represent an expected semantic model, not a claim that all such control classes are equally surfaced in the current runtime.
|
|
110
|
+
|
|
111
|
+
Current runtime support may initially expose simpler semantic signals such as:
|
|
112
|
+
- role hints
|
|
113
|
+
- semantic labels
|
|
114
|
+
- value_range metadata
|
|
115
|
+
- selector confidence or related resolution signals
|
|
116
|
+
|
|
117
|
+
Richer control roles are progressive extensions over time.
|
|
118
|
+
|
|
119
|
+
---
|
|
120
|
+
|
|
121
|
+
## 7. Semantic Inference Rules
|
|
122
|
+
|
|
123
|
+
Inference MAY use signals such as:
|
|
124
|
+
|
|
125
|
+
- accessibility role hints
|
|
126
|
+
- value_range metadata
|
|
127
|
+
- child composition patterns
|
|
128
|
+
- repeated selectable child structures
|
|
129
|
+
- platform traits (adjustable, selected, expanded)
|
|
130
|
+
- known control heuristics
|
|
131
|
+
|
|
132
|
+
Inference MUST be deterministic and explainable.
|
|
133
|
+
|
|
134
|
+
Raw signals always win on conflict.
|
|
135
|
+
|
|
136
|
+
Semantic inference confidence, where present, is advisory only and MUST NOT be treated as executable truth.
|
|
137
|
+
|
|
138
|
+
---
|
|
139
|
+
|
|
140
|
+
## 8. Resolution Integration (RFC 007)
|
|
141
|
+
|
|
142
|
+
Semantic metadata SHOULD improve target resolution by:
|
|
143
|
+
|
|
144
|
+
- preferring actionable child controls over generic containers
|
|
145
|
+
- promoting semantically actionable descendants
|
|
146
|
+
- disambiguating among multiple candidate matches
|
|
147
|
+
|
|
148
|
+
Semantic signals are advisory enrichment, not executable truth.
|
|
149
|
+
|
|
150
|
+
---
|
|
151
|
+
|
|
152
|
+
## 9. Adjustable Control Integration (RFC 008)
|
|
153
|
+
|
|
154
|
+
Where adjustable=true:
|
|
155
|
+
|
|
156
|
+
Semantic metadata MAY expose:
|
|
157
|
+
|
|
158
|
+
- supported adjustment mode
|
|
159
|
+
- discrete vs continuous state model
|
|
160
|
+
- expected verification strategy
|
|
161
|
+
|
|
162
|
+
This improves convergence for value-setting workflows.
|
|
163
|
+
|
|
164
|
+
---
|
|
165
|
+
|
|
166
|
+
## 10. Verification Integration
|
|
167
|
+
|
|
168
|
+
Verification MAY use semantic control metadata to improve:
|
|
169
|
+
|
|
170
|
+
- value-state verification
|
|
171
|
+
- discrete selection verification
|
|
172
|
+
- semantic-state checks
|
|
173
|
+
|
|
174
|
+
Formal verification still remains governed by RFC 005.
|
|
175
|
+
|
|
176
|
+
---
|
|
177
|
+
|
|
178
|
+
## 11. Output Contract (Progressive Extension)
|
|
179
|
+
|
|
180
|
+
Current runtime may expose partial semantic outputs.
|
|
181
|
+
|
|
182
|
+
Expected progressive shape (future extension model):
|
|
183
|
+
|
|
184
|
+
```ts
|
|
185
|
+
interface SemanticResolutionMetadata {
|
|
186
|
+
semantic_role?: string;
|
|
187
|
+
supported_actions?: string[];
|
|
188
|
+
adjustable?: boolean;
|
|
189
|
+
state_shape?: string;
|
|
190
|
+
confidence?: "low" | "medium" | "high";
|
|
191
|
+
}
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
These fields are progressive enrichment and MUST NOT be assumed universally present.
|
|
195
|
+
|
|
196
|
+
Implementations MAY expose only a subset of this model initially. Presence of a richer semantic role does not imply universal runtime support for all control classes.
|
|
197
|
+
|
|
198
|
+
---
|
|
199
|
+
|
|
200
|
+
## 12. Failure Modes
|
|
201
|
+
|
|
202
|
+
Semantic modeling MAY fail due to:
|
|
203
|
+
|
|
204
|
+
- insufficient raw signals
|
|
205
|
+
- ambiguous composite structures
|
|
206
|
+
- conflicting heuristics
|
|
207
|
+
|
|
208
|
+
When semantic inference confidence is insufficient:
|
|
209
|
+
|
|
210
|
+
- raw resolution flow MUST continue
|
|
211
|
+
- semantic fields MAY be omitted
|
|
212
|
+
- no semantic guessing should be forced
|
|
213
|
+
|
|
214
|
+
---
|
|
215
|
+
|
|
216
|
+
## 13. Success Metrics
|
|
217
|
+
|
|
218
|
+
- fewer coordinate fallbacks
|
|
219
|
+
- improved control discovery
|
|
220
|
+
- improved actionable-target precision
|
|
221
|
+
- improved slider/custom-control automation success
|
|
222
|
+
- reduced semantic mismatch failures (RFC 010)
|
|
223
|
+
|
|
224
|
+
---
|
|
225
|
+
|
|
226
|
+
## 14. Relationship to Other RFCs
|
|
227
|
+
|
|
228
|
+
RFC 005 — verification correctness model
|
|
229
|
+
RFC 006 — runtime action execution
|
|
230
|
+
RFC 007 — target resolution
|
|
231
|
+
RFC 008 — adjustable control support
|
|
232
|
+
RFC 011 — recovery uses semantic mismatch failures defined here
|
|
233
|
+
|
|
234
|
+
---
|
|
235
|
+
|
|
236
|
+
## 15. Summary
|
|
237
|
+
|
|
238
|
+
This RFC adds deterministic semantic control enrichment for custom and composite controls, improving resolution, interaction reliability, and verification while remaining layered over existing runtime signals.
|