mobile-debug-mcp 0.26.5 → 0.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/interact/index.js +352 -185
- package/dist/server/common.js +39 -0
- package/dist/server-core.js +1 -1
- package/docs/CHANGELOG.md +3 -0
- package/docs/ROADMAP.md +109 -11
- package/docs/rfcs/010-verification-stabilization-and-temporal-convergence.md +265 -0
- package/docs/rfcs/011-recovery-and-replanning-for-failed-or-ambiguous-interaction-flows.md +321 -0
- package/docs/rfcs/011.1-recovery-contract-types-and-runtime-wiring-spec.md +253 -0
- package/docs/rfcs/012.md +203 -0
- package/docs/specs/mcp-tooling-spec-v1.md +12 -0
- package/docs/tools/interact.md +10 -0
- package/package.json +1 -1
- package/src/interact/index.ts +393 -186
- package/src/server/common.ts +44 -1
- package/src/server-core.ts +1 -1
- package/src/types.ts +36 -0
- package/test/unit/interact/adjust_control.test.ts +77 -1
- package/test/unit/interact/verification_stabilization.test.ts +94 -0
- package/test/unit/server/common.test.ts +36 -1
package/dist/server/common.js
CHANGED
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
import { ToolsObserve } from '../observe/index.js';
|
|
2
|
+
export const DEFAULT_MAX_RECOVERY_ATTEMPTS = 3;
|
|
3
|
+
export const DEFAULT_MAX_RETRY_DEPTH = 3;
|
|
2
4
|
export function wrapResponse(data) {
|
|
3
5
|
return {
|
|
4
6
|
content: [{
|
|
@@ -86,6 +88,8 @@ export function normalizeResolvedTarget(value = null) {
|
|
|
86
88
|
/**
 * Derives a generic failure descriptor from a free-form error message.
 *
 * The message is matched case-insensitively: "timeout" is checked first and
 * yields a retryable TIMEOUT; "semantic mismatch" yields a non-retryable
 * SEMANTIC_MISMATCH. Anything else, including an empty or missing message,
 * yields a non-retryable UNKNOWN.
 *
 * @param {string} [message] - Error text to classify.
 * @returns {{ failureCode: string, retryable: boolean }} Failure descriptor.
 */
export function inferGenericFailure(message) {
    // Falsy messages can never match a pattern, so classify them up front.
    if (!message) {
        return { failureCode: 'UNKNOWN', retryable: false };
    }
    const mentionsTimeout = /timeout/i.test(message);
    if (mentionsTimeout) {
        return { failureCode: 'TIMEOUT', retryable: true };
    }
    const mentionsSemanticMismatch = /semantic mismatch/i.test(message);
    if (mentionsSemanticMismatch) {
        return { failureCode: 'SEMANTIC_MISMATCH', retryable: false };
    }
    return { failureCode: 'UNKNOWN', retryable: false };
}
|
|
91
95
|
export function inferScrollFailure(message) {
|
|
@@ -106,6 +110,40 @@ export function determineActionLifecycleState({ success, failure }) {
|
|
|
106
110
|
return ACTION_LIFECYCLE_STATE_BY_OUTCOME.success;
|
|
107
111
|
return ACTION_LIFECYCLE_STATE_BY_OUTCOME.success;
|
|
108
112
|
}
|
|
113
|
+
/**
 * Maps a runtime failure code to the failure class reported in the recovery
 * state of an action result.
 *
 * Codes that are not explicitly listed fall back to 'ExecutionFailure' so the
 * mapping is total and `failure_class` is never `undefined`.
 *
 * @param {string} code - Runtime failure code (e.g. 'TIMEOUT').
 * @returns {string} The failure class name for the given code.
 */
function mapFailureCodeToFailureClass(code) {
    switch (code) {
        case 'ELEMENT_NOT_FOUND':
        case 'AMBIGUOUS_TARGET':
        case 'STALE_REFERENCE':
            return 'TargetResolutionFailure';
        case 'VERIFICATION_FAILED':
        case 'EXPECT_STATE_MISMATCH':
            return 'VerificationFailure';
        case 'CONTROL_CONVERGENCE_FAILED':
            return 'ControlConvergenceFailure';
        case 'SEMANTIC_MISMATCH':
            return 'SemanticMismatchFailure';
        case 'ELEMENT_NOT_INTERACTABLE':
        case 'TIMEOUT':
        case 'ACTION_REJECTED':
        case 'NAVIGATION_NO_CHANGE':
        case 'UNKNOWN':
        default:
            // Unlisted codes share the UNKNOWN fallback instead of yielding undefined.
            return 'ExecutionFailure';
    }
}
|
|
135
|
+
/**
 * Builds the initial recovery state attached to a failed action result.
 *
 * Attempt and depth counters start at zero, the limits come from the module
 * defaults, and the state is created as non-terminal.
 *
 * @param {string} failureCode - Runtime failure code of the failed action.
 * @param {boolean} retryable - Whether the failure was classified as retryable.
 * @returns {object} Recovery state in snake_case wire format.
 */
function buildRecoveryState(failureCode, retryable) {
    const failureClass = mapFailureCodeToFailureClass(failureCode);
    // Key order is kept stable because the object is serialized into the response.
    const recoveryState = {
        failure_class: failureClass,
        runtime_code: failureCode,
        recovery_attempts: 0,
        max_recovery_attempts: DEFAULT_MAX_RECOVERY_ATTEMPTS,
        retry_depth: 0,
        max_retry_depth: DEFAULT_MAX_RETRY_DEPTH,
        is_terminal: false,
        retry_allowed: retryable
    };
    return recoveryState;
}
|
|
109
147
|
export function buildActionExecutionResult({ actionType, device, selector, resolved, success, uiFingerprintBefore, uiFingerprintAfter, failure, details, sourceModule }) {
|
|
110
148
|
const timestampMs = Date.now();
|
|
111
149
|
const timestamp = new Date(timestampMs).toISOString();
|
|
@@ -122,6 +160,7 @@ export function buildActionExecutionResult({ actionType, device, selector, resol
|
|
|
122
160
|
},
|
|
123
161
|
success,
|
|
124
162
|
...(failure ? { failure_code: failure.failureCode, retryable: failure.retryable } : {}),
|
|
163
|
+
...(failure ? { recovery: buildRecoveryState(failure.failureCode, failure.retryable) } : {}),
|
|
125
164
|
ui_fingerprint_before: uiFingerprintBefore,
|
|
126
165
|
ui_fingerprint_after: uiFingerprintAfter,
|
|
127
166
|
...(details ? { details } : {})
|
package/dist/server-core.js
CHANGED
|
@@ -6,7 +6,7 @@ import { handleToolCall } from './server/tool-handlers.js';
|
|
|
6
6
|
export { wrapResponse, toolDefinitions, handleToolCall };
|
|
7
7
|
export const serverInfo = {
|
|
8
8
|
name: 'mobile-debug-mcp',
|
|
9
|
-
version: '0.26.5'
|
|
9
|
+
version: '0.27.0'
|
|
10
10
|
};
|
|
11
11
|
export function createServer() {
|
|
12
12
|
const server = new Server(serverInfo, {
|
package/docs/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to the **Mobile Debug MCP** project will be documented in this file.
|
|
4
4
|
|
|
5
|
+
## [0.27.0]
|
|
6
|
+
- Defines a structured recovery and replanning model for UI interaction failures, enabling the system to respond to execution uncertainty with bounded, deterministic recovery strategies.
|
|
7
|
+
|
|
5
8
|
## [0.26.5]
|
|
6
9
|
- Introduces a semantic control model to improve the identification and interaction with custom and composite UI controls.
|
|
7
10
|
|
package/docs/ROADMAP.md
CHANGED
|
@@ -50,11 +50,14 @@ Higher task success with fewer retries.
|
|
|
50
50
|
|
|
51
51
|
- Wait and Synchronization Reliability
|
|
52
52
|
- Actionability Resolution
|
|
53
|
+
- Verification Stabilization and Temporal Convergence
|
|
53
54
|
|
|
54
55
|
## Upcoming Work
|
|
55
56
|
|
|
57
|
+
- Adjustable Control Precision Hardening
|
|
56
58
|
- Environment Auto-Configuration and Toolchain Discovery
|
|
57
59
|
- Adjustable Control Support
|
|
60
|
+
- Verification Stabilization and Temporal Convergence
|
|
58
61
|
- Signal-Oriented Diagnostic Filtering
|
|
59
62
|
- Long Press Gesture
|
|
60
63
|
# Stronger State Verification
|
|
@@ -243,6 +246,53 @@ Blocks or strengthens:
|
|
|
243
246
|
|
|
244
247
|
---
|
|
245
248
|
|
|
249
|
+
# Verification Stabilization and Temporal Convergence
|
|
250
|
+
|
|
251
|
+
## Rationale
|
|
252
|
+
Real-world feedback exposed false-negative readiness failures caused by transient UI timing, even when the target state had actually converged.
|
|
253
|
+
|
|
254
|
+
**Status:** Planned
|
|
255
|
+
|
|
256
|
+
Addresses friction where agents:
|
|
257
|
+
- fail readiness checks on transient timing races
|
|
258
|
+
- act on stale snapshots
|
|
259
|
+
- misclassify eventual success as timeout failure
|
|
260
|
+
- encounter lag between UI convergence and verification success
|
|
261
|
+
|
|
262
|
+
## Scope
|
|
263
|
+
- Bounded recheck before readiness failure
|
|
264
|
+
- Temporal debounce for transient state mismatches
|
|
265
|
+
- Verify-until-stable semantics for readiness checks
|
|
266
|
+
- Stability confirmation windows
|
|
267
|
+
- Snapshot freshness and convergence heuristics
|
|
268
|
+
|
|
269
|
+
## Expected Impact
|
|
270
|
+
Very high.
|
|
271
|
+
|
|
272
|
+
## Exit Criteria
|
|
273
|
+
- False-negative readiness failures materially reduced
|
|
274
|
+
- Stability confirmation logic implemented
|
|
275
|
+
- Benchmark async flows validate improved convergence detection
|
|
276
|
+
- Verification timing behavior documented in guardrails
|
|
277
|
+
|
|
278
|
+
## Success Metrics
|
|
279
|
+
- Higher first-pass verification success
|
|
280
|
+
- Lower false timeout failures
|
|
281
|
+
- Higher wait success rate
|
|
282
|
+
- Fewer retries caused by premature failure classification
|
|
283
|
+
|
|
284
|
+
## Dependencies
|
|
285
|
+
Depends on:
|
|
286
|
+
- Stronger State Verification
|
|
287
|
+
- Wait and Synchronization Reliability
|
|
288
|
+
|
|
289
|
+
Strengthens:
|
|
290
|
+
- Actionability Resolution
|
|
291
|
+
- Adjustable Control Support
|
|
292
|
+
- Recovery and replanning readiness
|
|
293
|
+
|
|
294
|
+
---
|
|
295
|
+
|
|
246
296
|
# Actionability Resolution
|
|
247
297
|
|
|
248
298
|
## Rationale
|
|
@@ -338,6 +388,48 @@ Blocks or strengthens:
|
|
|
338
388
|
|
|
339
389
|
---
|
|
340
390
|
|
|
391
|
+
# Adjustable Control Precision Hardening
|
|
392
|
+
|
|
393
|
+
## Rationale
|
|
394
|
+
Post-implementation feedback shows semantics exist, but fine-grained adjustable targeting and convergence still need hardening.
|
|
395
|
+
|
|
396
|
+
**Status:** Planned
|
|
397
|
+
|
|
398
|
+
Addresses friction around:
|
|
399
|
+
- slider thumb targeting precision
|
|
400
|
+
- tap vs drag adjustment strategy selection
|
|
401
|
+
- snapping and quantized convergence behavior
|
|
402
|
+
- repeated adjustment retries before landing on target value
|
|
403
|
+
|
|
404
|
+
## Scope
|
|
405
|
+
- Fine-grained slider targeting refinement
|
|
406
|
+
- Drag vs tap adjustment strategy heuristics
|
|
407
|
+
- Improved value snapping convergence
|
|
408
|
+
- Control-specific adjustment fallback policies
|
|
409
|
+
|
|
410
|
+
## Expected Impact
|
|
411
|
+
High.
|
|
412
|
+
|
|
413
|
+
## Exit Criteria
|
|
414
|
+
- Benchmark slider flows reach target values with fewer retries
|
|
415
|
+
- Adjustment strategy selection validated across representative controls
|
|
416
|
+
- Reduced repeated-tap convergence failures
|
|
417
|
+
|
|
418
|
+
## Success Metrics
|
|
419
|
+
- Fewer retries for adjustable controls
|
|
420
|
+
- Higher first-attempt target value success
|
|
421
|
+
- Reduced control convergence failures
|
|
422
|
+
|
|
423
|
+
## Dependencies
|
|
424
|
+
Depends on:
|
|
425
|
+
- Adjustable Control Support
|
|
426
|
+
- Better Compose / Custom Control Semantics
|
|
427
|
+
|
|
428
|
+
Strengthens:
|
|
429
|
+
- Recovery readiness
|
|
430
|
+
|
|
431
|
+
---
|
|
432
|
+
|
|
341
433
|
# Signal-Oriented Diagnostic Filtering
|
|
342
434
|
|
|
343
435
|
## Rationale
|
|
@@ -574,11 +666,13 @@ Synchronization & Actionability
|
|
|
574
666
|
|
|
575
667
|
Control Precision & Observability
|
|
576
668
|
- Adjustable Control Support
|
|
669
|
+
- Adjustable Control Precision Hardening
|
|
670
|
+
- Better Compose / Custom Control Semantics
|
|
577
671
|
- Signal-Oriented Diagnostic Filtering
|
|
672
|
+
- Verification Stabilization and Temporal Convergence
|
|
578
673
|
|
|
579
674
|
Interaction Expansion
|
|
580
675
|
- Long Press Gesture
|
|
581
|
-
- Better Compose / Custom Control Semantics
|
|
582
676
|
- Pinch to Zoom
|
|
583
677
|
|
|
584
678
|
Deep Observability
|
|
@@ -598,11 +692,13 @@ Make core loop reliable and reduce onboarding friction.
|
|
|
598
692
|
|
|
599
693
|
## Wave 2 (Control Precision + Diagnostics)
|
|
600
694
|
- Adjustable Control Support
|
|
695
|
+
- Adjustable Control Precision Hardening
|
|
601
696
|
- Better Compose / Custom Control Semantics
|
|
602
697
|
- Signal-Oriented Diagnostic Filtering
|
|
698
|
+
- Verification Stabilization and Temporal Convergence
|
|
603
699
|
|
|
604
700
|
Focus:
|
|
605
|
-
Improve control precision, custom control reliability, and signal observability.
|
|
701
|
+
Improve control precision, verification convergence, custom control reliability, and signal observability.
|
|
606
702
|
|
|
607
703
|
---
|
|
608
704
|
|
|
@@ -629,14 +725,16 @@ Roadmap Ordering:
|
|
|
629
725
|
1. Stronger State Verification
|
|
630
726
|
2. Richer Element Identity
|
|
631
727
|
3. Wait and Synchronization Reliability
|
|
632
|
-
4.
|
|
633
|
-
5.
|
|
634
|
-
6.
|
|
635
|
-
7.
|
|
636
|
-
8.
|
|
637
|
-
9.
|
|
638
|
-
10.
|
|
639
|
-
11.
|
|
728
|
+
4. Verification Stabilization and Temporal Convergence
|
|
729
|
+
5. Environment Auto-Configuration and Toolchain Discovery
|
|
730
|
+
6. Actionability Resolution
|
|
731
|
+
7. Adjustable Control Support
|
|
732
|
+
8. Adjustable Control Precision Hardening
|
|
733
|
+
9. Better Compose / Custom Control Semantics
|
|
734
|
+
10. Signal-Oriented Diagnostic Filtering
|
|
735
|
+
11. Long Press Gesture
|
|
736
|
+
12. Pinch to Zoom
|
|
737
|
+
13. Action Trace Correlation
|
|
640
738
|
|
|
641
739
|
Rationale:
|
|
642
740
|
- Early roadmap items harden state, targeting, synchronization, environment readiness, and action execution.
|
|
@@ -649,7 +747,7 @@ Rationale:
|
|
|
649
747
|
## Future Considerations
|
|
650
748
|
Still out of scope:
|
|
651
749
|
|
|
652
|
-
-
|
|
750
|
+
- Full autonomous recovery planning (deferred until after verification stabilization)
|
|
653
751
|
- Autonomous retry strategy
|
|
654
752
|
- MCP-level agent orchestration
|
|
655
753
|
- Autonomous recovery hinting (future consideration only)
|
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
3
|
+
# RFC 010 — Verification Stabilization and Temporal Convergence
|
|
4
|
+
|
|
5
|
+
## 1. Summary
|
|
6
|
+
|
|
7
|
+
This RFC defines a verification stabilization layer that ensures UI state transitions are not misclassified due to timing instability, transient UI states, or stale snapshots.
|
|
8
|
+
|
|
9
|
+
It introduces temporal semantics into verification so that readiness and state checks are based on convergence over time, not a single snapshot.
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## 2. Problem Statement
|
|
14
|
+
|
|
15
|
+
Current verification behavior is snapshot-based and may produce false-negative failures when UI state is in transition.
|
|
16
|
+
|
|
17
|
+
Observed issues include:
|
|
18
|
+
|
|
19
|
+
- readiness checks timing out even though UI converges shortly after
|
|
20
|
+
- stale snapshots being treated as authoritative state
|
|
21
|
+
- transient UI states causing premature failure classification
|
|
22
|
+
- mismatch between UI convergence and verification success
|
|
23
|
+
|
|
24
|
+
These issues lead to unnecessary retries, incorrect failure classification, and degraded automation reliability.
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## 3. Goals
|
|
29
|
+
|
|
30
|
+
This RFC introduces a temporal verification model that MUST:
|
|
31
|
+
|
|
32
|
+
- reduce false-negative readiness failures
|
|
33
|
+
- ensure verification reflects stable UI convergence
|
|
34
|
+
- introduce bounded recheck before failure
|
|
35
|
+
- debounce transient mismatches
|
|
36
|
+
- maintain deterministic verification behavior
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
## 4. Non-Goals
|
|
41
|
+
|
|
42
|
+
This RFC does NOT define:
|
|
43
|
+
|
|
44
|
+
- recovery or replanning strategies (covered by RFC 011)
|
|
45
|
+
- probabilistic verification
|
|
46
|
+
- ML-based state inference
|
|
47
|
+
- changes to action execution semantics
|
|
48
|
+
|
|
49
|
+
Verification remains deterministic and grounded in observable UI state.
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
## 5. Runtime Ownership and Integration
|
|
54
|
+
|
|
55
|
+
This RFC applies to existing verification surfaces:
|
|
56
|
+
|
|
57
|
+
- expect_* handlers (e.g. expect_state)
|
|
58
|
+
- readiness checks in wait_for_ui_element
|
|
59
|
+
- post-action verification in src/interact
|
|
60
|
+
|
|
61
|
+
It augments these surfaces with temporal semantics; it does not replace them.
|
|
62
|
+
|
|
63
|
+
### 5.1 Ownership and Composition with Existing Logic
|
|
64
|
+
|
|
65
|
+
This RFC refines existing behavior rather than introducing a parallel mechanism.
|
|
66
|
+
|
|
67
|
+
- `wait_for_ui_element` (and underlying `waitForUICore`) owns **readiness stabilization**.
|
|
68
|
+
- `expect_*` handlers (e.g. `expect_state`) own **state verification stabilization**.
|
|
69
|
+
- `src/interact` owns **post-action verification application** of these rules.
|
|
70
|
+
|
|
71
|
+
Composition rules:
|
|
72
|
+
- `wait_for_ui_element` MUST apply stabilization for presence/readiness before returning success or failure.
|
|
73
|
+
- `expect_*` MUST apply stabilization for state/value assertions.
|
|
74
|
+
- If both are used in sequence, `wait_for_ui_element` completes first, then `expect_*` applies its own stabilization.
|
|
75
|
+
- Stabilization MUST NOT be duplicated across layers for the same check.
|
|
76
|
+
|
|
77
|
+
---
|
|
78
|
+
|
|
79
|
+
## 6. Temporal Verification Model
|
|
80
|
+
|
|
81
|
+
Verification MUST consider state over time, not a single observation.
|
|
82
|
+
|
|
83
|
+
### 6.1 Stabilization Window
|
|
84
|
+
|
|
85
|
+
Verification SHOULD use a bounded observation window before declaring failure.
|
|
86
|
+
|
|
87
|
+
Within this window:
|
|
88
|
+
- multiple UI reads MAY be performed
|
|
89
|
+
- transient mismatches MUST NOT immediately trigger failure
|
|
90
|
+
|
|
91
|
+
### 6.2 Verify-Until-Stable
|
|
92
|
+
|
|
93
|
+
Verification SHOULD require state to be stable across consecutive observations before success is confirmed.
|
|
94
|
+
|
|
95
|
+
Example:
|
|
96
|
+
- state must match expected condition for N consecutive reads
|
|
97
|
+
|
|
98
|
+
### 6.3 Debounce Semantics
|
|
99
|
+
|
|
100
|
+
Transient mismatches SHOULD be debounced.
|
|
101
|
+
|
|
102
|
+
Short-lived mismatches within the stabilization window MUST NOT be treated as terminal failure.
|
|
103
|
+
|
|
104
|
+
### 6.4 Deterministic Defaults (Required)
|
|
105
|
+
|
|
106
|
+
Implementations MUST use bounded defaults unless explicitly overridden:
|
|
107
|
+
|
|
108
|
+
- `stabilization_window_ms`: 1000ms (range: 500–1500ms)
|
|
109
|
+
- `stable_observation_count`: 2 consecutive matching reads
|
|
110
|
+
- `max_recheck_attempts`: 3
|
|
111
|
+
- `min_read_interval_ms`: 100–200ms between reads
|
|
112
|
+
|
|
113
|
+
These values MUST be configurable but bounded to prevent unbounded waits.
|
|
114
|
+
|
|
115
|
+
---
|
|
116
|
+
|
|
117
|
+
## 6.5 Reference Stabilization Algorithm
|
|
118
|
+
|
|
119
|
+
For a given verification predicate `P(snapshot)`:
|
|
120
|
+
|
|
121
|
+
1. Start timer `t0`.
|
|
122
|
+
2. Initialize `stable_count = 0`, `attempts = 0`.
|
|
123
|
+
3. Loop until `now - t0 > stabilization_window_ms` OR `stable_count >= stable_observation_count`:
|
|
124
|
+
- Read fresh snapshot `S`.
|
|
125
|
+
- If `P(S)` is true:
|
|
126
|
+
- `stable_count += 1`
|
|
127
|
+
Else:
|
|
128
|
+
- `stable_count = 0`
|
|
129
|
+
- `attempts += 1`
|
|
130
|
+
- Sleep `min_read_interval_ms`.
|
|
131
|
+
4. If `stable_count >= stable_observation_count`: SUCCESS
|
|
132
|
+
5. Else if `attempts < max_recheck_attempts`:
|
|
133
|
+
- Perform one additional fresh read and re-evaluate once.
|
|
134
|
+
6. Else: FAILURE
|
|
135
|
+
|
|
136
|
+
Notes:
|
|
137
|
+
- Implementations MUST ensure at least one fresh read occurs before failure.
|
|
138
|
+
- Debounce is achieved via resetting `stable_count` on mismatch.
|
|
139
|
+
|
|
140
|
+
---
|
|
141
|
+
|
|
142
|
+
## 7. Snapshot Freshness
|
|
143
|
+
|
|
144
|
+
Verification MUST account for snapshot freshness.
|
|
145
|
+
|
|
146
|
+
### 7.1 Freshness Constraints
|
|
147
|
+
|
|
148
|
+
- snapshots older than `snapshot_stale_threshold_ms` MUST be considered stale (default: 500ms)
|
|
149
|
+
- stale snapshots MUST NOT be used as final verification evidence and MUST trigger a fresh read
|
|
150
|
+
|
|
151
|
+
### 7.2 Re-read Requirement
|
|
152
|
+
|
|
153
|
+
Before declaring failure, the system MUST attempt at least one fresh UI read within the stabilization window.
|
|
154
|
+
|
|
155
|
+
### 7.3 Freshness Defaults
|
|
156
|
+
|
|
157
|
+
- `snapshot_stale_threshold_ms`: 500ms (range: 300–800ms)
|
|
158
|
+
|
|
159
|
+
---
|
|
160
|
+
|
|
161
|
+
## 8. Runtime Failure Code Mapping
|
|
162
|
+
|
|
163
|
+
Existing runtime failure signals MUST map into RFC 010 failure categories.
|
|
164
|
+
|
|
165
|
+
| Runtime Code | RFC 010 Category |
|
|
166
|
+
|--------------|------------------|
|
|
167
|
+
| ELEMENT_NOT_FOUND | Target Resolution Failure |
|
|
168
|
+
| STALE_REFERENCE | Target Resolution Failure |
|
|
169
|
+
| AMBIGUOUS_TARGET | Target Resolution Failure |
|
|
170
|
+
| TIMEOUT | Execution Failure |
|
|
171
|
+
| ACTION_REJECTED | Execution Failure |
|
|
172
|
+
| VERIFICATION_FAILED | Verification Failure |
|
|
173
|
+
| EXPECT_STATE_MISMATCH | Verification Failure |
|
|
174
|
+
| CONTROL_CONVERGENCE_FAILED | Control Convergence Failure |
|
|
175
|
+
| SEMANTIC_MISMATCH | Semantic Mismatch Failure |
|
|
176
|
+
| UNKNOWN | Execution Failure (default fallback) |
|
|
177
|
+
|
|
178
|
+
This mapping MUST be deterministic, exhaustive, and versioned with the runtime.
|
|
179
|
+
|
|
180
|
+
### 8.1 Failure Gating Rules
|
|
181
|
+
|
|
182
|
+
Failure MUST only be emitted when both of the following hold:
|
|
183
|
+
|
|
184
|
+
- stabilization window is exhausted
|
|
185
|
+
- fresh snapshot verification still fails
|
|
186
|
+
|
|
187
|
+
Transient mismatches SHOULD NOT be classified as:
|
|
188
|
+
- TIMEOUT
|
|
189
|
+
- VERIFICATION_FAILED
|
|
190
|
+
|
|
191
|
+
until stabilization logic has completed.
|
|
192
|
+
|
|
193
|
+
- FAILURE MUST NOT be emitted unless the stabilization loop has attempted to reach `stable_observation_count` consecutive matching reads within the stabilization window.
|
|
194
|
+
- FAILURE MUST NOT be emitted without at least one fresh read within `snapshot_stale_threshold_ms`.
|
|
195
|
+
- TIMEOUT MUST correspond to exhaustion of `stabilization_window_ms`, not a single read failure.
|
|
196
|
+
|
|
197
|
+
---
|
|
198
|
+
|
|
199
|
+
## 9. Integration with RFC 005 (Verification Correctness)
|
|
200
|
+
|
|
201
|
+
RFC 005 defines what correctness means.
|
|
202
|
+
|
|
203
|
+
RFC 010 defines when correctness can be confidently evaluated.
|
|
204
|
+
|
|
205
|
+
RFC 010 augments RFC 005 by introducing temporal convergence requirements before asserting success or failure.
|
|
206
|
+
|
|
207
|
+
---
|
|
208
|
+
|
|
209
|
+
## 10. Integration with RFC 006 (Execution Layer)
|
|
210
|
+
|
|
211
|
+
Post-action verification in src/interact MUST apply stabilization logic before returning failure.
|
|
212
|
+
|
|
213
|
+
Execution MUST NOT prematurely surface verification failure without applying temporal checks defined in this RFC.
|
|
214
|
+
|
|
215
|
+
`src/interact` MUST wrap post-action verification with the reference stabilization algorithm. It MUST pass through configuration (window, counts) and MUST NOT short-circuit on first mismatch.
|
|
216
|
+
|
|
217
|
+
---
|
|
218
|
+
|
|
219
|
+
## 11. Integration with RFC 011.1 (Recovery Contract)
|
|
220
|
+
|
|
221
|
+
Verification stabilization reduces spurious failure signals (false-negative verification results) that would otherwise trigger the downstream recovery mechanisms defined in RFC 011.1.
|
|
222
|
+
|
|
223
|
+
---
|
|
224
|
+
|
|
225
|
+
## 13. Output Behavior (Progressive Extension)
|
|
226
|
+
|
|
227
|
+
Future implementations MAY expose additional metadata such as:
|
|
228
|
+
|
|
229
|
+
```ts
|
|
230
|
+
interface VerificationMetadata {
|
|
231
|
+
stabilization_attempts?: number;
|
|
232
|
+
stabilization_window_ms?: number;
|
|
233
|
+
stable_observation_count?: number;
|
|
234
|
+
snapshot_freshness_ms?: number;
|
|
235
|
+
}
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
These fields are optional and for observability only.
|
|
239
|
+
|
|
240
|
+
---
|
|
241
|
+
|
|
242
|
+
## 14. Failure Modes
|
|
243
|
+
|
|
244
|
+
Verification stabilization MAY fail due to:
|
|
245
|
+
|
|
246
|
+
- UI never converging to expected state
|
|
247
|
+
- repeated oscillation of UI state
|
|
248
|
+
- persistent stale snapshot conditions
|
|
249
|
+
|
|
250
|
+
In these cases, failure MUST be emitted after the stabilization window is exhausted.
|
|
251
|
+
|
|
252
|
+
---
|
|
253
|
+
|
|
254
|
+
## 15. Success Metrics
|
|
255
|
+
|
|
256
|
+
- reduced false-negative readiness failures
|
|
257
|
+
- higher first-pass verification success
|
|
258
|
+
- lower premature timeout rates
|
|
259
|
+
- improved reliability of wait and readiness checks
|
|
260
|
+
|
|
261
|
+
---
|
|
262
|
+
|
|
263
|
+
## 16. Summary
|
|
264
|
+
|
|
265
|
+
This RFC introduces temporal stabilization into verification, ensuring that UI state is evaluated based on convergence over time rather than single snapshots. It improves reliability by eliminating transient mismatches and stale-state errors without introducing probabilistic behavior.
|