mobile-debug-mcp 0.26.5 → 0.28.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/interact/index.js +586 -192
- package/dist/server/common.js +172 -2
- package/dist/server-core.js +1 -1
- package/docs/CHANGELOG.md +6 -0
- package/docs/ROADMAP.md +109 -11
- package/docs/rfcs/010-verification-stabilization-and-temporal-convergence.md +265 -0
- package/docs/rfcs/011-recovery-and-replanning-for-failed-or-ambiguous-interaction-flows.md +321 -0
- package/docs/rfcs/011.1-recovery-contract-types-and-runtime-wiring-spec.md +253 -0
- package/docs/rfcs/012-action-trace-and-xecution-observability.md +242 -0
- package/docs/specs/mcp-tooling-spec-v1.md +26 -0
- package/docs/tools/interact.md +54 -0
- package/package.json +1 -1
- package/src/interact/index.ts +657 -194
- package/src/server/common.ts +236 -3
- package/src/server-core.ts +1 -1
- package/src/types.ts +59 -0
- package/test/device/manual/observe/rfc012_trace.manual.ts +51 -0
- package/test/unit/interact/adjust_control.test.ts +77 -1
- package/test/unit/interact/expect_tools.test.ts +57 -25
- package/test/unit/interact/verification_stabilization.test.ts +94 -0
- package/test/unit/server/common.test.ts +60 -1
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
# RFC 012 — Action Trace and Execution Observability
|
|
5
|
+
|
|
6
|
+
## 1. Summary
|
|
7
|
+
|
|
8
|
+
This RFC defines a structured execution trace model for all actions within the MCP runtime. It provides visibility into resolution, execution, verification, stabilization, and recovery stages.
|
|
9
|
+
|
|
10
|
+
The goal is to make system behavior explainable, debuggable, and measurable without altering execution semantics.
|
|
11
|
+
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
## 2. Problem Statement
|
|
15
|
+
|
|
16
|
+
As the system has evolved (RFC 005–011), execution has become more reliable but also more opaque due to:
|
|
17
|
+
|
|
18
|
+
- stabilization loops masking transient failures
|
|
19
|
+
- recovery logic retrying actions without visibility
|
|
20
|
+
- multiple execution stages (resolve → execute → verify → stabilize → recover)
|
|
21
|
+
|
|
22
|
+
Current outputs provide final results but lack a structured explanation of how those results were reached.
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## 3. Goals
|
|
27
|
+
|
|
28
|
+
This RFC introduces an execution trace model that MUST:
|
|
29
|
+
|
|
30
|
+
- provide a step-by-step record of action execution
|
|
31
|
+
- expose resolution, execution, verification, stabilization, and recovery stages
|
|
32
|
+
- remain deterministic and low-overhead
|
|
33
|
+
- be consistent across all tools and handlers
|
|
34
|
+
|
|
35
|
+
---
|
|
36
|
+
|
|
37
|
+
## 4. Non-Goals
|
|
38
|
+
|
|
39
|
+
This RFC does NOT define:
|
|
40
|
+
|
|
41
|
+
- external logging systems
|
|
42
|
+
- UI visualization layers
|
|
43
|
+
- distributed tracing infrastructure
|
|
44
|
+
|
|
45
|
+
It is strictly an in-process observability model.
|
|
46
|
+
|
|
47
|
+
---
|
|
48
|
+
|
|
49
|
+
## 5. Runtime Surfaces
|
|
50
|
+
|
|
51
|
+
Trace data MUST be emitted from:
|
|
52
|
+
|
|
53
|
+
- src/server (resolution)
|
|
54
|
+
- src/interact (execution and verification)
|
|
55
|
+
- stabilization layer (RFC 010)
|
|
56
|
+
- recovery layer (RFC 011)
|
|
57
|
+
|
|
58
|
+
All action flows MUST produce a trace.
|
|
59
|
+
|
|
60
|
+
---
|
|
61
|
+
|
|
62
|
+
## 6. Trace Model
|
|
63
|
+
|
|
64
|
+
### 6.1 ActionTrace
|
|
65
|
+
|
|
66
|
+
```ts
|
|
67
|
+
interface ActionTrace {
|
|
68
|
+
action_id: string;
|
|
69
|
+
steps: TraceStep[];
|
|
70
|
+
final_outcome: "success" | "failure";
|
|
71
|
+
attempts: number; // total execution attempts including recovery-triggered retries
|
|
72
|
+
}
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### 6.2 TraceStep
|
|
76
|
+
|
|
77
|
+
```ts
|
|
78
|
+
interface TraceStep {
|
|
79
|
+
stage: "resolve" | "execute" | "verify" | "stabilize" | "recover";
|
|
80
|
+
timestamp: number;
|
|
81
|
+
result: "success" | "failure" | "retry";
|
|
82
|
+
attempt_index: number; // monotonic per action execution
|
|
83
|
+
cycle_id?: number; // groups steps within a recovery cycle
|
|
84
|
+
metadata?: Record<string, any>;
|
|
85
|
+
}
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### 6.3 Partial Trace Requirements
|
|
89
|
+
|
|
90
|
+
For actions that do not traverse the full lifecycle (resolve → execute → verify → stabilize → recover), implementations MUST emit a partial trace.
|
|
91
|
+
|
|
92
|
+
A partial trace MUST:
|
|
93
|
+
- include a valid action_id
|
|
94
|
+
- include final_outcome
|
|
95
|
+
- include at least one TraceStep with a valid stage and timestamp
|
|
96
|
+
|
|
97
|
+
Partial traces MUST still respect attempt_index semantics.
|
|
98
|
+
|
|
99
|
+
This ensures observability coverage even for legacy or bypass execution paths.
|
|
100
|
+
|
|
101
|
+
---
|
|
102
|
+
|
|
103
|
+
## 7. Stage Emission Rules
|
|
104
|
+
|
|
105
|
+
### 7.1 Resolve Stage
|
|
106
|
+
|
|
107
|
+
- emitted by findElementHandler and related resolution logic
|
|
108
|
+
- includes selector, matched element, and confidence (if available)
|
|
109
|
+
|
|
110
|
+
### 7.2 Execute Stage
|
|
111
|
+
|
|
112
|
+
- emitted by action handlers (tap, type_text, scroll_to_element, etc.)
|
|
113
|
+
- represents the execution attempt
|
|
114
|
+
|
|
115
|
+
### 7.3 Verify Stage
|
|
116
|
+
|
|
117
|
+
- emitted by expect_* handlers
|
|
118
|
+
- reflects state validation results
|
|
119
|
+
|
|
120
|
+
### 7.4 Stabilize Stage
|
|
121
|
+
|
|
122
|
+
- emitted by RFC 010 stabilization logic
|
|
123
|
+
- includes stabilization attempts and convergence status
|
|
124
|
+
|
|
125
|
+
### 7.5 Recover Stage
|
|
126
|
+
|
|
127
|
+
- emitted by RFC 011 recovery logic
|
|
128
|
+
- includes strategy used and retry attempts
|
|
129
|
+
|
|
130
|
+
### 7.6 Step Emission Timing
|
|
131
|
+
|
|
132
|
+
Each stage MUST emit a TraceStep at the point where its outcome is determined:
|
|
133
|
+
|
|
134
|
+
- resolve: after target selection is finalized
|
|
135
|
+
- execute: after action handler completes (success or failure)
|
|
136
|
+
- verify: after verification result is computed
|
|
137
|
+
- stabilize: after stabilization loop completes (success or failure)
|
|
138
|
+
- recover: after a recovery attempt is decided and executed
|
|
139
|
+
|
|
140
|
+
Each retry or re-attempt MUST emit a separate step.
|
|
141
|
+
|
|
142
|
+
---
|
|
143
|
+
|
|
144
|
+
## 8. Deterministic Behavior
|
|
145
|
+
|
|
146
|
+
Trace emission MUST NOT:
|
|
147
|
+
|
|
148
|
+
- alter execution flow
|
|
149
|
+
- introduce timing side effects
|
|
150
|
+
- affect success/failure outcomes
|
|
151
|
+
|
|
152
|
+
It is strictly observational.
|
|
153
|
+
|
|
154
|
+
---
|
|
155
|
+
|
|
156
|
+
## 9. Minimal Metadata Contract
|
|
157
|
+
|
|
158
|
+
Implementations SHOULD include where available:
|
|
159
|
+
|
|
160
|
+
- selector or target identifier
|
|
161
|
+
- snapshot identifiers
|
|
162
|
+
- stabilization attempt counts
|
|
163
|
+
- recovery strategy name
|
|
164
|
+
|
|
165
|
+
Metadata MUST remain lightweight.
|
|
166
|
+
|
|
167
|
+
---
|
|
168
|
+
|
|
169
|
+
## 10. Integration with Existing RFCs
|
|
170
|
+
|
|
171
|
+
- RFC 006: execution emits execute stage
|
|
172
|
+
- RFC 007: resolution emits resolve stage
|
|
173
|
+
- RFC 010: stabilization emits stabilize stage
|
|
174
|
+
- RFC 011: recovery emits recover stage
|
|
175
|
+
|
|
176
|
+
### 10.1 Compatibility with RFC 006 Observability Model
|
|
177
|
+
|
|
178
|
+
RFC 006 defines traceability as being assembled from distributed signals rather than a centralized event system.
|
|
179
|
+
|
|
180
|
+
This RFC does NOT replace that model; it standardizes a unified projection layer over those signals.
|
|
181
|
+
|
|
182
|
+
- Existing emitters (server, interact, stabilization, recovery) remain the source of truth
|
|
183
|
+
- RFC 012 defines how those signals are composed into a single ActionTrace
|
|
184
|
+
- Actions that bypass parts of the lifecycle MUST still emit partial traces reflecting the stages they execute
|
|
185
|
+
|
|
186
|
+
This ensures backward compatibility while enabling a coherent trace surface.
|
|
187
|
+
|
|
188
|
+
---
|
|
189
|
+
|
|
190
|
+
## 11. Output Behavior
|
|
191
|
+
|
|
192
|
+
Trace MUST be produced for all action flows (full or partial, depending on runtime capability).
|
|
193
|
+
|
|
194
|
+
Canonical contract:
|
|
195
|
+
- Trace SHOULD be included in ActionExecutionResult when the runtime path supports full trace emission
|
|
196
|
+
- Trace MAY also be stored internally for diagnostics
|
|
197
|
+
|
|
198
|
+
If a runtime path cannot yet emit a full trace (e.g. legacy or bypass actions), it MUST emit a partial trace containing at least:
|
|
199
|
+
- action_id
|
|
200
|
+
- final_outcome
|
|
201
|
+
- at least one TraceStep representing the executed stage
|
|
202
|
+
|
|
203
|
+
Example:
|
|
204
|
+
|
|
205
|
+
```ts
|
|
206
|
+
interface ActionExecutionResult {
|
|
207
|
+
success: boolean;
|
|
208
|
+
failure_code?: string;
|
|
209
|
+
trace?: ActionTrace; // optional in type, required by RFC behavior (full or partial)
|
|
210
|
+
}
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
Implementations MUST treat the optionality of `trace` in the runtime type as a temporary compatibility constraint, not as permission to skip trace generation. All execution paths MUST still generate a trace internally, even if only a partial trace is returned externally.
|
|
215
|
+
|
|
216
|
+
The optionality of `trace` in ActionExecutionResult is transitional. Implementations MUST treat a result that omits `trace` as a compatibility constraint rather than a valid steady state. Future versions of the runtime MAY require `trace` to be present on all ActionExecutionResult values once all execution paths support full trace emission.
|
|
217
|
+
|
|
218
|
+
---
|
|
219
|
+
|
|
220
|
+
## 12. Failure Analysis
|
|
221
|
+
|
|
222
|
+
Trace data MUST allow identification of:
|
|
223
|
+
|
|
224
|
+
- resolution failures
|
|
225
|
+
- execution failures
|
|
226
|
+
- verification mismatches
|
|
227
|
+
- stabilization convergence issues
|
|
228
|
+
- recovery attempts and outcomes
|
|
229
|
+
|
|
230
|
+
---
|
|
231
|
+
|
|
232
|
+
## 13. Success Metrics
|
|
233
|
+
|
|
234
|
+
- improved debuggability of failures
|
|
235
|
+
- reduced need for manual log inspection
|
|
236
|
+
- clearer differentiation between failure types
|
|
237
|
+
|
|
238
|
+
---
|
|
239
|
+
|
|
240
|
+
## 14. Conclusion
|
|
241
|
+
|
|
242
|
+
This RFC introduces a structured trace model that makes action execution transparent and debuggable. It builds on existing RFCs without changing behavior, enabling better diagnostics and future analytics capabilities.
|
|
@@ -80,6 +80,30 @@ MUST be returned in this structure:
|
|
|
80
80
|
ui_fingerprint_after: string | null,
|
|
81
81
|
failure_code?: string,
|
|
82
82
|
retryable?: boolean,
|
|
83
|
+
trace: {
|
|
84
|
+
action_id: string,
|
|
85
|
+
steps: Array<{
|
|
86
|
+
stage: 'resolve' | 'execute' | 'verify' | 'stabilize' | 'recover',
|
|
87
|
+
timestamp: number,
|
|
88
|
+
result: 'success' | 'failure' | 'retry',
|
|
89
|
+
attempt_index: number,
|
|
90
|
+
cycle_id?: number,
|
|
91
|
+
metadata?: Record<string, unknown>
|
|
92
|
+
}>,
|
|
93
|
+
final_outcome: 'success' | 'failure',
|
|
94
|
+
attempts: number
|
|
95
|
+
},
|
|
96
|
+
recovery?: {
|
|
97
|
+
failure_class: string,
|
|
98
|
+
runtime_code: string,
|
|
99
|
+
recovery_strategy?: string,
|
|
100
|
+
recovery_attempts: number,
|
|
101
|
+
max_recovery_attempts: number,
|
|
102
|
+
retry_depth: number,
|
|
103
|
+
max_retry_depth: number,
|
|
104
|
+
is_terminal: boolean,
|
|
105
|
+
retry_allowed?: boolean
|
|
106
|
+
},
|
|
83
107
|
device?: DeviceInfo,
|
|
84
108
|
details?: object
|
|
85
109
|
}
|
|
@@ -93,6 +117,8 @@ Rules:
|
|
|
93
117
|
- `source_module` identifies where the envelope was produced
|
|
94
118
|
- fingerprints represent observed pre/post UI state on a best-effort basis
|
|
95
119
|
- `failure_code` is optional but MUST be used when a structured mapping exists
|
|
120
|
+
- `trace` is required and carries the observable execution path
|
|
121
|
+
- `recovery` MAY be attached to failed actions to carry typed recovery metadata
|
|
96
122
|
|
|
97
123
|
### 4.4 Allowed Deviations
|
|
98
124
|
|
package/docs/tools/interact.md
CHANGED
|
@@ -40,6 +40,25 @@ Example response:
|
|
|
40
40
|
"source_module": "server",
|
|
41
41
|
"target": { "selector": { "x": 100, "y": 200 }, "resolved": null },
|
|
42
42
|
"success": true,
|
|
43
|
+
"trace": {
|
|
44
|
+
"action_id": "tap_element_1710000000002_3",
|
|
45
|
+
"steps": [
|
|
46
|
+
{
|
|
47
|
+
"stage": "resolve",
|
|
48
|
+
"timestamp": 1710000000002,
|
|
49
|
+
"result": "success",
|
|
50
|
+
"attempt_index": 0
|
|
51
|
+
},
|
|
52
|
+
{
|
|
53
|
+
"stage": "execute",
|
|
54
|
+
"timestamp": 1710000000003,
|
|
55
|
+
"result": "success",
|
|
56
|
+
"attempt_index": 1
|
|
57
|
+
}
|
|
58
|
+
],
|
|
59
|
+
"final_outcome": "success",
|
|
60
|
+
"attempts": 1
|
|
61
|
+
},
|
|
43
62
|
"ui_fingerprint_before": "fp_before",
|
|
44
63
|
"ui_fingerprint_after": "fp_after"
|
|
45
64
|
}
|
|
@@ -395,6 +414,41 @@ Failure response:
|
|
|
395
414
|
"success": false,
|
|
396
415
|
"failure_code": "STALE_REFERENCE",
|
|
397
416
|
"retryable": true,
|
|
417
|
+
"trace": {
|
|
418
|
+
"action_id": "tap_element_1710000000003_4",
|
|
419
|
+
"steps": [
|
|
420
|
+
{
|
|
421
|
+
"stage": "resolve",
|
|
422
|
+
"timestamp": 1710000000003,
|
|
423
|
+
"result": "failure",
|
|
424
|
+
"attempt_index": 0
|
|
425
|
+
},
|
|
426
|
+
{
|
|
427
|
+
"stage": "execute",
|
|
428
|
+
"timestamp": 1710000000004,
|
|
429
|
+
"result": "failure",
|
|
430
|
+
"attempt_index": 1
|
|
431
|
+
},
|
|
432
|
+
{
|
|
433
|
+
"stage": "recover",
|
|
434
|
+
"timestamp": 1710000000005,
|
|
435
|
+
"result": "retry",
|
|
436
|
+
"attempt_index": 2
|
|
437
|
+
}
|
|
438
|
+
],
|
|
439
|
+
"final_outcome": "failure",
|
|
440
|
+
"attempts": 1
|
|
441
|
+
},
|
|
442
|
+
"recovery": {
|
|
443
|
+
"failure_class": "TargetResolutionFailure",
|
|
444
|
+
"runtime_code": "STALE_REFERENCE",
|
|
445
|
+
"recovery_attempts": 0,
|
|
446
|
+
"max_recovery_attempts": 3,
|
|
447
|
+
"retry_depth": 0,
|
|
448
|
+
"max_retry_depth": 3,
|
|
449
|
+
"is_terminal": false,
|
|
450
|
+
"retry_allowed": true
|
|
451
|
+
},
|
|
398
452
|
"ui_fingerprint_before": "fp_before",
|
|
399
453
|
"ui_fingerprint_after": "fp_before"
|
|
400
454
|
}
|