mobile-debug-mcp 0.26.0 → 0.26.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/interact/classify.js +48 -11
- package/dist/interact/index.js +26 -33
- package/dist/server/common.js +14 -1
- package/dist/server/tool-definitions.js +38 -15
- package/dist/server/tool-handlers.js +9 -0
- package/dist/server-core.js +1 -1
- package/docs/CHANGELOG.md +6 -0
- package/docs/ROADMAP.md +281 -88
- package/docs/rfcs/004-action-verification-routing.md +342 -0
- package/docs/rfcs/005-unified-action-execution-and-verification-model.md +216 -0
- package/docs/rfcs/006-runtime-action-instrumentation-and-binding-layer.md +230 -0
- package/docs/specs/mcp-tooling-spec-v1.md +7 -3
- package/docs/tools/interact.md +14 -8
- package/package.json +1 -1
- package/src/interact/classify.ts +53 -13
- package/src/interact/index.ts +27 -35
- package/src/server/common.ts +22 -1
- package/src/server/tool-definitions.ts +38 -15
- package/src/server/tool-handlers.ts +9 -0
- package/src/server-core.ts +1 -1
- package/src/types.ts +2 -0
- package/test/unit/interact/classify_action_outcome.test.ts +44 -25
- package/test/unit/server/contract.test.ts +8 -6
- package/test/unit/server/response_shapes.test.ts +8 -0
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
# RFC 006 — Runtime Action Instrumentation & Binding Layer
|
|
2
|
+
|
|
3
|
+
## 1. Summary
|
|
4
|
+
|
|
5
|
+
This RFC defines how the execution model in RFC 005 is mapped onto the current runtime behaviour of the system.
|
|
6
|
+
|
|
7
|
+
It does not assume a new instrumentation system exists. Instead, it describes how lifecycle semantics are derived from existing execution flows, logs, module behaviour, and lightweight runtime metadata attached to action envelopes.
|
|
8
|
+
|
|
9
|
+
It specifies:
|
|
10
|
+
- how existing `action_type` values are interpreted under RFC 005 semantics
|
|
11
|
+
- how lifecycle states are inferred from current runtime execution
|
|
12
|
+
- how `src/server` and `src/interact` currently participate in execution
|
|
13
|
+
- how legacy and platform actions are incorporated into the model
|
|
14
|
+
|
|
15
|
+
This RFC is a runtime binding and normalisation layer over existing implementation behaviour.
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
## 2. Problem Statement
|
|
20
|
+
|
|
21
|
+
RFC 005 defines a unified execution lifecycle:
|
|
22
|
+
- Resolved
|
|
23
|
+
- Dispatched
|
|
24
|
+
- Pending Verification
|
|
25
|
+
- Verified
|
|
26
|
+
- Failed
|
|
27
|
+
|
|
28
|
+
However, the current system already contains:
|
|
29
|
+
- a concrete `action_type` execution model
|
|
30
|
+
- execution logic split across `src/server` and `src/interact`
|
|
31
|
+
- platform-specific actions (tap_element, type_text, press_back, start_app, restart_app, scroll_to_element)
|
|
32
|
+
- distributed logging and partial instrumentation within modules
|
|
33
|
+
|
|
34
|
+
There is no central instrumentation system and no explicit lifecycle emitter.
|
|
35
|
+
Instead, lifecycle meaning is inferred from runtime behaviour and the `lifecycle_state` / `source_module` fields now attached to action envelopes.
|
|
36
|
+
|
|
37
|
+
This results in:
|
|
38
|
+
- implicit execution state transitions
|
|
39
|
+
- distributed observability signals
|
|
40
|
+
- non-uniform traceability across actions
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
## 3. Design Goals
|
|
45
|
+
|
|
46
|
+
This layer MUST:
|
|
47
|
+
|
|
48
|
+
- Map existing runtime behaviour to RFC 005 lifecycle semantics
|
|
49
|
+
- Use existing `action_type` values as the authoritative execution taxonomy
|
|
50
|
+
- Derive lifecycle states from observable runtime transitions
|
|
51
|
+
- Reflect actual module responsibilities (not idealised separation)
|
|
52
|
+
- Work with existing logging and execution hooks
|
|
53
|
+
- Preserve compatibility with all current action implementations
|
|
54
|
+
|
|
55
|
+
---
|
|
56
|
+
|
|
57
|
+
## 4. Runtime Execution Flow (Observed)
|
|
58
|
+
|
|
59
|
+
Current observed execution flow:
|
|
60
|
+
|
|
61
|
+
UI Request
|
|
62
|
+
→ src/server (routing + validation)
|
|
63
|
+
→ src/interact (execution + platform dispatch)
|
|
64
|
+
→ platform layer
|
|
65
|
+
→ response handling + logs
|
|
66
|
+
→ optional state verification (where available)
|
|
67
|
+
|
|
68
|
+
Lifecycle states are derived from this flow rather than explicitly emitted.
|
|
69
|
+
|
|
70
|
+
---
|
|
71
|
+
|
|
72
|
+
## 5. Action Type Mapping (Current Runtime)
|
|
73
|
+
|
|
74
|
+
This RFC maps existing `action_type` values to RFC 005 semantics.
|
|
75
|
+
|
|
76
|
+
| action_type | RFC 005 Semantic Interpretation |
|
|
77
|
+
|------------|---------------------------------|
|
|
78
|
+
| tap | Selection |
|
|
79
|
+
| tap_element | Selection |
|
|
80
|
+
| type_text | Input |
|
|
81
|
+
| press_back | Navigation |
|
|
82
|
+
| start_app | System Action |
|
|
83
|
+
| restart_app | System Action |
|
|
84
|
+
| scroll_to_element | Navigation |
|
|
85
|
+
|
|
86
|
+
This table reflects the current runtime contract.
|
|
87
|
+
|
|
88
|
+
---
|
|
89
|
+
|
|
90
|
+
## 6. Lifecycle State Derivation
|
|
91
|
+
|
|
92
|
+
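Lifecycle states are NOT emitted as explicit events. Apart from the post-dispatch `lifecycle_state` value (`pending_verification` or `failed`) carried on action envelopes, they are inferred as follows: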
|
|
93
|
+
|
|
94
|
+
### 6.1 Resolved
|
|
95
|
+
Inferred when:
|
|
96
|
+
- src/server accepts request
|
|
97
|
+
- action is validated and normalized
|
|
98
|
+
- action_id is assigned (or equivalent identifier exists)
|
|
99
|
+
|
|
100
|
+
---
|
|
101
|
+
|
|
102
|
+
### 6.2 Dispatched
|
|
103
|
+
Inferred when:
|
|
104
|
+
- control passes from src/server to src/interact
|
|
105
|
+
- execution call is issued to platform layer
|
|
106
|
+
|
|
107
|
+
---
|
|
108
|
+
|
|
109
|
+
### 6.3 Pending Verification
|
|
110
|
+
Inferred when:
|
|
111
|
+
- platform execution returns a result
|
|
112
|
+
- before any UI/state evaluation occurs
|
|
113
|
+
|
|
114
|
+
---
|
|
115
|
+
|
|
116
|
+
### 6.4 Verified / Failed
|
|
117
|
+
Inferred when:
|
|
118
|
+
- post-execution evaluation is performed (if available)
|
|
119
|
+
|
|
120
|
+
Rules:
|
|
121
|
+
- Verified = expected outcome observed in UI/state/log signals
|
|
122
|
+
- Failed = timeout, error, or mismatch in expected outcome
|
|
123
|
+
|
|
124
|
+
Where no formal verification exists, the outcome is derived from the best available signals (logs, UI diff, or absence of error).
|
|
125
|
+
|
|
126
|
+
---
|
|
127
|
+
|
|
128
|
+
## 7. Instrumentation Reality
|
|
129
|
+
|
|
130
|
+
There is no central instrumentation layer in the current system.
|
|
131
|
+
|
|
132
|
+
Instead:
|
|
133
|
+
- src/server emits partial logs during routing and validation
|
|
134
|
+
- src/interact emits execution logs and platform responses
|
|
135
|
+
- platform adapters may emit additional debugging information
|
|
136
|
+
- action envelopes now carry lightweight lifecycle metadata for post-dispatch state and source ownership
|
|
137
|
+
|
|
138
|
+
Lifecycle traceability is therefore assembled from distributed signals rather than a unified event system.
|
|
139
|
+
|
|
140
|
+
---
|
|
141
|
+
|
|
142
|
+
## 8. Module Responsibilities (Observed Behaviour)
|
|
143
|
+
|
|
144
|
+
### src/server
|
|
145
|
+
- receives action requests
|
|
146
|
+
- performs validation and normalization
|
|
147
|
+
- assigns identifiers where applicable
|
|
148
|
+
- routes actions to src/interact
|
|
149
|
+
- emits partial logs for request lifecycle
|
|
150
|
+
|
|
151
|
+
---
|
|
152
|
+
|
|
153
|
+
### src/interact
|
|
154
|
+
- executes platform-specific actions
|
|
155
|
+
- handles retries and fallback behaviours
|
|
156
|
+
- emits execution logs
|
|
157
|
+
- returns execution results
|
|
158
|
+
- may perform lightweight post-processing
|
|
159
|
+
|
|
160
|
+
---
|
|
161
|
+
|
|
162
|
+
## 9. Verification Reality
|
|
163
|
+
|
|
164
|
+
Verification is not a uniform system-wide layer.
|
|
165
|
+
|
|
166
|
+
It may occur via:
|
|
167
|
+
- UI state comparison (where available)
|
|
168
|
+
- log-based confirmation
|
|
169
|
+
- absence of error signals
|
|
170
|
+
- platform feedback
|
|
171
|
+
|
|
172
|
+
Verification outcomes are deterministic where reliable state signals or explicit evaluation paths are available; where no formal verifier exists, they are best-effort only.
|
|
173
|
+
|
|
174
|
+
---
|
|
175
|
+
|
|
176
|
+
## 10. Legacy and Special Actions
|
|
177
|
+
|
|
178
|
+
Actions such as:
|
|
179
|
+
- scroll_to_element
|
|
180
|
+
- start_app
|
|
181
|
+
- restart_app
|
|
182
|
+
- press_back
|
|
183
|
+
|
|
184
|
+
are fully supported in the runtime.
|
|
185
|
+
|
|
186
|
+
These actions:
|
|
187
|
+
- may bypass full lifecycle observability
|
|
188
|
+
- may not have explicit verification paths
|
|
189
|
+
- are interpreted using best-effort semantic mapping
|
|
190
|
+
|
|
191
|
+
---
|
|
192
|
+
|
|
193
|
+
## 11. Observability Model
|
|
194
|
+
|
|
195
|
+
Observability is currently distributed across:
|
|
196
|
+
- src/server logs
|
|
197
|
+
- src/interact logs
|
|
198
|
+
- platform debug output
|
|
199
|
+
- action envelope metadata
|
|
200
|
+
|
|
201
|
+
There is no unified event schema.
|
|
202
|
+
|
|
203
|
+
Lifecycle reconstruction requires correlation of:
|
|
204
|
+
- action_type
|
|
205
|
+
- timestamps
|
|
206
|
+
- execution boundaries
|
|
207
|
+
- error signals
|
|
208
|
+
|
|
209
|
+
---
|
|
210
|
+
|
|
211
|
+
## 12. Relationship to RFC 005
|
|
212
|
+
|
|
213
|
+
RFC 005 defines the ideal execution lifecycle semantics.
|
|
214
|
+
|
|
215
|
+
RFC 006 defines how those semantics are interpreted from the existing runtime system.
|
|
216
|
+
|
|
217
|
+
Together:
|
|
218
|
+
- RFC 005 = conceptual correctness model
|
|
219
|
+
- RFC 006 = runtime behavioural mapping layer
|
|
220
|
+
|
|
221
|
+
---
|
|
222
|
+
|
|
223
|
+
## 13. Summary
|
|
224
|
+
|
|
225
|
+
This RFC ensures:
|
|
226
|
+
- lifecycle semantics can be derived from current runtime behaviour
|
|
227
|
+
- the existing `action_type` contract is preserved as the source of truth
|
|
228
|
+
- no new instrumentation infrastructure is assumed or required
|
|
229
|
+
- real module responsibilities are accurately represented
|
|
230
|
+
- observability is understood as distributed rather than centralised
|
|
@@ -41,7 +41,7 @@ Outcome-specific guidance:
|
|
|
41
41
|
- visible navigation expected -> `wait_for_screen_change` (optional) -> `expect_screen`
|
|
42
42
|
- local UI change expected -> `wait_for_ui` (optional) -> `expect_element_visible`
|
|
43
43
|
- readable element state expected -> `wait_for_ui` (optional) -> `expect_state`
|
|
44
|
-
- backend/API activity expected without a visible UI change -> compare `get_screen_fingerprint` before/after, then call `
|
|
44
|
+
- backend/API activity expected without a visible UI change -> compare `get_screen_fingerprint` before/after, then call `classify_action_outcome` with the runtime `action_type`; collect `get_network_activity` only if the result remains ambiguous
|
|
45
45
|
|
|
46
46
|
For backend/API activity, `wait_for_screen_change` is not the right verification tool unless a visible transition is also expected.
|
|
47
47
|
|
|
@@ -69,6 +69,8 @@ MUST be returned in this structure:
|
|
|
69
69
|
action_id: string,
|
|
70
70
|
timestamp: string,
|
|
71
71
|
action_type: string,
|
|
72
|
+
lifecycle_state?: 'pending_verification' | 'failed',
|
|
73
|
+
source_module?: 'server' | 'interact',
|
|
72
74
|
target: {
|
|
73
75
|
selector: object,
|
|
74
76
|
resolved: object | null
|
|
@@ -87,6 +89,8 @@ Rules:
|
|
|
87
89
|
|
|
88
90
|
- `success` is at the top level, not nested
|
|
89
91
|
- `target` contains only selection and resolution context
|
|
92
|
+
- `lifecycle_state` reflects the post-dispatch runtime state
|
|
93
|
+
- `source_module` identifies where the envelope was produced
|
|
90
94
|
- fingerprints represent observed pre/post UI state on a best-effort basis
|
|
91
95
|
- `failure_code` is optional but MUST be used when a structured mapping exists
|
|
92
96
|
|
|
@@ -294,11 +298,11 @@ Tool: `classify_action_outcome`
|
|
|
294
298
|
|
|
295
299
|
Rules:
|
|
296
300
|
|
|
297
|
-
- MAY use UI, network, and log signals
|
|
301
|
+
- MAY use UI, action, network, and log signals
|
|
298
302
|
- MUST be deterministic
|
|
299
303
|
- MUST NOT replace `expect_*` tools
|
|
300
304
|
- MUST be treated as a supplementary signal only
|
|
301
|
-
- SHOULD be used with `get_network_activity` when the
|
|
305
|
+
- SHOULD be used with `get_network_activity` only when the outcome is still ambiguous after routing by `action_type`
|
|
302
306
|
|
|
303
307
|
It is not a verification mechanism.
|
|
304
308
|
|
package/docs/tools/interact.md
CHANGED
|
@@ -17,6 +17,7 @@ Important:
|
|
|
17
17
|
|
|
18
18
|
- `wait_for_*` tools must not be used as the final verification of action success when an applicable `expect_*` tool exists.
|
|
19
19
|
- action tools report execution success, not outcome correctness.
|
|
20
|
+
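- `classify_action_outcome` should receive the runtime `action_type` (as `actionType`) so routing can distinguish local-state and side-effect actions; without it, the classifier returns `unknown` whenever neither `uiChanged` nor `expectedElementVisible` is true.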
|
|
20
21
|
|
|
21
22
|
## tap / swipe / type_text / press_back
|
|
22
23
|
|
|
@@ -35,6 +36,8 @@ Example response:
|
|
|
35
36
|
"action_id": "tap_1710000000000_1",
|
|
36
37
|
"timestamp": "2026-04-23T08:00:00.000Z",
|
|
37
38
|
"action_type": "tap",
|
|
39
|
+
"lifecycle_state": "pending_verification",
|
|
40
|
+
"source_module": "server",
|
|
38
41
|
"target": { "selector": { "x": 100, "y": 200 }, "resolved": null },
|
|
39
42
|
"success": true,
|
|
40
43
|
"ui_fingerprint_before": "fp_before",
|
|
@@ -54,10 +57,10 @@ Preferred verification:
|
|
|
54
57
|
- navigation outcome known -> `expect_screen`
|
|
55
58
|
- local UI change known -> `expect_element_visible`
|
|
56
59
|
- readable element state known -> `expect_state`
|
|
57
|
-
- backend/API activity expected -> `classify_action_outcome` + `get_network_activity`
|
|
60
|
+
- backend/API activity expected -> `classify_action_outcome` + optional `get_network_activity` if the UI signal remains ambiguous
|
|
58
61
|
|
|
59
|
-
Use `wait_for_screen_change` only when a visible transition is the expected outcome. If a button should trigger an API request but the screen should stay the same, rely on
|
|
60
|
-
For backend-only actions, prefer comparing `get_screen_fingerprint` before/after and
|
|
62
|
+
Use `wait_for_screen_change` only when a visible transition is the expected outcome. If a button should trigger an API request but the screen should stay the same, rely on `action_type` plus classification first.
|
|
63
|
+
For backend-only actions, prefer comparing `get_screen_fingerprint` before/after and collect `get_network_activity` immediately after the action only if the result is still ambiguous; do not wait on `wait_for_screen_change` if no visible transition is expected.
|
|
61
64
|
Use `wait_for_ui_change` when the screen stays in place but visible text or element state should change.
|
|
62
65
|
|
|
63
66
|
---
|
|
@@ -332,6 +335,8 @@ Success response:
|
|
|
332
335
|
"action_id": "tap_element_1710000000000_1",
|
|
333
336
|
"timestamp": "2026-04-23T08:00:00.000Z",
|
|
334
337
|
"action_type": "tap_element",
|
|
338
|
+
"lifecycle_state": "pending_verification",
|
|
339
|
+
"source_module": "interact",
|
|
335
340
|
"target": {
|
|
336
341
|
"selector": { "elementId": "el_123" },
|
|
337
342
|
"resolved": {
|
|
@@ -507,17 +512,18 @@ Notes:
|
|
|
507
512
|
|
|
508
513
|
## classify_action_outcome + get_network_activity
|
|
509
514
|
|
|
510
|
-
Use this pair when the action
|
|
515
|
+
Use this pair when the action may trigger network/backend work and the screen may not visibly change.
|
|
511
516
|
|
|
512
517
|
Pattern:
|
|
513
518
|
|
|
514
519
|
1. perform the action
|
|
515
520
|
2. call `classify_action_outcome` with `uiChanged` from `wait_for_screen_change` or a screen fingerprint comparison
|
|
516
|
-
3.
|
|
517
|
-
4.
|
|
521
|
+
3. in that call, pass the runtime `action_type` value as `actionType`
|
|
522
|
+
4. collect `get_network_activity` only if the action is side-effect oriented and the UI signal remains ambiguous
|
|
523
|
+
5. call `classify_action_outcome` again with `networkRequests` if you collected them
|
|
518
524
|
|
|
519
525
|
Guidance:
|
|
520
526
|
|
|
521
527
|
- `uiChanged=true` or `expectedElementVisible=true` means the action outcome is already verified
|
|
522
|
-
-
|
|
523
|
-
-
|
|
528
|
+
- local-state actions should prefer refreshed snapshots, `expect_state`, or `expect_element_visible` over default network inspection
|
|
529
|
+
- network activity is auxiliary evidence, not mandatory proof
|
package/package.json
CHANGED
package/src/interact/classify.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
export type ActionOutcome = 'success' | 'no_op' | 'backend_failure' | 'ui_failure' | 'unknown'
|
|
2
2
|
export type NetworkRequestStatus = 'success' | 'failure' | 'retryable'
|
|
3
|
+
export type ActionCategory = 'local_state' | 'side_effect'
|
|
3
4
|
|
|
4
5
|
export interface NetworkRequest {
|
|
5
6
|
endpoint: string
|
|
@@ -9,6 +10,8 @@ export interface NetworkRequest {
|
|
|
9
10
|
export interface ClassifyActionOutcomeInput {
|
|
10
11
|
uiChanged: boolean
|
|
11
12
|
expectedElementVisible?: boolean | null
|
|
13
|
+
/** Concrete action_type from the runtime action result (for example: tap, type_text, start_app). */
|
|
14
|
+
actionType?: string | null
|
|
12
15
|
/** null = get_network_activity has not been called yet */
|
|
13
16
|
networkRequests?: NetworkRequest[] | null
|
|
14
17
|
hasLogErrors?: boolean | null
|
|
@@ -17,8 +20,29 @@ export interface ClassifyActionOutcomeInput {
|
|
|
17
20
|
export interface ClassifyActionOutcomeResult {
|
|
18
21
|
outcome: ActionOutcome
|
|
19
22
|
reasoning: string
|
|
20
|
-
|
|
21
|
-
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
const ACTION_CATEGORY_BY_TYPE: Record<string, ActionCategory> = {
|
|
26
|
+
tap: 'local_state',
|
|
27
|
+
tap_element: 'local_state',
|
|
28
|
+
swipe: 'local_state',
|
|
29
|
+
scroll_to_element: 'local_state',
|
|
30
|
+
type_text: 'local_state',
|
|
31
|
+
press_back: 'local_state',
|
|
32
|
+
start_app: 'side_effect',
|
|
33
|
+
restart_app: 'side_effect',
|
|
34
|
+
terminate_app: 'side_effect',
|
|
35
|
+
reset_app_data: 'side_effect',
|
|
36
|
+
install_app: 'side_effect',
|
|
37
|
+
build_app: 'side_effect',
|
|
38
|
+
build_and_install: 'side_effect'
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
function inferActionCategory(actionType?: string | null): ActionCategory | null {
|
|
42
|
+
if (typeof actionType !== 'string') return null
|
|
43
|
+
const normalized = actionType.trim().toLowerCase()
|
|
44
|
+
if (!normalized) return null
|
|
45
|
+
return ACTION_CATEGORY_BY_TYPE[normalized] ?? 'side_effect'
|
|
22
46
|
}
|
|
23
47
|
|
|
24
48
|
/**
|
|
@@ -26,39 +50,55 @@ export interface ClassifyActionOutcomeResult {
|
|
|
26
50
|
* Same inputs always produce the same output.
|
|
27
51
|
*/
|
|
28
52
|
export function classifyActionOutcome(input: ClassifyActionOutcomeInput): ClassifyActionOutcomeResult {
|
|
29
|
-
const { uiChanged, expectedElementVisible, networkRequests, hasLogErrors } = input
|
|
53
|
+
const { uiChanged, expectedElementVisible, actionType, networkRequests, hasLogErrors } = input
|
|
54
|
+
const actionCategory = inferActionCategory(actionType)
|
|
30
55
|
|
|
31
56
|
// Step 1 — UI signal is positive
|
|
32
57
|
if (uiChanged || expectedElementVisible === true) {
|
|
33
58
|
return { outcome: 'success', reasoning: expectedElementVisible === true ? 'expected element is visible' : 'UI changed after action' }
|
|
34
59
|
}
|
|
35
60
|
|
|
36
|
-
// Step 2 —
|
|
37
|
-
if (
|
|
61
|
+
// Step 2 — no action type means we cannot choose a safe routing path
|
|
62
|
+
if (actionCategory === null) {
|
|
38
63
|
return {
|
|
39
64
|
outcome: 'unknown',
|
|
40
|
-
reasoning: '
|
|
41
|
-
nextAction: 'call_get_network_activity'
|
|
65
|
+
reasoning: 'actionType was not supplied; pass the runtime action_type so the classifier can distinguish local-state and side-effect routing'
|
|
42
66
|
}
|
|
43
67
|
}
|
|
44
68
|
|
|
45
|
-
|
|
46
|
-
const failedRequest = networkRequests.find((r) => r.status === 'failure' || r.status === 'retryable')
|
|
69
|
+
const failedRequest = networkRequests?.find((r) => r.status === 'failure' || r.status === 'retryable')
|
|
47
70
|
if (failedRequest) {
|
|
48
71
|
return { outcome: 'backend_failure', reasoning: `network request ${failedRequest.endpoint} returned ${failedRequest.status}` }
|
|
49
72
|
}
|
|
50
73
|
|
|
51
|
-
// Step
|
|
74
|
+
// Step 3 — local-state actions should be verified with state-specific signals first
|
|
75
|
+
if (actionCategory === 'local_state') {
|
|
76
|
+
const logNote = hasLogErrors ? ' (log errors present)' : ''
|
|
77
|
+
return {
|
|
78
|
+
outcome: 'no_op',
|
|
79
|
+
reasoning: `local-state action${logNote}; use expect_state, refreshed snapshot comparison, or expect_element_visible instead of defaulting to network inspection`
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
// Step 4 — side-effect actions may legitimately need network or log inspection
|
|
84
|
+
if (networkRequests === null || networkRequests === undefined) {
|
|
85
|
+
return {
|
|
86
|
+
outcome: 'unknown',
|
|
87
|
+
reasoning: 'side-effect action without network data; inspect network or log signals only if the outcome is still ambiguous'
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
// Step 5 — no network requests at all
|
|
52
92
|
if (networkRequests.length === 0) {
|
|
53
93
|
const logNote = hasLogErrors ? ' (log errors present)' : ''
|
|
54
|
-
return { outcome: 'no_op', reasoning: `
|
|
94
|
+
return { outcome: 'no_op', reasoning: `side-effect action and no network activity${logNote}` }
|
|
55
95
|
}
|
|
56
96
|
|
|
57
|
-
// Step
|
|
97
|
+
// Step 6 — network requests exist and all succeeded
|
|
58
98
|
if (networkRequests.every((r) => r.status === 'success')) {
|
|
59
99
|
return { outcome: 'ui_failure', reasoning: 'network requests succeeded but UI did not change' }
|
|
60
100
|
}
|
|
61
101
|
|
|
62
|
-
// Step
|
|
102
|
+
// Step 7 — fallback
|
|
63
103
|
return { outcome: 'unknown', reasoning: 'signals are inconclusive' }
|
|
64
104
|
}
|
package/src/interact/index.ts
CHANGED
|
@@ -6,7 +6,7 @@ export { AndroidInteract, iOSInteract };
|
|
|
6
6
|
import { resolveTargetDevice } from '../utils/resolve-device.js'
|
|
7
7
|
import { ToolsObserve } from '../observe/index.js'
|
|
8
8
|
import { computeSnapshotSignature } from '../observe/snapshot-metadata.js'
|
|
9
|
-
import {
|
|
9
|
+
import { buildActionExecutionResult } from '../server/common.js'
|
|
10
10
|
import type {
|
|
11
11
|
ActionFailureCode,
|
|
12
12
|
ActionTargetResolved,
|
|
@@ -291,27 +291,25 @@ export class ToolsInteract {
|
|
|
291
291
|
}
|
|
292
292
|
|
|
293
293
|
private static _actionFailure(
|
|
294
|
-
actionId: string,
|
|
295
|
-
timestamp: string,
|
|
296
294
|
actionType: string,
|
|
297
295
|
selector: Record<string, unknown> | null,
|
|
298
296
|
resolved: ActionTargetResolved | null,
|
|
299
297
|
failureCode: ActionFailureCode,
|
|
300
298
|
retryable: boolean,
|
|
301
299
|
uiFingerprintBefore: string | null,
|
|
302
|
-
uiFingerprintAfter?: string | null
|
|
300
|
+
uiFingerprintAfter?: string | null,
|
|
301
|
+
sourceModule: 'server' | 'interact' = 'interact'
|
|
303
302
|
): TapElementResponse {
|
|
304
|
-
return {
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
target: { selector, resolved },
|
|
303
|
+
return buildActionExecutionResult({
|
|
304
|
+
actionType,
|
|
305
|
+
selector,
|
|
306
|
+
resolved,
|
|
309
307
|
success: false,
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
}
|
|
308
|
+
uiFingerprintBefore,
|
|
309
|
+
uiFingerprintAfter: uiFingerprintAfter ?? null,
|
|
310
|
+
failure: { failureCode, retryable },
|
|
311
|
+
sourceModule
|
|
312
|
+
})
|
|
315
313
|
}
|
|
316
314
|
|
|
317
315
|
static _resetResolvedUiElementsForTests() {
|
|
@@ -472,14 +470,11 @@ export class ToolsInteract {
|
|
|
472
470
|
}
|
|
473
471
|
|
|
474
472
|
static async tapElementHandler({ elementId }: { elementId: string }): Promise<TapElementResponse> {
|
|
475
|
-
const timestampMs = Date.now()
|
|
476
|
-
const timestamp = new Date(timestampMs).toISOString()
|
|
477
473
|
const actionType = 'tap_element'
|
|
478
|
-
const actionId = nextActionId(actionType, timestampMs)
|
|
479
474
|
const selector = { elementId }
|
|
480
475
|
const resolved = ToolsInteract._resolvedUiElements.get(elementId)
|
|
481
476
|
if (!resolved) {
|
|
482
|
-
return ToolsInteract._actionFailure(
|
|
477
|
+
return ToolsInteract._actionFailure(actionType, selector, null, 'STALE_REFERENCE', true, null)
|
|
483
478
|
}
|
|
484
479
|
|
|
485
480
|
const fingerprintBefore = await ToolsInteract._captureFingerprint(resolved.platform, resolved.deviceId)
|
|
@@ -491,22 +486,22 @@ export class ToolsInteract {
|
|
|
491
486
|
const currentMatch = ToolsInteract._findCurrentResolvedElement(elements, treePlatform, treeDeviceId, resolved)
|
|
492
487
|
|
|
493
488
|
if (!currentMatch) {
|
|
494
|
-
return ToolsInteract._actionFailure(
|
|
489
|
+
return ToolsInteract._actionFailure(actionType, selector, null, 'STALE_REFERENCE', true, fingerprintBefore)
|
|
495
490
|
}
|
|
496
491
|
|
|
497
492
|
const resolvedTarget = ToolsInteract._resolvedTargetFromElement(resolved.elementId, currentMatch.el, currentMatch.index)
|
|
498
493
|
|
|
499
494
|
if (!ToolsInteract._isVisibleElement(currentMatch.el)) {
|
|
500
|
-
return ToolsInteract._actionFailure(
|
|
495
|
+
return ToolsInteract._actionFailure(actionType, selector, resolvedTarget, 'ELEMENT_NOT_INTERACTABLE', true, fingerprintBefore)
|
|
501
496
|
}
|
|
502
497
|
|
|
503
498
|
if (currentMatch.el.enabled === false) {
|
|
504
|
-
return ToolsInteract._actionFailure(
|
|
499
|
+
return ToolsInteract._actionFailure(actionType, selector, resolvedTarget, 'ELEMENT_NOT_INTERACTABLE', true, fingerprintBefore)
|
|
505
500
|
}
|
|
506
501
|
|
|
507
502
|
const bounds = ToolsInteract._normalizeBounds(currentMatch.el.bounds) ?? resolved.bounds
|
|
508
503
|
if (!bounds || bounds[2] <= bounds[0] || bounds[3] <= bounds[1]) {
|
|
509
|
-
return ToolsInteract._actionFailure(
|
|
504
|
+
return ToolsInteract._actionFailure(actionType, selector, resolvedTarget, 'ELEMENT_NOT_INTERACTABLE', true, fingerprintBefore)
|
|
510
505
|
}
|
|
511
506
|
|
|
512
507
|
const x = Math.floor((bounds[0] + bounds[2]) / 2)
|
|
@@ -515,23 +510,20 @@ export class ToolsInteract {
|
|
|
515
510
|
|
|
516
511
|
if (!tapResult.success) {
|
|
517
512
|
const fingerprintAfterFailure = await ToolsInteract._captureFingerprint(resolved.platform, resolved.deviceId)
|
|
518
|
-
return ToolsInteract._actionFailure(
|
|
513
|
+
return ToolsInteract._actionFailure(actionType, selector, resolvedTarget, 'UNKNOWN', false, fingerprintBefore, fingerprintAfterFailure)
|
|
519
514
|
}
|
|
520
515
|
|
|
521
516
|
const fingerprintAfter = await ToolsInteract._captureFingerprint(resolved.platform, resolved.deviceId)
|
|
522
|
-
return {
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
target: {
|
|
528
|
-
selector,
|
|
529
|
-
resolved: resolvedTarget
|
|
530
|
-
},
|
|
517
|
+
return buildActionExecutionResult({
|
|
518
|
+
actionType,
|
|
519
|
+
device: tree?.device,
|
|
520
|
+
selector,
|
|
521
|
+
resolved: resolvedTarget,
|
|
531
522
|
success: true,
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
523
|
+
uiFingerprintBefore: fingerprintBefore,
|
|
524
|
+
uiFingerprintAfter: fingerprintAfter,
|
|
525
|
+
sourceModule: 'interact'
|
|
526
|
+
})
|
|
535
527
|
}
|
|
536
528
|
|
|
537
529
|
static async swipeHandler({ platform = 'android', x1, y1, x2, y2, duration, deviceId }: { platform?: 'android' | 'ios', x1: number, y1: number, x2: number, y2: number, duration: number, deviceId?: string }) {
|
package/src/server/common.ts
CHANGED
|
@@ -112,6 +112,23 @@ export function inferScrollFailure(message: string | undefined): { failureCode:
|
|
|
112
112
|
return { failureCode: 'UNKNOWN', retryable: false }
|
|
113
113
|
}
|
|
114
114
|
|
|
115
|
+
const ACTION_LIFECYCLE_STATE_BY_OUTCOME = {
|
|
116
|
+
success: 'pending_verification',
|
|
117
|
+
failure: 'failed'
|
|
118
|
+
} as const
|
|
119
|
+
|
|
120
|
+
export function determineActionLifecycleState({
|
|
121
|
+
success,
|
|
122
|
+
failure
|
|
123
|
+
}: {
|
|
124
|
+
success: boolean
|
|
125
|
+
failure?: { failureCode: ActionFailureCode; retryable: boolean }
|
|
126
|
+
}): NonNullable<ActionExecutionResult['lifecycle_state']> {
|
|
127
|
+
if (failure) return ACTION_LIFECYCLE_STATE_BY_OUTCOME.failure
|
|
128
|
+
if (success) return ACTION_LIFECYCLE_STATE_BY_OUTCOME.success
|
|
129
|
+
return ACTION_LIFECYCLE_STATE_BY_OUTCOME.success
|
|
130
|
+
}
|
|
131
|
+
|
|
115
132
|
export function buildActionExecutionResult({
|
|
116
133
|
actionType,
|
|
117
134
|
device,
|
|
@@ -121,7 +138,8 @@ export function buildActionExecutionResult({
|
|
|
121
138
|
uiFingerprintBefore,
|
|
122
139
|
uiFingerprintAfter,
|
|
123
140
|
failure,
|
|
124
|
-
details
|
|
141
|
+
details,
|
|
142
|
+
sourceModule
|
|
125
143
|
}: {
|
|
126
144
|
actionType: string
|
|
127
145
|
device?: ActionExecutionResult['device']
|
|
@@ -132,6 +150,7 @@ export function buildActionExecutionResult({
|
|
|
132
150
|
uiFingerprintAfter: string | null
|
|
133
151
|
failure?: { failureCode: ActionFailureCode; retryable: boolean }
|
|
134
152
|
details?: Record<string, unknown>
|
|
153
|
+
sourceModule: 'server' | 'interact'
|
|
135
154
|
}): ActionExecutionResult {
|
|
136
155
|
const timestampMs = Date.now()
|
|
137
156
|
const timestamp = new Date(timestampMs).toISOString()
|
|
@@ -139,6 +158,8 @@ export function buildActionExecutionResult({
|
|
|
139
158
|
action_id: nextActionId(actionType, timestampMs),
|
|
140
159
|
timestamp,
|
|
141
160
|
action_type: actionType,
|
|
161
|
+
lifecycle_state: determineActionLifecycleState({ success, failure }),
|
|
162
|
+
source_module: sourceModule,
|
|
142
163
|
...(device ? { device } : {}),
|
|
143
164
|
target: {
|
|
144
165
|
selector,
|