mobile-debug-mcp 0.26.0 → 0.26.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,230 @@
1
+ # RFC 006 — Runtime Action Instrumentation & Binding Layer
2
+
3
+ ## 1. Summary
4
+
5
+ This RFC defines how the execution model in RFC 005 is mapped onto the current runtime behaviour of the system.
6
+
7
+ It does not assume a new instrumentation system exists. Instead, it describes how lifecycle semantics are derived from existing execution flows, logs, module behaviour, and lightweight runtime metadata attached to action envelopes.
8
+
9
+ It specifies:
10
+ - how existing `action_type` values are interpreted under RFC 005 semantics
11
+ - how lifecycle states are inferred from current runtime execution
12
+ - how `src/server` and `src/interact` currently participate in execution
13
+ - how legacy and platform actions are incorporated into the model
14
+
15
+ This RFC is a runtime binding and normalisation layer over existing implementation behaviour.
16
+
17
+ ---
18
+
19
+ ## 2. Problem Statement
20
+
21
+ RFC 005 defines a unified execution lifecycle:
22
+ - Resolved
23
+ - Dispatched
24
+ - Pending Verification
25
+ - Verified
26
+ - Failed
27
+
28
+ However, the current system already contains:
29
+ - a concrete `action_type` execution model
30
+ - execution logic split across `src/server` and `src/interact`
31
+ - platform-specific actions (tap_element, type_text, press_back, start_app, restart_app, scroll_to_element)
32
+ - distributed logging and partial instrumentation within modules
33
+
34
+ There is no central instrumentation system and no explicit lifecycle emitter.
35
+ Instead, lifecycle meaning is inferred from runtime behaviour and the `lifecycle_state` / `source_module` fields now attached to action envelopes.
36
+
37
+ This results in:
38
+ - implicit execution state transitions
39
+ - distributed observability signals
40
+ - non-uniform traceability across actions
41
+
42
+ ---
43
+
44
+ ## 3. Design Goals
45
+
46
+ This layer MUST:
47
+
48
+ - Map existing runtime behaviour to RFC 005 lifecycle semantics
49
+ - Use existing `action_type` values as the authoritative execution taxonomy
50
+ - Derive lifecycle states from observable runtime transitions
51
+ - Reflect actual module responsibilities (not idealised separation)
52
+ - Work with existing logging and execution hooks
53
+ - Preserve compatibility with all current action implementations
54
+
55
+ ---
56
+
57
+ ## 4. Runtime Execution Flow (Observed)
58
+
59
+ Current observed execution flow:
60
+
61
+ UI Request
62
+ → src/server (routing + validation)
63
+ → src/interact (execution + platform dispatch)
64
+ → platform layer
65
+ → response handling + logs
66
+ → optional state verification (where available)
67
+
68
+ Lifecycle states are derived from this flow rather than explicitly emitted.
69
+
70
+ ---
71
+
72
+ ## 5. Action Type Mapping (Current Runtime)
73
+
74
+ This RFC maps existing `action_type` values to RFC 005 semantics.
75
+
76
+ | action_type | RFC 005 Semantic Interpretation |
77
+ |------------|---------------------------------|
78
+ | tap | Selection |
79
+ | tap_element | Selection |
80
+ | type_text | Input |
81
+ | press_back | Navigation |
82
+ | start_app | System Action |
83
+ | restart_app | System Action |
84
+ | scroll_to_element | Navigation |
85
+
86
+ This table reflects the current runtime contract.
87
+
88
+ ---
89
+
90
+ ## 6. Lifecycle State Derivation
91
+
92
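+ Lifecycle states are not emitted as explicit events. Other than the `lifecycle_state` value (`pending_verification` or `failed`) carried on action envelopes, they are inferred as follows: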
93
+
94
+ ### 6.1 Resolved
95
+ Inferred when:
96
+ - src/server accepts request
97
+ - action is validated and normalized
98
+ - action_id is assigned (or equivalent identifier exists)
99
+
100
+ ---
101
+
102
+ ### 6.2 Dispatched
103
+ Inferred when:
104
+ - control passes from src/server to src/interact
105
+ - execution call is issued to platform layer
106
+
107
+ ---
108
+
109
+ ### 6.3 Pending Verification
110
+ Inferred when:
111
+ - platform execution returns a result
112
+ - before any UI/state evaluation occurs
113
+
114
+ ---
115
+
116
+ ### 6.4 Verified / Failed
117
+ Inferred when:
118
+ - post-execution evaluation is performed (if available)
119
+
120
+ Rules:
121
+ - Verified = expected outcome observed in UI/state/log signals
122
+ - Failed = timeout, error, or mismatch in expected outcome
123
+
124
+ Where no formal verification exists, outcome is derived from best available signals (logs, UI diff, or absence of error).
125
+
126
+ ---
127
+
128
+ ## 7. Instrumentation Reality
129
+
130
+ There is no central instrumentation layer in the current system.
131
+
132
+ Instead:
133
+ - src/server emits partial logs during routing and validation
134
+ - src/interact emits execution logs and platform responses
135
+ - platform adapters may emit additional debugging information
136
+ - action envelopes now carry lightweight lifecycle metadata for post-dispatch state and source ownership
137
+
138
+ Lifecycle traceability is therefore assembled from distributed signals rather than a unified event system.
139
+
140
+ ---
141
+
142
+ ## 8. Module Responsibilities (Observed Behaviour)
143
+
144
+ ### src/server
145
+ - receives action requests
146
+ - performs validation and normalization
147
+ - assigns identifiers where applicable
148
+ - routes actions to src/interact
149
+ - emits partial logs for request lifecycle
150
+
151
+ ---
152
+
153
+ ### src/interact
154
+ - executes platform-specific actions
155
+ - handles retries and fallback behaviours
156
+ - emits execution logs
157
+ - returns execution results
158
+ - may perform lightweight post-processing
159
+
160
+ ---
161
+
162
+ ## 9. Verification Reality
163
+
164
+ Verification is not a uniform system-wide layer.
165
+
166
+ It may occur via:
167
+ - UI state comparison (where available)
168
+ - log-based confirmation
169
+ - absence of error signals
170
+ - platform feedback
171
+
172
+ Verification outcomes are deterministic where reliable state signals or explicit evaluation paths are available, and best-effort where no formal verifier exists.
173
+
174
+ ---
175
+
176
+ ## 10. Legacy and Special Actions
177
+
178
+ Actions such as:
179
+ - scroll_to_element
180
+ - start_app
181
+ - restart_app
182
+ - press_back
183
+
184
+ are fully supported in the runtime.
185
+
186
+ These actions:
187
+ - may bypass full lifecycle observability
188
+ - may not have explicit verification paths
189
+ - are interpreted using best-effort semantic mapping
190
+
191
+ ---
192
+
193
+ ## 11. Observability Model
194
+
195
+ Observability is currently distributed across:
196
+ - src/server logs
197
+ - src/interact logs
198
+ - platform debug output
199
+ - action envelope metadata
200
+
201
+ There is no unified event schema.
202
+
203
+ Lifecycle reconstruction requires correlation of:
204
+ - action_type
205
+ - timestamps
206
+ - execution boundaries
207
+ - error signals
208
+
209
+ ---
210
+
211
+ ## 12. Relationship to RFC 005
212
+
213
+ RFC 005 defines the ideal execution lifecycle semantics.
214
+
215
+ RFC 006 defines how those semantics are interpreted from the existing runtime system.
216
+
217
+ Together:
218
+ - RFC 005 = conceptual correctness model
219
+ - RFC 006 = runtime behavioural mapping layer
220
+
221
+ ---
222
+
223
+ ## 13. Summary
224
+
225
+ This RFC ensures:
226
+ - lifecycle semantics can be derived from current runtime behaviour
227
+ - the existing `action_type` contract is preserved as the source of truth
228
+ - no new instrumentation infrastructure is assumed
229
+ - real module responsibilities are accurately represented
230
+ - observability is understood as distributed rather than centralised
@@ -41,7 +41,7 @@ Outcome-specific guidance:
41
41
  - visible navigation expected -> `wait_for_screen_change` (optional) -> `expect_screen`
42
42
  - local UI change expected -> `wait_for_ui` (optional) -> `expect_element_visible`
43
43
  - readable element state expected -> `wait_for_ui` (optional) -> `expect_state`
44
- - backend/API activity expected without a visible UI change -> compare `get_screen_fingerprint` before/after, then call `get_network_activity` immediately after the action and `classify_action_outcome` with the observed requests
44
+ - backend/API activity expected without a visible UI change -> compare `get_screen_fingerprint` before/after, then call `classify_action_outcome` with the runtime `action_type`; collect `get_network_activity` only if the result remains ambiguous
45
45
 
46
46
  For backend/API activity, `wait_for_screen_change` is not the right verification tool unless a visible transition is also expected.
47
47
 
@@ -69,6 +69,8 @@ MUST be returned in this structure:
69
69
  action_id: string,
70
70
  timestamp: string,
71
71
  action_type: string,
72
+ lifecycle_state?: 'pending_verification' | 'failed',
73
+ source_module?: 'server' | 'interact',
72
74
  target: {
73
75
  selector: object,
74
76
  resolved: object | null
@@ -87,6 +89,8 @@ Rules:
87
89
 
88
90
  - `success` is at the top level, not nested
89
91
  - `target` contains only selection and resolution context
92
+ - `lifecycle_state` reflects the post-dispatch runtime state
93
+ - `source_module` identifies where the envelope was produced
90
94
  - fingerprints represent observed pre/post UI state on a best-effort basis
91
95
  - `failure_code` is optional but MUST be used when a structured mapping exists
92
96
 
@@ -294,11 +298,11 @@ Tool: `classify_action_outcome`
294
298
 
295
299
  Rules:
296
300
 
297
- - MAY use UI, network, and log signals
301
+ - MAY use UI, action, network, and log signals
298
302
  - MUST be deterministic
299
303
  - MUST NOT replace `expect_*` tools
300
304
  - MUST be treated as a supplementary signal only
301
- - SHOULD be used with `get_network_activity` when the expected outcome is backend/API activity without a visible UI change
305
+ - SHOULD be used with `get_network_activity` only when the outcome is still ambiguous after routing by `action_type`
302
306
 
303
307
  It is not a verification mechanism.
304
308
 
@@ -17,6 +17,7 @@ Important:
17
17
 
18
18
  - `wait_for_*` tools must not be used as the final verification of action success when an applicable `expect_*` tool exists.
19
19
  - action tools report execution success, not outcome correctness.
20
+ - `classify_action_outcome` should receive the runtime `action_type` when you want routing to distinguish local-state and side-effect actions.
20
21
 
21
22
  ## tap / swipe / type_text / press_back
22
23
 
@@ -35,6 +36,8 @@ Example response:
35
36
  "action_id": "tap_1710000000000_1",
36
37
  "timestamp": "2026-04-23T08:00:00.000Z",
37
38
  "action_type": "tap",
39
+ "lifecycle_state": "pending_verification",
40
+ "source_module": "server",
38
41
  "target": { "selector": { "x": 100, "y": 200 }, "resolved": null },
39
42
  "success": true,
40
43
  "ui_fingerprint_before": "fp_before",
@@ -54,10 +57,10 @@ Preferred verification:
54
57
  - navigation outcome known -> `expect_screen`
55
58
  - local UI change known -> `expect_element_visible`
56
59
  - readable element state known -> `expect_state`
57
- - backend/API activity expected -> `classify_action_outcome` + `get_network_activity`
60
+ - backend/API activity expected -> `classify_action_outcome` + optional `get_network_activity` if the UI signal remains ambiguous
58
61
 
59
- Use `wait_for_screen_change` only when a visible transition is the expected outcome. If a button should trigger an API request but the screen should stay the same, rely on network activity and classification instead.
60
- For backend-only actions, prefer comparing `get_screen_fingerprint` before/after and call `get_network_activity` immediately after the action; do not wait on `wait_for_screen_change` if no visible transition is expected.
62
+ Use `wait_for_screen_change` only when a visible transition is the expected outcome. If a button should trigger an API request but the screen should stay the same, rely on `action_type` plus classification first.
63
+ For backend-only actions, prefer comparing `get_screen_fingerprint` before/after and collect `get_network_activity` immediately after the action only if the result is still ambiguous; do not wait on `wait_for_screen_change` if no visible transition is expected.
61
64
  Use `wait_for_ui_change` when the screen stays in place but visible text or element state should change.
62
65
 
63
66
  ---
@@ -332,6 +335,8 @@ Success response:
332
335
  "action_id": "tap_element_1710000000000_1",
333
336
  "timestamp": "2026-04-23T08:00:00.000Z",
334
337
  "action_type": "tap_element",
338
+ "lifecycle_state": "pending_verification",
339
+ "source_module": "interact",
335
340
  "target": {
336
341
  "selector": { "elementId": "el_123" },
337
342
  "resolved": {
@@ -507,17 +512,18 @@ Notes:
507
512
 
508
513
  ## classify_action_outcome + get_network_activity
509
514
 
510
- Use this pair when the action is expected to trigger network/backend work and the screen may not visibly change.
515
+ Use this pair when the action may trigger network/backend work and the screen may not visibly change.
511
516
 
512
517
  Pattern:
513
518
 
514
519
  1. perform the action
515
520
  2. call `classify_action_outcome` with `uiChanged` from `wait_for_screen_change` or a screen fingerprint comparison
516
- 3. if the classifier asks for it, call `get_network_activity`
517
- 4. call `classify_action_outcome` again with `networkRequests`
521
+ 3. in that same call, pass the runtime `action_type` value as `actionType`
522
+ 4. collect `get_network_activity` only if the action is side-effect oriented and the UI signal remains ambiguous
523
+ 5. call `classify_action_outcome` again with `networkRequests` if you collected them
518
524
 
519
525
  Guidance:
520
526
 
521
527
  - `uiChanged=true` or `expectedElementVisible=true` is classified as `success`, so no network inspection is needed
522
- - `nextAction="call_get_network_activity"` means the UI signal was inconclusive and the agent should inspect network activity
523
- - if network requests succeed but the UI stays unchanged, treat the outcome as a backend/API result rather than a screen transition
528
+ - local-state actions should prefer refreshed snapshots, `expect_state`, or `expect_element_visible` over default network inspection
529
+ - network activity is auxiliary evidence, not mandatory proof
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mobile-debug-mcp",
3
- "version": "0.26.0",
3
+ "version": "0.26.2",
4
4
  "description": "MCP server for mobile app debugging (Android + iOS), with focus on security and reliability",
5
5
  "type": "module",
6
6
  "bin": {
@@ -1,5 +1,6 @@
1
1
  export type ActionOutcome = 'success' | 'no_op' | 'backend_failure' | 'ui_failure' | 'unknown'
2
2
  export type NetworkRequestStatus = 'success' | 'failure' | 'retryable'
3
+ export type ActionCategory = 'local_state' | 'side_effect'
3
4
 
4
5
  export interface NetworkRequest {
5
6
  endpoint: string
@@ -9,6 +10,8 @@ export interface NetworkRequest {
9
10
  export interface ClassifyActionOutcomeInput {
10
11
  uiChanged: boolean
11
12
  expectedElementVisible?: boolean | null
13
+ /** Concrete action_type from the runtime action result (for example: tap, type_text, start_app). */
14
+ actionType?: string | null
12
15
  /** null = get_network_activity has not been called yet */
13
16
  networkRequests?: NetworkRequest[] | null
14
17
  hasLogErrors?: boolean | null
@@ -17,8 +20,29 @@ export interface ClassifyActionOutcomeInput {
17
20
  export interface ClassifyActionOutcomeResult {
18
21
  outcome: ActionOutcome
19
22
  reasoning: string
20
- /** Present when the caller must call get_network_activity before a final classification is possible */
21
- nextAction?: 'call_get_network_activity'
23
+ }
24
+
25
/**
 * Routing category for each runtime `action_type` the classifier recognises.
 *
 * Keys are lower-case because lookups are performed on a trimmed, lower-cased action type.
 */
const ACTION_CATEGORY_BY_TYPE: Record<string, ActionCategory> = {
  tap: 'local_state',
  tap_element: 'local_state',
  swipe: 'local_state',
  scroll_to_element: 'local_state',
  type_text: 'local_state',
  press_back: 'local_state',
  start_app: 'side_effect',
  restart_app: 'side_effect',
  terminate_app: 'side_effect',
  reset_app_data: 'side_effect',
  install_app: 'side_effect',
  build_app: 'side_effect',
  build_and_install: 'side_effect'
}

/**
 * Maps a runtime `action_type` to its routing category.
 *
 * @param actionType - action type from the runtime action result; matched case-insensitively
 *   after trimming surrounding whitespace
 * @returns `null` when no usable action type was supplied (non-string, empty, or whitespace-only);
 *   the mapped category for a known type; `'side_effect'` for any unrecognised type
 */
function inferActionCategory(actionType?: string | null): ActionCategory | null {
  if (typeof actionType !== 'string') return null
  const normalized = actionType.trim().toLowerCase()
  if (!normalized) return null
  // Own-property check: a bare index lookup on a plain object would resolve inherited
  // members such as `constructor` or `__proto__` and return a non-category value.
  if (!Object.prototype.hasOwnProperty.call(ACTION_CATEGORY_BY_TYPE, normalized)) {
    return 'side_effect'
  }
  return ACTION_CATEGORY_BY_TYPE[normalized] ?? 'side_effect'
}
23
47
 
24
48
  /**
@@ -26,39 +50,55 @@ export interface ClassifyActionOutcomeResult {
26
50
  * Same inputs always produce the same output.
27
51
  */
28
52
  export function classifyActionOutcome(input: ClassifyActionOutcomeInput): ClassifyActionOutcomeResult {
29
- const { uiChanged, expectedElementVisible, networkRequests, hasLogErrors } = input
53
+ const { uiChanged, expectedElementVisible, actionType, networkRequests, hasLogErrors } = input
54
+ const actionCategory = inferActionCategory(actionType)
30
55
 
31
56
  // Step 1 — UI signal is positive
32
57
  if (uiChanged || expectedElementVisible === true) {
33
58
  return { outcome: 'success', reasoning: expectedElementVisible === true ? 'expected element is visible' : 'UI changed after action' }
34
59
  }
35
60
 
36
- // Step 2 — UI did not change; network signal is required
37
- if (networkRequests === null || networkRequests === undefined) {
61
+ // Step 2 — no action type means we cannot choose a safe routing path
62
+ if (actionCategory === null) {
38
63
  return {
39
64
  outcome: 'unknown',
40
- reasoning: 'UI did not change; get_network_activity must be called before classification can proceed',
41
- nextAction: 'call_get_network_activity'
65
+ reasoning: 'actionType was not supplied; pass the runtime action_type so the classifier can distinguish local-state and side-effect routing'
42
66
  }
43
67
  }
44
68
 
45
- // Step 3 any network failure
46
- const failedRequest = networkRequests.find((r) => r.status === 'failure' || r.status === 'retryable')
69
+ const failedRequest = networkRequests?.find((r) => r.status === 'failure' || r.status === 'retryable')
47
70
  if (failedRequest) {
48
71
  return { outcome: 'backend_failure', reasoning: `network request ${failedRequest.endpoint} returned ${failedRequest.status}` }
49
72
  }
50
73
 
51
- // Step 4no network requests at all
74
+ // Step 3local-state actions should be verified with state-specific signals first
75
+ if (actionCategory === 'local_state') {
76
+ const logNote = hasLogErrors ? ' (log errors present)' : ''
77
+ return {
78
+ outcome: 'no_op',
79
+ reasoning: `local-state action${logNote}; use expect_state, refreshed snapshot comparison, or expect_element_visible instead of defaulting to network inspection`
80
+ }
81
+ }
82
+
83
+ // Step 4 — side-effect actions may legitimately need network or log inspection
84
+ if (networkRequests === null || networkRequests === undefined) {
85
+ return {
86
+ outcome: 'unknown',
87
+ reasoning: 'side-effect action without network data; inspect network or log signals only if the outcome is still ambiguous'
88
+ }
89
+ }
90
+
91
+ // Step 5 — no network requests at all
52
92
  if (networkRequests.length === 0) {
53
93
  const logNote = hasLogErrors ? ' (log errors present)' : ''
54
- return { outcome: 'no_op', reasoning: `no UI change and no network activity${logNote}` }
94
+ return { outcome: 'no_op', reasoning: `side-effect action and no network activity${logNote}` }
55
95
  }
56
96
 
57
- // Step 5 — network requests exist and all succeeded
97
+ // Step 6 — network requests exist and all succeeded
58
98
  if (networkRequests.every((r) => r.status === 'success')) {
59
99
  return { outcome: 'ui_failure', reasoning: 'network requests succeeded but UI did not change' }
60
100
  }
61
101
 
62
- // Step 6 — fallback
102
+ // Step 7 — fallback
63
103
  return { outcome: 'unknown', reasoning: 'signals are inconclusive' }
64
104
  }
@@ -6,7 +6,7 @@ export { AndroidInteract, iOSInteract };
6
6
  import { resolveTargetDevice } from '../utils/resolve-device.js'
7
7
  import { ToolsObserve } from '../observe/index.js'
8
8
  import { computeSnapshotSignature } from '../observe/snapshot-metadata.js'
9
- import { nextActionId } from '../server/common.js'
9
+ import { buildActionExecutionResult } from '../server/common.js'
10
10
  import type {
11
11
  ActionFailureCode,
12
12
  ActionTargetResolved,
@@ -291,27 +291,25 @@ export class ToolsInteract {
291
291
  }
292
292
 
293
293
  private static _actionFailure(
294
- actionId: string,
295
- timestamp: string,
296
294
  actionType: string,
297
295
  selector: Record<string, unknown> | null,
298
296
  resolved: ActionTargetResolved | null,
299
297
  failureCode: ActionFailureCode,
300
298
  retryable: boolean,
301
299
  uiFingerprintBefore: string | null,
302
- uiFingerprintAfter?: string | null
300
+ uiFingerprintAfter?: string | null,
301
+ sourceModule: 'server' | 'interact' = 'interact'
303
302
  ): TapElementResponse {
304
- return {
305
- action_id: actionId,
306
- timestamp,
307
- action_type: actionType,
308
- target: { selector, resolved },
303
+ return buildActionExecutionResult({
304
+ actionType,
305
+ selector,
306
+ resolved,
309
307
  success: false,
310
- failure_code: failureCode,
311
- retryable,
312
- ui_fingerprint_before: uiFingerprintBefore,
313
- ui_fingerprint_after: uiFingerprintAfter
314
- }
308
+ uiFingerprintBefore,
309
+ uiFingerprintAfter: uiFingerprintAfter ?? null,
310
+ failure: { failureCode, retryable },
311
+ sourceModule
312
+ })
315
313
  }
316
314
 
317
315
  static _resetResolvedUiElementsForTests() {
@@ -472,14 +470,11 @@ export class ToolsInteract {
472
470
  }
473
471
 
474
472
  static async tapElementHandler({ elementId }: { elementId: string }): Promise<TapElementResponse> {
475
- const timestampMs = Date.now()
476
- const timestamp = new Date(timestampMs).toISOString()
477
473
  const actionType = 'tap_element'
478
- const actionId = nextActionId(actionType, timestampMs)
479
474
  const selector = { elementId }
480
475
  const resolved = ToolsInteract._resolvedUiElements.get(elementId)
481
476
  if (!resolved) {
482
- return ToolsInteract._actionFailure(actionId, timestamp, actionType, selector, null, 'STALE_REFERENCE', true, null)
477
+ return ToolsInteract._actionFailure(actionType, selector, null, 'STALE_REFERENCE', true, null)
483
478
  }
484
479
 
485
480
  const fingerprintBefore = await ToolsInteract._captureFingerprint(resolved.platform, resolved.deviceId)
@@ -491,22 +486,22 @@ export class ToolsInteract {
491
486
  const currentMatch = ToolsInteract._findCurrentResolvedElement(elements, treePlatform, treeDeviceId, resolved)
492
487
 
493
488
  if (!currentMatch) {
494
- return ToolsInteract._actionFailure(actionId, timestamp, actionType, selector, null, 'STALE_REFERENCE', true, fingerprintBefore)
489
+ return ToolsInteract._actionFailure(actionType, selector, null, 'STALE_REFERENCE', true, fingerprintBefore)
495
490
  }
496
491
 
497
492
  const resolvedTarget = ToolsInteract._resolvedTargetFromElement(resolved.elementId, currentMatch.el, currentMatch.index)
498
493
 
499
494
  if (!ToolsInteract._isVisibleElement(currentMatch.el)) {
500
- return ToolsInteract._actionFailure(actionId, timestamp, actionType, selector, resolvedTarget, 'ELEMENT_NOT_INTERACTABLE', true, fingerprintBefore)
495
+ return ToolsInteract._actionFailure(actionType, selector, resolvedTarget, 'ELEMENT_NOT_INTERACTABLE', true, fingerprintBefore)
501
496
  }
502
497
 
503
498
  if (currentMatch.el.enabled === false) {
504
- return ToolsInteract._actionFailure(actionId, timestamp, actionType, selector, resolvedTarget, 'ELEMENT_NOT_INTERACTABLE', true, fingerprintBefore)
499
+ return ToolsInteract._actionFailure(actionType, selector, resolvedTarget, 'ELEMENT_NOT_INTERACTABLE', true, fingerprintBefore)
505
500
  }
506
501
 
507
502
  const bounds = ToolsInteract._normalizeBounds(currentMatch.el.bounds) ?? resolved.bounds
508
503
  if (!bounds || bounds[2] <= bounds[0] || bounds[3] <= bounds[1]) {
509
- return ToolsInteract._actionFailure(actionId, timestamp, actionType, selector, resolvedTarget, 'ELEMENT_NOT_INTERACTABLE', true, fingerprintBefore)
504
+ return ToolsInteract._actionFailure(actionType, selector, resolvedTarget, 'ELEMENT_NOT_INTERACTABLE', true, fingerprintBefore)
510
505
  }
511
506
 
512
507
  const x = Math.floor((bounds[0] + bounds[2]) / 2)
@@ -515,23 +510,20 @@ export class ToolsInteract {
515
510
 
516
511
  if (!tapResult.success) {
517
512
  const fingerprintAfterFailure = await ToolsInteract._captureFingerprint(resolved.platform, resolved.deviceId)
518
- return ToolsInteract._actionFailure(actionId, timestamp, actionType, selector, resolvedTarget, 'UNKNOWN', false, fingerprintBefore, fingerprintAfterFailure)
513
+ return ToolsInteract._actionFailure(actionType, selector, resolvedTarget, 'UNKNOWN', false, fingerprintBefore, fingerprintAfterFailure)
519
514
  }
520
515
 
521
516
  const fingerprintAfter = await ToolsInteract._captureFingerprint(resolved.platform, resolved.deviceId)
522
- return {
523
- action_id: actionId,
524
- timestamp,
525
- action_type: actionType,
526
- ...(tree?.device ? { device: tree.device } : {}),
527
- target: {
528
- selector,
529
- resolved: resolvedTarget
530
- },
517
+ return buildActionExecutionResult({
518
+ actionType,
519
+ device: tree?.device,
520
+ selector,
521
+ resolved: resolvedTarget,
531
522
  success: true,
532
- ui_fingerprint_before: fingerprintBefore,
533
- ui_fingerprint_after: fingerprintAfter
534
- }
523
+ uiFingerprintBefore: fingerprintBefore,
524
+ uiFingerprintAfter: fingerprintAfter,
525
+ sourceModule: 'interact'
526
+ })
535
527
  }
536
528
 
537
529
  static async swipeHandler({ platform = 'android', x1, y1, x2, y2, duration, deviceId }: { platform?: 'android' | 'ios', x1: number, y1: number, x2: number, y2: number, duration: number, deviceId?: string }) {
@@ -112,6 +112,23 @@ export function inferScrollFailure(message: string | undefined): { failureCode:
112
112
  return { failureCode: 'UNKNOWN', retryable: false }
113
113
  }
114
114
 
115
/** Lifecycle state attached to an action envelope, keyed by execution outcome. */
const ACTION_LIFECYCLE_STATE_BY_OUTCOME = {
  success: 'pending_verification',
  failure: 'failed'
} as const

/**
 * Derives the `lifecycle_state` value for an action envelope.
 *
 * @param success - whether the action reported execution success
 * @param failure - optional structured failure details; their presence alone marks the action failed
 * @returns `'failed'` when failure details are present or `success` is false;
 *   `'pending_verification'` only for a successful action without failure details
 */
export function determineActionLifecycleState({
  success,
  failure
}: {
  success: boolean
  failure?: { failureCode: ActionFailureCode; retryable: boolean }
}): NonNullable<ActionExecutionResult['lifecycle_state']> {
  // An unsuccessful action without structured failure details is still a failure;
  // it must not be reported as awaiting verification next to `success: false`.
  if (failure || !success) return ACTION_LIFECYCLE_STATE_BY_OUTCOME.failure
  return ACTION_LIFECYCLE_STATE_BY_OUTCOME.success
}
131
+
115
132
  export function buildActionExecutionResult({
116
133
  actionType,
117
134
  device,
@@ -121,7 +138,8 @@ export function buildActionExecutionResult({
121
138
  uiFingerprintBefore,
122
139
  uiFingerprintAfter,
123
140
  failure,
124
- details
141
+ details,
142
+ sourceModule
125
143
  }: {
126
144
  actionType: string
127
145
  device?: ActionExecutionResult['device']
@@ -132,6 +150,7 @@ export function buildActionExecutionResult({
132
150
  uiFingerprintAfter: string | null
133
151
  failure?: { failureCode: ActionFailureCode; retryable: boolean }
134
152
  details?: Record<string, unknown>
153
+ sourceModule: 'server' | 'interact'
135
154
  }): ActionExecutionResult {
136
155
  const timestampMs = Date.now()
137
156
  const timestamp = new Date(timestampMs).toISOString()
@@ -139,6 +158,8 @@ export function buildActionExecutionResult({
139
158
  action_id: nextActionId(actionType, timestampMs),
140
159
  timestamp,
141
160
  action_type: actionType,
161
+ lifecycle_state: determineActionLifecycleState({ success, failure }),
162
+ source_module: sourceModule,
142
163
  ...(device ? { device } : {}),
143
164
  target: {
144
165
  selector,