mobile-debug-mcp 0.26.0 → 0.26.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,35 +1,72 @@
1
+ const ACTION_CATEGORY_BY_TYPE = {
2
+ tap: 'local_state',
3
+ tap_element: 'local_state',
4
+ swipe: 'local_state',
5
+ scroll_to_element: 'local_state',
6
+ type_text: 'local_state',
7
+ press_back: 'local_state',
8
+ start_app: 'side_effect',
9
+ restart_app: 'side_effect',
10
+ terminate_app: 'side_effect',
11
+ reset_app_data: 'side_effect',
12
+ install_app: 'side_effect',
13
+ build_app: 'side_effect',
14
+ build_and_install: 'side_effect'
15
+ };
16
+ function inferActionCategory(actionType) {
17
+ if (typeof actionType !== 'string')
18
+ return null;
19
+ const normalized = actionType.trim().toLowerCase();
20
+ if (!normalized)
21
+ return null;
22
+ return ACTION_CATEGORY_BY_TYPE[normalized] ?? 'side_effect';
23
+ }
1
24
  /**
2
25
  * Pure deterministic classifier. Applies rules in fixed order.
3
26
  * Same inputs always produce the same output.
4
27
  */
5
28
  export function classifyActionOutcome(input) {
6
- const { uiChanged, expectedElementVisible, networkRequests, hasLogErrors } = input;
29
+ const { uiChanged, expectedElementVisible, actionType, networkRequests, hasLogErrors } = input;
30
+ const actionCategory = inferActionCategory(actionType);
7
31
  // Step 1 — UI signal is positive
8
32
  if (uiChanged || expectedElementVisible === true) {
9
33
  return { outcome: 'success', reasoning: expectedElementVisible === true ? 'expected element is visible' : 'UI changed after action' };
10
34
  }
11
- // Step 2 — UI did not change; network signal is required
12
- if (networkRequests === null || networkRequests === undefined) {
35
+ // Step 2 — no action type means we cannot choose a safe routing path
36
+ if (actionCategory === null) {
13
37
  return {
14
38
  outcome: 'unknown',
15
- reasoning: 'UI did not change; get_network_activity must be called before classification can proceed',
16
- nextAction: 'call_get_network_activity'
39
+ reasoning: 'actionType was not supplied; pass the runtime action_type so the classifier can distinguish local-state and side-effect routing'
17
40
  };
18
41
  }
19
- // Step 3 — any network failure
20
- const failedRequest = networkRequests.find((r) => r.status === 'failure' || r.status === 'retryable');
42
+ const failedRequest = networkRequests?.find((r) => r.status === 'failure' || r.status === 'retryable');
21
43
  if (failedRequest) {
22
44
  return { outcome: 'backend_failure', reasoning: `network request ${failedRequest.endpoint} returned ${failedRequest.status}` };
23
45
  }
24
- // Step 4 — no network requests at all
46
+ // Step 3 — local-state actions should be verified with state-specific signals first
47
+ if (actionCategory === 'local_state') {
48
+ const logNote = hasLogErrors ? ' (log errors present)' : '';
49
+ return {
50
+ outcome: 'no_op',
51
+ reasoning: `local-state action${logNote}; use expect_state, refreshed snapshot comparison, or expect_element_visible instead of defaulting to network inspection`
52
+ };
53
+ }
54
+ // Step 4 — side-effect actions may legitimately need network or log inspection
55
+ if (networkRequests === null || networkRequests === undefined) {
56
+ return {
57
+ outcome: 'unknown',
58
+ reasoning: 'side-effect action without network data; inspect network or log signals only if the outcome is still ambiguous'
59
+ };
60
+ }
61
+ // Step 5 — no network requests at all
25
62
  if (networkRequests.length === 0) {
26
63
  const logNote = hasLogErrors ? ' (log errors present)' : '';
27
- return { outcome: 'no_op', reasoning: `no UI change and no network activity${logNote}` };
64
+ return { outcome: 'no_op', reasoning: `side-effect action and no network activity${logNote}` };
28
65
  }
29
- // Step 5 — network requests exist and all succeeded
66
+ // Step 6 — network requests exist and all succeeded
30
67
  if (networkRequests.every((r) => r.status === 'success')) {
31
68
  return { outcome: 'ui_failure', reasoning: 'network requests succeeded but UI did not change' };
32
69
  }
33
- // Step 6 — fallback
70
+ // Step 7 — fallback
34
71
  return { outcome: 'unknown', reasoning: 'signals are inconclusive' };
35
72
  }
@@ -344,7 +344,7 @@ Capabilities:
344
344
  Constraints:
345
345
  - Does not verify correctness of the resulting state
346
346
  - Must not be used alone to confirm action success when an applicable expect_* tool exists
347
- - Use classify_action_outcome + get_network_activity when the expected outcome is backend/API activity without a visible UI change
347
+ - For backend/API activity without a visible UI change, pass the runtime action_type into classify_action_outcome and collect network evidence only if the result remains ambiguous
348
348
 
349
349
  Recommended Usage:
350
350
  1. Capture or define the expected outcome
@@ -918,26 +918,29 @@ Failure Handling:
918
918
  name: 'classify_action_outcome',
919
919
  description: `Classify the outcome of the most recent action into exactly one of: success, no_op, backend_failure, ui_failure, unknown.
920
920
 
921
- MUST be called after every action (tap, swipe, type_text, press_back, start_app, etc). Never skip.
922
- Use this with get_network_activity when the expected outcome is backend/API activity without a visible UI change.
923
- For backend/API activity, compare get_screen_fingerprint before and after the action and call get_network_activity immediately after the action instead of waiting for wait_for_screen_change.
921
+ Use the runtime action result's \`action_type\` as \`actionType\` so the classifier can distinguish local-state actions from side-effect actions.
922
+ Use this when the intended outcome is not already fully verified by the UI signal alone.
923
+ For backend/API activity, compare get_screen_fingerprint before and after the action and call get_network_activity immediately after the action if the outcome is still ambiguous.
924
924
 
925
925
  HOW TO GATHER INPUTS before calling:
926
926
  1. Call wait_for_screen_change or compare get_screen_fingerprint before/after — set uiChanged accordingly.
927
927
  2. If you checked for a specific element with wait_for_ui, set expectedElementVisible.
928
- 3. Do NOT call get_network_activity yet omit networkRequests on the first call.
928
+ 3. Pass actionType from the action response when available.
929
+ 4. Only provide networkRequests if you already collected them or want to classify a side-effect action with backend evidence.
929
930
 
930
931
  RULES (applied in order — stop at first match):
931
932
  1. If uiChanged=true OR expectedElementVisible=true → outcome=success
932
- 2. Otherwise this tool returns nextAction="call_get_network_activity" — you MUST call get_network_activity once, then call classify_action_outcome again with the results in networkRequests.
933
+ 2. If actionType is missing → outcome=unknown
933
934
  3. If any request has status=failure or retryable → outcome=backend_failure
934
- 4. If no requests returned → outcome=no_op
935
- 5. If all requests succeeded → outcome=ui_failure
936
- 6. Otherwise → outcome=unknown
935
+ 4. If actionType maps to a local-state action → outcome=no_op; prefer state-based verification and avoid default network fallback
936
+ 5. If actionType maps to a side-effect action and no networkRequests were supplied → outcome=unknown
937
+ 6. If no requests returned → outcome=no_op
938
+ 7. If all requests succeeded → outcome=ui_failure
939
+ 8. Otherwise → outcome=unknown
937
940
 
938
941
  BEHAVIOUR after outcome:
939
942
  - success → continue
940
- - no_op → retry the action once or re-resolve the element
943
+ - no_op → retry with richer state verification or re-resolve the element
941
944
  - backend_failure → stop and report the failing endpoint
942
945
  - ui_failure → stop and report failure
943
946
  - unknown → take one recovery step (e.g. capture_debug_snapshot), then stop`,
@@ -952,9 +955,13 @@ BEHAVIOUR after outcome:
952
955
  type: 'boolean',
953
956
  description: 'true if the element you expected to appear is now visible (from wait_for_ui). Omit if you did not check for a specific element.'
954
957
  },
958
+ actionType: {
959
+ type: 'string',
960
+ description: 'The runtime action_type from the action response (for example tap, tap_element, swipe, type_text, press_back, start_app).'
961
+ },
955
962
  networkRequests: {
956
963
  type: 'array',
957
- description: 'Pass this only after calling get_network_activity as instructed by nextAction. Also use it when the expected outcome is backend/API activity without a visible UI change.',
964
+ description: 'Optional network evidence collected after the action. Use it when the expected outcome is backend/API activity or when the UI signal is ambiguous.',
958
965
  items: {
959
966
  type: 'object',
960
967
  properties: {
@@ -976,7 +983,7 @@ BEHAVIOUR after outcome:
976
983
  name: 'get_network_activity',
977
984
  description: `Returns structured network events captured from platform logs since the last action.
978
985
 
979
- Call this when classify_action_outcome returns nextAction="call_get_network_activity" or immediately after an action whose expected outcome is backend/API activity without a visible UI change.
986
+ Call this immediately after an action when you want backend evidence for a side-effect flow, only if the result is still ambiguous.
980
987
  Do not call more than once per action.
981
988
 
982
989
  Events are filtered to significant (non-background) requests only.
@@ -385,11 +385,13 @@ async function handleStopLogStream(args) {
385
385
  function handleClassifyActionOutcome(args) {
386
386
  const uiChanged = requireBooleanArg(args, 'uiChanged');
387
387
  const expectedElementVisible = getBooleanArg(args, 'expectedElementVisible');
388
+ const actionType = getStringArg(args, 'actionType');
388
389
  const networkRequests = getArrayArg(args, 'networkRequests');
389
390
  const hasLogErrors = getBooleanArg(args, 'hasLogErrors');
390
391
  const result = classifyActionOutcome({
391
392
  uiChanged,
392
393
  expectedElementVisible: expectedElementVisible ?? null,
394
+ actionType: actionType ?? null,
393
395
  networkRequests: networkRequests ?? null,
394
396
  hasLogErrors: hasLogErrors ?? null
395
397
  });
@@ -6,7 +6,7 @@ import { handleToolCall } from './server/tool-handlers.js';
6
6
  export { wrapResponse, toolDefinitions, handleToolCall };
7
7
  export const serverInfo = {
8
8
  name: 'mobile-debug-mcp',
9
- version: '0.26.0'
9
+ version: '0.26.1'
10
10
  };
11
11
  export function createServer() {
12
12
  const server = new Server(serverInfo, {
package/docs/CHANGELOG.md CHANGED
@@ -2,6 +2,9 @@
2
2
 
3
3
  All notable changes to the **Mobile Debug MCP** project will be documented in this file.
4
4
 
5
+ ## [0.26.1]
6
+ - Fixed overuse of `get_network_activity`
7
+
5
8
  ## [0.26.0]
6
9
  - RFC-003 wait/synchronization contract with `snapshot_revision`, `captured_at_ms`, and `loading_state`
7
10
  - Added `wait_for_ui_change` for stable in-place UI mutations
package/docs/ROADMAP.md CHANGED
@@ -1,6 +1,6 @@
1
- # Mobile Debug MCP Prioritized Roadmap
1
+ # Mobile Debug MCP Roadmap
2
2
 
3
- ## Prioritization Criteria
3
+ ## Planning Principles
4
4
 
5
5
  Ordered by:
6
6
 
@@ -26,33 +26,45 @@ Higher task success with fewer retries.
26
26
 
27
27
  ---
28
28
 
29
- # Completed
29
+ # Roadmap Status Overview
30
30
 
31
- These priorities are done and kept here for history:
31
+ ## Completed Foundations
32
32
 
33
- - Priority 1 — Stronger State Verification
34
- - Priority 2 — Richer Element Identity
33
+ | Capability | Status | Notes |
34
+ |-----------|--------|-------|
35
+ | Stronger State Verification | Complete | Foundational verification layer shipped |
36
+ | Richer Element Identity | Complete | Identity and selector confidence foundations shipped |
37
+
38
+ ## Current Focus
39
+
40
+ - Wait and Synchronization Reliability
41
+
42
+ ## Upcoming Work
43
+
44
+ - Long Press Gesture
45
+ - Better Compose / Custom Control Semantics
35
46
 
36
- Completion notes:
47
+ ## Later Horizon
37
48
 
38
- - State-aware verification is now implemented and wired through the tool surface.
39
- - Platform-native element metadata and selector-confidence hints are now part of the runtime contract.
49
+ - Pinch to Zoom
50
+ - Action Trace Correlation
40
51
 
41
52
  ---
42
53
 
43
- # Priority 1 — Stronger State Verification
54
+ # Stronger State Verification
44
55
 
45
56
  ## Why first
46
57
  Highest leverage improvement.
47
58
 
48
- **Status:** Completed
59
+ **Status:** Completed
60
+ **Priority:** P1
49
61
 
50
62
  Most failures are not “can’t act,” they’re:
51
63
  - uncertain state
52
64
  - weak verification
53
65
  - retry loops caused by inference
54
66
 
55
- ## Deliver
67
+ ## Scope
56
68
  - Direct readable control values
57
69
  - Expanded `expect_*` verification
58
70
  - Move from inference to state introspection
@@ -60,7 +72,7 @@ Most failures are not “can’t act,” they’re:
60
72
  ## Expected Impact
61
73
  Very high.
62
74
 
63
- ## Done Criteria
75
+ ## Exit Criteria
64
76
  - Control state readable for core widgets (toggle, slider, input, dropdown)
65
77
  - New expect_* state verifiers implemented
66
78
  - Agents can verify state without visual inference in representative flows
@@ -79,19 +91,20 @@ Blocks or strengthens:
79
91
 
80
92
  ---
81
93
 
82
- # Priority 2 — Richer Element Identity
94
+ # Richer Element Identity
83
95
 
84
96
  ## Why second
85
97
  Directly reduces selector brittleness.
86
98
 
87
- **Status:** Completed
99
+ **Status:** Completed
100
+ **Priority:** P2
88
101
 
89
102
  Improves:
90
103
  - targeting stability
91
104
  - repeatability
92
105
  - agent confidence
93
106
 
94
- ## Deliver
107
+ ## Scope
95
108
  - Stable IDs / test tags prioritization
96
109
  - Selector confidence metadata
97
110
  - Preferred selector hierarchy
@@ -99,7 +112,7 @@ Improves:
99
112
  ## Expected Impact
100
113
  Very high.
101
114
 
102
- ## Done Criteria
115
+ ## Exit Criteria
103
116
  - Stable selector preference order implemented
104
117
  - Test tags/resource IDs surfaced where available
105
118
  - Selector confidence metadata available
@@ -118,18 +131,21 @@ Blocks or strengthens:
118
131
 
119
132
  ---
120
133
 
121
- # Priority 3 — Wait and Synchronization Reliability
134
+ # Wait and Synchronization Reliability
122
135
 
123
136
  ## Why third
124
137
  Reliable async synchronization is foundational for agent success and should precede gesture expansion.
125
138
 
139
+ **Status:** Spec Ready
140
+ **Priority:** P3
141
+
126
142
  Addresses failures where agents:
127
143
  - skip UI waits after actions
128
144
  - rely on network/log signals too early
129
145
  - struggle with in-place UI updates
130
146
  - misread stale UI snapshots
131
147
 
132
- ## Deliver
148
+ ## Scope
133
149
  - UI-first synchronization policy guidance
134
150
  - wait_for_ui_change (hierarchy diff based waiting)
135
151
  - Structured loading state detection
@@ -139,7 +155,7 @@ Addresses failures where agents:
139
155
  ## Expected Impact
140
156
  Very high.
141
157
 
142
- ## Done Criteria
158
+ ## Exit Criteria
143
159
  - wait_for_ui_change implemented
144
160
  - Loading state detection available for representative controls
145
161
  - Snapshot revision or staleness metadata exposed
@@ -163,11 +179,14 @@ Blocks or strengthens:
163
179
 
164
180
  ---
165
181
 
166
- # Priority 4 — Long Press Gesture
182
+ # Long Press Gesture
167
183
 
168
184
  ## Why fourth
169
185
  High utility, relatively low complexity.
170
186
 
187
+ **Status:** Planned
188
+ **Priority:** P4
189
+
171
190
  Unlocks many currently awkward interactions:
172
191
 
173
192
  - context menus
@@ -177,7 +196,7 @@ Unlocks many currently awkward interactions:
177
196
 
178
197
  Broad usefulness.
179
198
 
180
- ## Deliver
199
+ ## Scope
181
200
  New tool:
182
201
 
183
202
  ```json
@@ -191,7 +210,7 @@ Verification alignment:
191
210
  ## Expected Impact
192
211
  High.
193
212
 
194
- ## Done Criteria
213
+ ## Exit Criteria
195
214
  - long_press tool implemented across supported platforms
196
215
  - Duration defaults and overrides supported
197
216
  - Verification patterns for long press outcomes defined
@@ -211,18 +230,21 @@ Strengthens:
211
230
 
212
231
  ---
213
232
 
214
- # Priority 5 — Better Compose / Custom Control Semantics
233
+ # Better Compose / Custom Control Semantics
215
234
 
216
235
  ## Why fifth
217
236
  Important, but strengthened by priorities 1–4 first.
218
237
 
238
+ **Status:** Planned
239
+ **Priority:** P5
240
+
219
241
  Semantics become more useful once:
220
242
  - identity is stronger
221
243
  - verification is stronger
222
244
  - gestures are richer
223
245
  - synchronization is more reliable
224
246
 
225
- ## Deliver
247
+ ## Scope
226
248
  - Composite control traits
227
249
  - Control role enrichment (adjustable, expandable, selectable_group)
228
250
  - Interaction contracts metadata
@@ -233,7 +255,7 @@ Semantics become more useful once:
233
255
  ## Expected Impact
234
256
  High.
235
257
 
236
- ## Done Criteria
258
+ ## Exit Criteria
237
259
  - Semantic traits implemented for major custom control classes
238
260
  - Interaction contracts surfaced in snapshot model
239
261
  - Confidence model defined for derived semantics
@@ -253,11 +275,14 @@ Depends on:
253
275
 
254
276
  ---
255
277
 
256
- # Priority 6 — Pinch to Zoom
278
+ # Pinch to Zoom
257
279
 
258
280
  ## Why sixth
259
281
  Valuable, but narrower than long press.
260
282
 
283
+ **Status:** Planned
284
+ **Priority:** P6
285
+
261
286
  Applies mainly to:
262
287
  - maps
263
288
  - images
@@ -266,7 +291,7 @@ Applies mainly to:
266
291
 
267
292
  Useful, but less universal.
268
293
 
269
- ## Deliver
294
+ ## Scope
270
295
 
271
296
  ```json
272
297
  pinch_to_zoom(target, scale, center?)
@@ -279,7 +304,7 @@ Verification:
279
304
  ## Expected Impact
280
305
  Medium-high.
281
306
 
282
- ## Done Criteria
307
+ ## Exit Criteria
283
308
  - pinch_to_zoom implemented
284
309
  - Zoom in/out flows supported
285
310
  - Verification primitives for viewport or zoom state available
@@ -297,22 +322,25 @@ Depends on:
297
322
 
298
323
  ---
299
324
 
300
- # Priority 7 — Action Trace Correlation
325
+ # Action Trace Correlation
301
326
 
302
327
  ## Why seventh
303
328
  Very valuable for debugging,
304
329
  but less critical than improving control success first.
305
330
 
331
+ **Status:** Planned
332
+ **Priority:** P7
333
+
306
334
  Improves diagnosis more than task completion.
307
335
 
308
- ## Deliver
336
+ ## Scope
309
337
  - Action correlation metadata
310
338
  - UI/network/log linkage
311
339
 
312
340
  ## Expected Impact
313
341
  Medium-high.
314
342
 
315
- ## Done Criteria
343
+ ## Exit Criteria
316
344
  - Action correlation model defined
317
345
  - UI/network/log linkage captured for representative actions
318
346
  - Correlation metadata exposed to agents
@@ -331,7 +359,7 @@ Depends on:
331
359
 
332
360
  ---
333
361
 
334
- # Delivery Waves
362
+ # Roadmap Sequence
335
363
 
336
364
  ## Dependency Summary
337
365
  Foundational sequence:
@@ -351,7 +379,7 @@ Layer 3 (Interaction Expansion)
351
379
  Layer 4 (Observability)
352
380
  - Priority 7 depends on 1,2,3
353
381
 
354
- ## Wave 1 (Immediate)
382
+ ## Wave 1 (Current Focus)
355
383
  - Stronger State Verification
356
384
  - Richer Element Identity
357
385
  - Wait and Synchronization Reliability
@@ -361,7 +389,7 @@ Make core loop more reliable.
361
389
 
362
390
  ---
363
391
 
364
- ## Wave 2
392
+ ## Wave 2 (Expansion)
365
393
  - Long Press
366
394
  - Better Compose Semantics
367
395
 
@@ -370,7 +398,7 @@ Expand interaction capability.
370
398
 
371
399
  ---
372
400
 
373
- ## Wave 3
401
+ ## Wave 3 (Advanced)
374
402
  - Pinch to Zoom
375
403
  - Action Trace Correlation
376
404
 
@@ -379,7 +407,7 @@ Advanced gestures + observability.
379
407
 
380
408
  ---
381
409
 
382
- # Priority Stack Summary
410
+ # Capability Sequence
383
411
 
384
412
  Execution Order:
385
413
  1. Stronger State Verification
@@ -397,7 +425,7 @@ Rationale:
397
425
 
398
426
  ---
399
427
 
400
- ## Explicitly Deferred
428
+ ## Future Considerations
401
429
  Still out of scope:
402
430
 
403
431
  - Recovery planning logic
@@ -0,0 +1,342 @@
1
+
2
+
3
+ # RFC 004: Verification Routing for Local and Side-Effect Actions
4
+
5
+ ## Status
6
+ Draft
7
+
8
+ ## Summary
9
+
10
+ This RFC corrects a specification flaw in action verification routing where agents may treat lack of obvious UI change as a trigger to inspect network activity by default.
11
+
12
+ The current fallback can cause unnecessary network calls during purely local UI interactions (for example sliders, pickers, toggles, text entry), creating noise and reinforcing incorrect agent behavior.
13
+
14
+ This RFC separates:
15
+ - action verification
16
+ - failure diagnosis
17
+ - backend signal inspection
18
+
19
+ And introduces context-aware routing based on action type.
20
+
21
+ ## Motivation
22
+
23
+ Observed agent sessions showed `get_network_activity` being invoked during local UI manipulation solely because an action produced no coarse-grained UI diff.
24
+
25
+ Current implicit reasoning resembles:
26
+
27
+ ```text
28
+ if uiChanged == false:
29
+ inspect network activity
30
+ ```
31
+
32
+ This is overly broad.
33
+
34
+ For many interactions, absence of obvious snapshot change does not imply backend ambiguity. It often means verification used the wrong signals.
35
+
36
+ Examples:
37
+ - Slider value changed but tree structure did not.
38
+ - Picker selection updated in-place.
39
+ - Toggle changed checked state only.
40
+ - Text field value changed without large snapshot delta.
41
+ - Tab or accordion state changed through selection metadata.
42
+
43
+ In these cases network inspection is diagnostic noise, not evidence.
44
+
45
+ ## Problem Statement
46
+
47
+ The current model conflates:
48
+
49
+ 1. Verifying whether an action succeeded.
50
+ 2. Diagnosing why an action may have failed.
51
+
52
+ These are distinct phases.
53
+
54
+ As a result:
55
+ - agents overuse network inspection
56
+ - verification costs increase
57
+ - local-state actions are treated as ambiguous too often
58
+ - network hints can be elevated beyond their intended role
59
+
60
+ ## Goals
61
+
62
+ This RFC:
63
+ - Prevents default network fallbacks for local-state actions.
64
+ - Makes verification primarily state-driven.
65
+ - Restricts network activity inspection to side-effect actions where ambiguity remains.
66
+ - Refines `classify_action_outcome` decision routing.
67
+
68
+ ## Non-Goals
69
+
70
+ This RFC does not:
71
+ - change raw snapshot precedence (raw remains authoritative)
72
+ - redefine expect_* ownership of verification
73
+ - make network activity mandatory evidence
74
+ - expand semantic hints into executable truth
75
+
76
+ ## Action Categories
77
+
78
+ ### Category A: Local-State Actions
79
+
80
+ Actions expected to modify client-side UI state.
81
+
82
+ Examples:
83
+ - tap toggle
84
+ - drag slider
85
+ - picker selection
86
+ - text entry
87
+ - scrolling
88
+ - tab switching
89
+ - expand/collapse
90
+ - local navigation controls
91
+
92
+ ### Category B: Side-Effect Actions
93
+
94
+ Actions that may trigger backend or asynchronous side effects.
95
+
96
+ Examples:
97
+ - submit
98
+ - save
99
+ - sync
100
+ - search
101
+ - refresh
102
+ - login
103
+ - purchase flows
104
+
105
+ ## Action Classification Source of Truth
106
+
107
+ ## Action Type Emission (Runtime Contract)
108
+
109
+ `action_type` MUST be emitted by the runtime layer that produces or executes actions. It is not inferred by the agent.
110
+
111
+ There are three valid sources of truth, in order of precedence:
112
+
113
+ ### 1. Tool Schema Annotation (preferred)
114
+
115
+ If the action originates from a tool invocation, `action_type` MUST be defined in the tool’s schema definition.
116
+
117
+ Example:
118
+
119
+ ```json
120
+ {
121
+ "name": "toggle_switch",
122
+ "action_type": "local_state"
123
+ }
124
+ ```
125
+
126
+ or
127
+
128
+ ```json
129
+ {
130
+ "name": "submit_form",
131
+ "action_type": "side_effect"
132
+ }
133
+ ```
134
+
135
+ This is the canonical source.
136
+
137
+ ### 2. Handler Output (runtime execution layer)
138
+
139
+ If tool schema does not define `action_type`, the runtime handler that executes the action MUST attach it before returning the action result.
140
+
141
+ Example:
142
+
143
+ ```json
144
+ {
145
+ "action": "click",
146
+ "target": "save_button",
147
+ "action_type": "side_effect"
148
+ }
149
+ ```
150
+
151
+ This is valid only when schema-level annotation is absent.
152
+
153
+ ### 3. Fallback Mapping Table (last resort, deterministic only)
154
+
155
+ If neither schema nor handler provides `action_type`, the system MUST use a deterministic mapping table maintained by the runtime.
156
+
157
+ This table MUST be:
158
+ - static (no runtime inference)
159
+ - versioned
160
+ - explicitly defined in implementation
161
+
162
+ Example mapping:
163
+
164
+ | action | action_type |
165
+ |--------|------------|
166
+ | tap_toggle | local_state |
167
+ | enter_text | local_state |
168
+ | submit | side_effect |
169
+ | refresh | side_effect |
170
+
171
+ If an action is not in the table, it MUST default to:
172
+
173
+ ```
174
+ side_effect
175
+ ```
176
+
177
+ ### Hard Constraint
178
+
179
+ Agents MUST NOT infer or override `action_type` based on UI state changes, snapshot diffs, or network activity.
180
+
181
+ ### Normative Interpretation
182
+
183
+ `action_type` is part of the execution contract, not the reasoning layer.
184
+
185
+ Action type MUST be explicitly defined by the action schema or tool output.
186
+
187
+ Valid values:
188
+ - local_state
189
+ - side_effect
190
+
191
+ Agents MUST NOT infer action type from UI changes.
192
+
193
+ If action type is missing, agents MUST treat it as side_effect only if backend interaction is plausible; otherwise classify as local_state.
194
+
195
+ ## Revised Verification Routing
196
+
197
+ ### For Local-State Actions
198
+
199
+ Verification priority:
200
+
201
+ 1. Expected state assertions
202
+ 2. Refreshed snapshot comparison
203
+ 3. Element property checks
204
+ 4. Targeted expect_* verification
205
+
206
+ Signals may include:
207
+ - value changes
208
+ - selected state
209
+ - checked state
210
+ - focus changes
211
+ - labels/text
212
+ - enabled/disabled transitions
213
+ - position/state metadata
214
+
215
+ Network activity should not be used as default fallback.
216
+
217
+ ### For Side-Effect Actions
218
+
219
+ Verification priority:
220
+
221
+ 1. Expected UI/state verification first
222
+ 2. Retry richer local verification if ambiguous
223
+ 3. Only then optionally inspect network or log signals
224
+
225
+ Network signals are supporting hints, not primary proof of success.
226
+
227
+ ## Decision Logic Update
228
+
229
+ Replace implied logic:
230
+
231
+ ```text
232
+ if uiChanged == false:
233
+ get_network_activity()
234
+ ```
235
+
236
+ With:
237
+
238
+ ```text
239
+ if expected_state_verified:
240
+ success
241
+
242
+ elif action_type == local_state:
243
+ retry using richer state verification
244
+
245
+ elif action_type == side_effect and ambiguity_remains:
246
+ optionally inspect network activity
247
+
248
+ else:
249
+ inconclusive
250
+ ```
251
+
252
+ ## Definition of Ambiguity
253
+
254
+ Ambiguity exists only when:
255
+
256
+ - expected state cannot be evaluated from UI snapshot, AND
257
+ - no single deterministic state predicate can be computed from UI fields
258
+
259
+ Ambiguity does NOT include:
260
+ - absence of visual diff
261
+ - absence of network activity
262
+ - lack of large UI tree changes
263
+
264
+ ## Normative Rules
265
+
266
+ ### Rule 1
267
+
268
+ Agents MUST NOT use network activity inspection as a default fallback for local-state actions solely because coarse UI diffs are absent.
269
+
270
+ ### Rule 2
271
+
272
+ Agents MUST prefer explicit state verification over backend diagnostics whenever the action is expected to be locally observable.
273
+
274
+ ### Rule 3
275
+
276
+ Network activity MAY be consulted only when:
277
+ - the action plausibly triggers backend work, and
278
+ - local verification remains ambiguous under the defined ambiguity criteria.
279
+
280
+ ### Rule 4
281
+
282
+ Network activity evidence MUST be treated as auxiliary signal, not authoritative proof of action success.
283
+
284
+ ## Unified Diagnostic Signals
285
+
286
+ Network activity and log inspection are equivalent diagnostic signals.
287
+
288
+ Both:
289
+ - are secondary to UI state verification
290
+ - MUST NOT be used as default fallback for local-state actions
291
+ - follow the same escalation rules defined in this RFC
292
+
293
+ ## Impact on classify_action_outcome
294
+
295
+ `classify_action_outcome` should be interpreted as routing logic, not a mandatory network escalation path.
296
+
297
+ For `uiChanged=false`, action category determines next step.
298
+
299
+ No automatic implication:
300
+
301
+ ```text
302
+ uiChanged=false => inspect network
303
+ ```
304
+
305
+ ## Expected Benefits
306
+
307
+ - Fewer unnecessary tool calls
308
+ - Cleaner verification traces
309
+ - Reduced cargo-cult network probing
310
+ - Better behavior for local UI interactions
311
+ - Stronger separation between verification and diagnosis
312
+ - More reliable agent reasoning
313
+
314
+ ## Compatibility
315
+
316
+ This is a patch-level specification correction.
317
+
318
+ It refines routing semantics but does not break:
319
+ - existing expect_* semantics
320
+ - snapshot response shape
321
+ - raw-over-semantic precedence
322
+ - action execution model
323
+
324
+ ## Implementation Notes
325
+
326
+ Follow-up work may include:
327
+ - prompt updates
328
+ - regression examples for sliders/toggles/pickers
329
+ - protocol examples showing correct routing
330
+ - telemetry on reduced unnecessary network inspections
331
+
332
+ ## Open Questions
333
+
334
+ Questions for review:
335
+
336
+ 1. Should action category be explicitly emitted as runtime metadata, or is heuristic inference acceptable only within the fallback mapping layer defined in the Action Type Emission contract?
337
+ 2. Should side-effect actions permit optional log inspection alongside network hints?
338
+ 3. Should local-state verification examples be added to core spec or examples appendix?
339
+
340
+ ## Decision Requested
341
+
342
+ Adopt verification routing based on action type and remove implicit default escalation from missing UI diffs to network inspection.
@@ -41,7 +41,7 @@ Outcome-specific guidance:
41
41
  - visible navigation expected -> `wait_for_screen_change` (optional) -> `expect_screen`
42
42
  - local UI change expected -> `wait_for_ui` (optional) -> `expect_element_visible`
43
43
  - readable element state expected -> `wait_for_ui` (optional) -> `expect_state`
44
- - backend/API activity expected without a visible UI change -> compare `get_screen_fingerprint` before/after, then call `get_network_activity` immediately after the action and `classify_action_outcome` with the observed requests
44
+ - backend/API activity expected without a visible UI change -> compare `get_screen_fingerprint` before/after, then call `classify_action_outcome` with the runtime `action_type`; collect `get_network_activity` only if the result remains ambiguous
45
45
 
46
46
  For backend/API activity, `wait_for_screen_change` is not the right verification tool unless a visible transition is also expected.
47
47
 
@@ -294,11 +294,11 @@ Tool: `classify_action_outcome`
294
294
 
295
295
  Rules:
296
296
 
297
- - MAY use UI, network, and log signals
297
+ - MAY use UI, action, network, and log signals
298
298
  - MUST be deterministic
299
299
  - MUST NOT replace `expect_*` tools
300
300
  - MUST be treated as a supplementary signal only
301
- - SHOULD be used with `get_network_activity` when the expected outcome is backend/API activity without a visible UI change
301
+ - SHOULD be used with `get_network_activity` only when the outcome is still ambiguous after routing by `action_type`
302
302
 
303
303
  It is not a verification mechanism.
304
304
 
@@ -17,6 +17,7 @@ Important:
17
17
 
18
18
  - `wait_for_*` tools must not be used as the final verification of action success when an applicable `expect_*` tool exists.
19
19
  - action tools report execution success, not outcome correctness.
20
+ - `classify_action_outcome` should receive the runtime `action_type` when you want routing to distinguish local-state and side-effect actions.
20
21
 
21
22
  ## tap / swipe / type_text / press_back
22
23
 
@@ -54,10 +55,10 @@ Preferred verification:
54
55
  - navigation outcome known -> `expect_screen`
55
56
  - local UI change known -> `expect_element_visible`
56
57
  - readable element state known -> `expect_state`
57
- - backend/API activity expected -> `classify_action_outcome` + `get_network_activity`
58
+ - backend/API activity expected -> `classify_action_outcome` + optional `get_network_activity` if the UI signal remains ambiguous
58
59
 
59
- Use `wait_for_screen_change` only when a visible transition is the expected outcome. If a button should trigger an API request but the screen should stay the same, rely on network activity and classification instead.
60
- For backend-only actions, prefer comparing `get_screen_fingerprint` before/after and call `get_network_activity` immediately after the action; do not wait on `wait_for_screen_change` if no visible transition is expected.
60
+ Use `wait_for_screen_change` only when a visible transition is the expected outcome. If a button should trigger an API request but the screen should stay the same, rely on `action_type` plus classification first.
61
+ For backend-only actions, prefer comparing `get_screen_fingerprint` before/after and collect `get_network_activity` immediately after the action only if the result is still ambiguous; do not wait on `wait_for_screen_change` if no visible transition is expected.
61
62
  Use `wait_for_ui_change` when the screen stays in place but visible text or element state should change.
62
63
 
63
64
  ---
@@ -507,17 +508,18 @@ Notes:
507
508
 
508
509
  ## classify_action_outcome + get_network_activity
509
510
 
510
- Use this pair when the action is expected to trigger network/backend work and the screen may not visibly change.
511
+ Use this pair when the action may trigger network/backend work and the screen may not visibly change.
511
512
 
512
513
  Pattern:
513
514
 
514
515
  1. perform the action
515
516
  2. call `classify_action_outcome` with `uiChanged` from `wait_for_screen_change` or a screen fingerprint comparison
516
- 3. if the classifier asks for it, call `get_network_activity`
517
- 4. call `classify_action_outcome` again with `networkRequests`
517
+ 3. pass the runtime `action_type` value as `actionType`
518
+ 4. collect `get_network_activity` only if the action is side-effect oriented and the UI signal remains ambiguous
519
+ 5. call `classify_action_outcome` again with `networkRequests` if you collected them
518
520
 
519
521
  Guidance:
520
522
 
521
523
  - `uiChanged=true` or `expectedElementVisible=true` means the action outcome is already verified
522
- - `nextAction="call_get_network_activity"` means the UI signal was inconclusive and the agent should inspect network activity
523
- - if network requests succeed but the UI stays unchanged, treat the outcome as a backend/API result rather than a screen transition
524
+ - local-state actions should prefer refreshed snapshots, `expect_state`, or `expect_element_visible` over default network inspection
525
+ - network activity is auxiliary evidence, not mandatory proof
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mobile-debug-mcp",
3
- "version": "0.26.0",
3
+ "version": "0.26.1",
4
4
  "description": "MCP server for mobile app debugging (Android + iOS), with focus on security and reliability",
5
5
  "type": "module",
6
6
  "bin": {
@@ -1,5 +1,6 @@
1
1
  export type ActionOutcome = 'success' | 'no_op' | 'backend_failure' | 'ui_failure' | 'unknown'
2
2
  export type NetworkRequestStatus = 'success' | 'failure' | 'retryable'
3
+ export type ActionCategory = 'local_state' | 'side_effect'
3
4
 
4
5
  export interface NetworkRequest {
5
6
  endpoint: string
@@ -9,6 +10,8 @@ export interface NetworkRequest {
9
10
  export interface ClassifyActionOutcomeInput {
10
11
  uiChanged: boolean
11
12
  expectedElementVisible?: boolean | null
13
+ /** Concrete action_type from the runtime action result (for example: tap, type_text, start_app). */
14
+ actionType?: string | null
12
15
  /** null = get_network_activity has not been called yet */
13
16
  networkRequests?: NetworkRequest[] | null
14
17
  hasLogErrors?: boolean | null
@@ -17,8 +20,29 @@ export interface ClassifyActionOutcomeInput {
17
20
  export interface ClassifyActionOutcomeResult {
18
21
  outcome: ActionOutcome
19
22
  reasoning: string
20
- /** Present when the caller must call get_network_activity before a final classification is possible */
21
- nextAction?: 'call_get_network_activity'
23
+ }
24
+
25
+ const ACTION_CATEGORY_BY_TYPE: Record<string, ActionCategory> = {
26
+ tap: 'local_state',
27
+ tap_element: 'local_state',
28
+ swipe: 'local_state',
29
+ scroll_to_element: 'local_state',
30
+ type_text: 'local_state',
31
+ press_back: 'local_state',
32
+ start_app: 'side_effect',
33
+ restart_app: 'side_effect',
34
+ terminate_app: 'side_effect',
35
+ reset_app_data: 'side_effect',
36
+ install_app: 'side_effect',
37
+ build_app: 'side_effect',
38
+ build_and_install: 'side_effect'
39
+ }
40
+
41
+ function inferActionCategory(actionType?: string | null): ActionCategory | null {
42
+ if (typeof actionType !== 'string') return null
43
+ const normalized = actionType.trim().toLowerCase()
44
+ if (!normalized) return null
45
+ return ACTION_CATEGORY_BY_TYPE[normalized] ?? 'side_effect'
22
46
  }
23
47
 
24
48
  /**
@@ -26,39 +50,55 @@ export interface ClassifyActionOutcomeResult {
26
50
  * Same inputs always produce the same output.
27
51
  */
28
52
  export function classifyActionOutcome(input: ClassifyActionOutcomeInput): ClassifyActionOutcomeResult {
29
- const { uiChanged, expectedElementVisible, networkRequests, hasLogErrors } = input
53
+ const { uiChanged, expectedElementVisible, actionType, networkRequests, hasLogErrors } = input
54
+ const actionCategory = inferActionCategory(actionType)
30
55
 
31
56
  // Step 1 — UI signal is positive
32
57
  if (uiChanged || expectedElementVisible === true) {
33
58
  return { outcome: 'success', reasoning: expectedElementVisible === true ? 'expected element is visible' : 'UI changed after action' }
34
59
  }
35
60
 
36
- // Step 2 — UI did not change; network signal is required
37
- if (networkRequests === null || networkRequests === undefined) {
61
+ // Step 2 — no action type means we cannot choose a safe routing path
62
+ if (actionCategory === null) {
38
63
  return {
39
64
  outcome: 'unknown',
40
- reasoning: 'UI did not change; get_network_activity must be called before classification can proceed',
41
- nextAction: 'call_get_network_activity'
65
+ reasoning: 'actionType was not supplied; pass the runtime action_type so the classifier can distinguish local-state and side-effect routing'
42
66
  }
43
67
  }
44
68
 
45
- // Step 3 — any network failure
46
- const failedRequest = networkRequests.find((r) => r.status === 'failure' || r.status === 'retryable')
69
+ const failedRequest = networkRequests?.find((r) => r.status === 'failure' || r.status === 'retryable')
47
70
  if (failedRequest) {
48
71
  return { outcome: 'backend_failure', reasoning: `network request ${failedRequest.endpoint} returned ${failedRequest.status}` }
49
72
  }
50
73
 
51
- // Step 4 — no network requests at all
74
+ // Step 3 — local-state actions should be verified with state-specific signals first
75
+ if (actionCategory === 'local_state') {
76
+ const logNote = hasLogErrors ? ' (log errors present)' : ''
77
+ return {
78
+ outcome: 'no_op',
79
+ reasoning: `local-state action${logNote}; use expect_state, refreshed snapshot comparison, or expect_element_visible instead of defaulting to network inspection`
80
+ }
81
+ }
82
+
83
+ // Step 4 — side-effect actions may legitimately need network or log inspection
84
+ if (networkRequests === null || networkRequests === undefined) {
85
+ return {
86
+ outcome: 'unknown',
87
+ reasoning: 'side-effect action without network data; inspect network or log signals only if the outcome is still ambiguous'
88
+ }
89
+ }
90
+
91
+ // Step 5 — no network requests at all
52
92
  if (networkRequests.length === 0) {
53
93
  const logNote = hasLogErrors ? ' (log errors present)' : ''
54
- return { outcome: 'no_op', reasoning: `no UI change and no network activity${logNote}` }
94
+ return { outcome: 'no_op', reasoning: `side-effect action and no network activity${logNote}` }
55
95
  }
56
96
 
57
- // Step 5 — network requests exist and all succeeded
97
+ // Step 6 — network requests exist and all succeeded
58
98
  if (networkRequests.every((r) => r.status === 'success')) {
59
99
  return { outcome: 'ui_failure', reasoning: 'network requests succeeded but UI did not change' }
60
100
  }
61
101
 
62
- // Step 6 — fallback
102
+ // Step 7 — fallback
63
103
  return { outcome: 'unknown', reasoning: 'signals are inconclusive' }
64
104
  }
@@ -344,7 +344,7 @@ Capabilities:
344
344
  Constraints:
345
345
  - Does not verify correctness of the resulting state
346
346
  - Must not be used alone to confirm action success when an applicable expect_* tool exists
347
- - Use classify_action_outcome + get_network_activity when the expected outcome is backend/API activity without a visible UI change
347
+ - For backend/API activity without a visible UI change, pass the runtime action_type into classify_action_outcome and collect network evidence only if the result remains ambiguous
348
348
 
349
349
  Recommended Usage:
350
350
  1. Capture or define the expected outcome
@@ -918,26 +918,29 @@ Failure Handling:
918
918
  name: 'classify_action_outcome',
919
919
  description: `Classify the outcome of the most recent action into exactly one of: success, no_op, backend_failure, ui_failure, unknown.
920
920
 
921
- MUST be called after every action (tap, swipe, type_text, press_back, start_app, etc). Never skip.
922
- Use this with get_network_activity when the expected outcome is backend/API activity without a visible UI change.
923
- For backend/API activity, compare get_screen_fingerprint before and after the action and call get_network_activity immediately after the action instead of waiting for wait_for_screen_change.
921
+ Use the runtime action result's \`action_type\` as \`actionType\` so the classifier can distinguish local-state actions from side-effect actions.
922
+ Use this when the intended outcome is not already fully verified by the UI signal alone.
923
+ For backend/API activity, compare get_screen_fingerprint before and after the action and call get_network_activity immediately after the action if the outcome is still ambiguous.
924
924
 
925
925
  HOW TO GATHER INPUTS before calling:
926
926
  1. Call wait_for_screen_change or compare get_screen_fingerprint before/after — set uiChanged accordingly.
927
927
  2. If you checked for a specific element with wait_for_ui, set expectedElementVisible.
928
- 3. Do NOT call get_network_activity yet — omit networkRequests on the first call.
928
+ 3. Pass actionType from the action response when available.
929
+ 4. Only provide networkRequests if you already collected them or want to classify a side-effect action with backend evidence.
929
930
 
930
931
  RULES (applied in order — stop at first match):
931
932
  1. If uiChanged=true OR expectedElementVisible=true → outcome=success
932
- 2. Otherwise this tool returns nextAction="call_get_network_activity" — you MUST call get_network_activity once, then call classify_action_outcome again with the results in networkRequests.
933
+ 2. If actionType is missing → outcome=unknown
933
934
  3. If any request has status=failure or retryable → outcome=backend_failure
934
- 4. If no requests returned → outcome=no_op
935
- 5. If all requests succeeded → outcome=ui_failure
936
- 6. Otherwise → outcome=unknown
935
+ 4. If actionType maps to a local-state action → outcome=no_op; prefer state-based verification and avoid default network fallback
936
+ 5. If actionType maps to a side-effect action and no networkRequests were supplied → outcome=unknown
937
+ 6. If no requests returned → outcome=no_op
938
+ 7. If all requests succeeded → outcome=ui_failure
939
+ 8. Otherwise → outcome=unknown
937
940
 
938
941
  BEHAVIOUR after outcome:
939
942
  - success → continue
940
- - no_op → retry the action once or re-resolve the element
943
+ - no_op → retry with richer state verification or re-resolve the element
941
944
  - backend_failure → stop and report the failing endpoint
942
945
  - ui_failure → stop and report failure
943
946
  - unknown → take one recovery step (e.g. capture_debug_snapshot), then stop`,
@@ -952,9 +955,13 @@ BEHAVIOUR after outcome:
952
955
  type: 'boolean',
953
956
  description: 'true if the element you expected to appear is now visible (from wait_for_ui). Omit if you did not check for a specific element.'
954
957
  },
958
+ actionType: {
959
+ type: 'string',
960
+ description: 'The runtime action_type from the action response (for example tap, tap_element, swipe, type_text, press_back, start_app).'
961
+ },
955
962
  networkRequests: {
956
963
  type: 'array',
957
- description: 'Pass this only after calling get_network_activity as instructed by nextAction. Also use it when the expected outcome is backend/API activity without a visible UI change.',
964
+ description: 'Optional network evidence collected after the action. Use it when the expected outcome is backend/API activity or when the UI signal is ambiguous.',
958
965
  items: {
959
966
  type: 'object',
960
967
  properties: {
@@ -976,7 +983,7 @@ BEHAVIOUR after outcome:
976
983
  name: 'get_network_activity',
977
984
  description: `Returns structured network events captured from platform logs since the last action.
978
985
 
979
- Call this when classify_action_outcome returns nextAction="call_get_network_activity" or immediately after an action whose expected outcome is backend/API activity without a visible UI change.
986
+ Call this immediately after an action when you want backend evidence for a side-effect flow, only if the result is still ambiguous.
980
987
  Do not call more than once per action.
981
988
 
982
989
  Events are filtered to significant (non-background) requests only.
@@ -448,11 +448,13 @@ async function handleStopLogStream(args: ToolCallArgs) {
448
448
  function handleClassifyActionOutcome(args: ToolCallArgs) {
449
449
  const uiChanged = requireBooleanArg(args, 'uiChanged')
450
450
  const expectedElementVisible = getBooleanArg(args, 'expectedElementVisible')
451
+ const actionType = getStringArg(args, 'actionType')
451
452
  const networkRequests = getArrayArg<ClassifyNetworkRequestArg>(args, 'networkRequests')
452
453
  const hasLogErrors = getBooleanArg(args, 'hasLogErrors')
453
454
  const result = classifyActionOutcome({
454
455
  uiChanged,
455
456
  expectedElementVisible: expectedElementVisible ?? null,
457
+ actionType: actionType ?? null,
456
458
  networkRequests: networkRequests ?? null,
457
459
  hasLogErrors: hasLogErrors ?? null
458
460
  })
@@ -13,7 +13,7 @@ export { wrapResponse, toolDefinitions, handleToolCall }
13
13
 
14
14
  export const serverInfo = {
15
15
  name: 'mobile-debug-mcp',
16
- version: '0.26.0'
16
+ version: '0.26.1'
17
17
  }
18
18
 
19
19
  export function createServer() {
@@ -7,7 +7,6 @@ function run() {
7
7
  const result = classifyActionOutcome({ uiChanged: true })
8
8
  assert.strictEqual(result.outcome, 'success')
9
9
  assert.ok(result.reasoning.length > 0)
10
- assert.strictEqual(result.nextAction, undefined)
11
10
  }
12
11
 
13
12
  // Step 1 — expectedElementVisible → success
@@ -15,7 +14,6 @@ function run() {
15
14
  const result = classifyActionOutcome({ uiChanged: false, expectedElementVisible: true })
16
15
  assert.strictEqual(result.outcome, 'success')
17
16
  assert.strictEqual(result.reasoning, 'expected element is visible')
18
- assert.strictEqual(result.nextAction, undefined)
19
17
  }
20
18
 
21
19
  // Step 1 — both uiChanged and expectedElementVisible → success
@@ -24,24 +22,50 @@ function run() {
24
22
  assert.strictEqual(result.outcome, 'success')
25
23
  }
26
24
 
27
- // Step 2 — UI did not change, networkRequests not yet provided → nextAction required
25
+ // No actionType supplied → unknown
28
26
  {
29
27
  const result = classifyActionOutcome({ uiChanged: false })
30
28
  assert.strictEqual(result.outcome, 'unknown')
31
- assert.strictEqual(result.nextAction, 'call_get_network_activity')
29
+ assert.ok(result.reasoning.includes('actionType was not supplied'))
32
30
  }
33
31
 
34
- // Step 2 — explicit null networkRequests → nextAction required
32
+ // Local-state action routes to state verification rather than forced network probing
35
33
  {
36
- const result = classifyActionOutcome({ uiChanged: false, expectedElementVisible: null, networkRequests: null })
34
+ const result = classifyActionOutcome({ uiChanged: false, actionType: 'tap' })
35
+ assert.strictEqual(result.outcome, 'no_op')
36
+ assert.ok(result.reasoning.includes('local-state action'))
37
+ }
38
+
39
+ // Local-state action with network data still prefers local-state semantics
40
+ {
41
+ const result = classifyActionOutcome({
42
+ uiChanged: false,
43
+ actionType: 'type_text',
44
+ networkRequests: []
45
+ })
46
+ assert.strictEqual(result.outcome, 'no_op')
47
+ assert.ok(result.reasoning.includes('local-state action'))
48
+ }
49
+
50
+ // Explicit side-effect action without networkRequests supplied → unknown
51
+ {
52
+ const result = classifyActionOutcome({ uiChanged: false, actionType: 'start_app' })
37
53
  assert.strictEqual(result.outcome, 'unknown')
38
- assert.strictEqual(result.nextAction, 'call_get_network_activity')
54
+ assert.ok(result.reasoning.includes('side-effect action'))
55
+ }
56
+
57
+ // Side-effect action with empty networkRequests → no_op
58
+ {
59
+ const result = classifyActionOutcome({ uiChanged: false, actionType: 'start_app', networkRequests: [] })
60
+ assert.strictEqual(result.outcome, 'no_op')
61
+ assert.ok(result.reasoning.includes('side-effect action'))
39
62
  }
40
63
 
41
- // Step 3 — failure status → backend_failure
64
+ // Network failure → backend_failure
42
65
  {
43
66
  const result = classifyActionOutcome({
44
67
  uiChanged: false,
68
+ actionType: 'start_app',
45
69
  networkRequests: [{ endpoint: '/login', status: 'failure' }]
46
70
  })
47
71
  assert.strictEqual(result.outcome, 'backend_failure')
@@ -49,10 +73,11 @@ function run() {
49
73
  assert.ok(result.reasoning.includes('failure'))
50
74
  }
51
75
 
52
- // Step 3 — retryable status → backend_failure
76
+ // Retryable status → backend_failure
53
77
  {
54
78
  const result = classifyActionOutcome({
55
79
  uiChanged: false,
80
+ actionType: 'start_app',
56
81
  networkRequests: [
57
82
  { endpoint: '/api/submit', status: 'retryable' },
58
83
  { endpoint: '/api/other', status: 'success' }
@@ -62,25 +87,11 @@ function run() {
62
87
  assert.ok(result.reasoning.includes('/api/submit'))
63
88
  }
64
89
 
65
- // Step 4 — empty network requests → no_op
66
- {
67
- const result = classifyActionOutcome({ uiChanged: false, networkRequests: [] })
68
- assert.strictEqual(result.outcome, 'no_op')
69
- assert.ok(result.reasoning.includes('no UI change'))
70
- assert.ok(result.reasoning.includes('no network activity'))
71
- }
72
-
73
- // Step 4 — empty network requests with log errors → no_op with note
74
- {
75
- const result = classifyActionOutcome({ uiChanged: false, networkRequests: [], hasLogErrors: true })
76
- assert.strictEqual(result.outcome, 'no_op')
77
- assert.ok(result.reasoning.includes('log errors'))
78
- }
79
-
80
- // Step 5 — all requests succeeded but UI unchanged → ui_failure
90
+ // All requests succeeded and UI stayed unchanged → ui_failure
81
91
  {
82
92
  const result = classifyActionOutcome({
83
93
  uiChanged: false,
94
+ actionType: 'start_app',
84
95
  networkRequests: [
85
96
  { endpoint: '/api/save', status: 'success' },
86
97
  { endpoint: '/api/refresh', status: 'success' }
@@ -90,10 +101,18 @@ function run() {
90
101
  assert.ok(result.reasoning.includes('network requests succeeded'))
91
102
  }
92
103
 
104
+ // Empty network requests with log errors → no_op with note
105
+ {
106
+ const result = classifyActionOutcome({ uiChanged: false, actionType: 'start_app', networkRequests: [], hasLogErrors: true })
107
+ assert.strictEqual(result.outcome, 'no_op')
108
+ assert.ok(result.reasoning.includes('log errors'))
109
+ }
110
+
93
111
  // Step 1 takes priority over network signals — success even when failures present
94
112
  {
95
113
  const result = classifyActionOutcome({
96
114
  uiChanged: true,
115
+ actionType: 'start_app',
97
116
  networkRequests: [{ endpoint: '/api/log', status: 'failure' }]
98
117
  })
99
118
  assert.strictEqual(result.outcome, 'success')
@@ -68,15 +68,17 @@ async function run() {
68
68
 
69
69
  const classifyActionOutcome = toolDefinitions.find((tool) => tool.name === 'classify_action_outcome')
70
70
  assert(classifyActionOutcome, 'classify_action_outcome should be registered')
71
- assert.match((classifyActionOutcome as any).description, /backend\/API activity without a visible UI change/i)
72
- assert.match((classifyActionOutcome as any).description, /get_network_activity/i)
73
- assert.match((classifyActionOutcome as any).description, /immediately after the action/i)
71
+ assert.match((classifyActionOutcome as any).description, /action_type/i)
72
+ assert.match((classifyActionOutcome as any).description, /local-state/i)
73
+ assert.match((classifyActionOutcome as any).description, /side-effect/i)
74
+ assert.strictEqual((classifyActionOutcome as any).inputSchema.properties.actionType.type, 'string')
75
+ assert.match((classifyActionOutcome as any).inputSchema.properties.networkRequests.description, /optional network evidence/i)
74
76
 
75
77
  const getNetworkActivity = toolDefinitions.find((tool) => tool.name === 'get_network_activity')
76
78
  assert(getNetworkActivity, 'get_network_activity should be registered')
77
- assert.match((getNetworkActivity as any).description, /backend\/API activity without a visible UI change/i)
78
- assert.doesNotMatch((getNetworkActivity as any).description, /Call this only when/i)
79
- assert.match((getNetworkActivity as any).description, /immediately after an action/i)
79
+ assert.match((getNetworkActivity as any).description, /side-effect/i)
80
+ assert.doesNotMatch((getNetworkActivity as any).description, /nextAction/i)
81
+ assert.match((getNetworkActivity as any).description, /only if the result is still ambiguous/i)
80
82
 
81
83
  await assert.rejects(() => handleToolCall('unknown_tool'), /Unknown tool: unknown_tool/)
82
84