npm - mobile-debug-mcp - Versions diffs - 0.26.0 → 0.26.1 - Mend

mobile-debug-mcp 0.26.0 → 0.26.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/dist/interact/classify.js +48 -11
package/dist/server/tool-definitions.js +19 -12
package/dist/server/tool-handlers.js +2 -0
package/dist/server-core.js +1 -1
package/docs/CHANGELOG.md +3 -0
package/docs/ROADMAP.md +66 -38
package/docs/rfcs/004-action-verification-routing.md +342 -0
package/docs/specs/mcp-tooling-spec-v1.md +3 -3
package/docs/tools/interact.md +10 -8
package/package.json +1 -1
package/src/interact/classify.ts +53 -13
package/src/server/tool-definitions.ts +19 -12
package/src/server/tool-handlers.ts +2 -0
package/src/server-core.ts +1 -1
package/test/unit/interact/classify_action_outcome.test.ts +44 -25
package/test/unit/server/contract.test.ts +8 -6

package/dist/interact/classify.js CHANGED Viewed

@@ -1,35 +1,72 @@
+const ACTION_CATEGORY_BY_TYPE = {
+    tap: 'local_state',
+    tap_element: 'local_state',
+    swipe: 'local_state',
+    scroll_to_element: 'local_state',
+    type_text: 'local_state',
+    press_back: 'local_state',
+    start_app: 'side_effect',
+    restart_app: 'side_effect',
+    terminate_app: 'side_effect',
+    reset_app_data: 'side_effect',
+    install_app: 'side_effect',
+    build_app: 'side_effect',
+    build_and_install: 'side_effect'
+};
+function inferActionCategory(actionType) {
+    if (typeof actionType !== 'string')
+        return null;
+    const normalized = actionType.trim().toLowerCase();
+    if (!normalized)
+        return null;
+    return ACTION_CATEGORY_BY_TYPE[normalized] ?? 'side_effect';
+}
 /**
  * Pure deterministic classifier. Applies rules in fixed order.
  * Same inputs always produce the same output.
  */
 export function classifyActionOutcome(input) {
-    const { uiChanged, expectedElementVisible, networkRequests, hasLogErrors } = input;
+    const { uiChanged, expectedElementVisible, actionType, networkRequests, hasLogErrors } = input;
+    const actionCategory = inferActionCategory(actionType);
     // Step 1 — UI signal is positive
     if (uiChanged || expectedElementVisible === true) {
         return { outcome: 'success', reasoning: expectedElementVisible === true ? 'expected element is visible' : 'UI changed after action' };
     }
-    // Step 2 — UI did not change; network signal is required
-    if (networkRequests === null || networkRequests === undefined) {
+    // Step 2 — no action type means we cannot choose a safe routing path
+    if (actionCategory === null) {
         return {
             outcome: 'unknown',
-            reasoning: 'UI did not change; get_network_activity must be called before classification can proceed',
-            nextAction: 'call_get_network_activity'
+            reasoning: 'actionType was not supplied; pass the runtime action_type so the classifier can distinguish local-state and side-effect routing'
         };
     }
-    // Step 3 — any network failure
-    const failedRequest = networkRequests.find((r) => r.status === 'failure' || r.status === 'retryable');
+    const failedRequest = networkRequests?.find((r) => r.status === 'failure' || r.status === 'retryable');
     if (failedRequest) {
         return { outcome: 'backend_failure', reasoning: `network request ${failedRequest.endpoint} returned ${failedRequest.status}` };
     }
-    // Step 4 — no network requests at all
+    // Step 3 — local-state actions should be verified with state-specific signals first
+    if (actionCategory === 'local_state') {
+        const logNote = hasLogErrors ? ' (log errors present)' : '';
+        return {
+            outcome: 'no_op',
+            reasoning: `local-state action${logNote}; use expect_state, refreshed snapshot comparison, or expect_element_visible instead of defaulting to network inspection`
+        };
+    }
+    // Step 4 — side-effect actions may legitimately need network or log inspection
+    if (networkRequests === null || networkRequests === undefined) {
+        return {
+            outcome: 'unknown',
+            reasoning: 'side-effect action without network data; inspect network or log signals only if the outcome is still ambiguous'
+        };
+    }
+    // Step 5 — no network requests at all
     if (networkRequests.length === 0) {
         const logNote = hasLogErrors ? ' (log errors present)' : '';
-        return { outcome: 'no_op', reasoning: `no UI change and no network activity${logNote}` };
+        return { outcome: 'no_op', reasoning: `side-effect action and no network activity${logNote}` };
     }
-    // Step 5 — network requests exist and all succeeded
+    // Step 6 — network requests exist and all succeeded
     if (networkRequests.every((r) => r.status === 'success')) {
         return { outcome: 'ui_failure', reasoning: 'network requests succeeded but UI did not change' };
     }
-    // Step 6 — fallback
+    // Step 7 — fallback
     return { outcome: 'unknown', reasoning: 'signals are inconclusive' };
 }

package/dist/server/tool-definitions.js CHANGED Viewed

@@ -344,7 +344,7 @@ Capabilities:
 Constraints:
 - Does not verify correctness of the resulting state
 - Must not be used alone to confirm action success when an applicable expect_* tool exists
-- Use classify_action_outcome + get_network_activity when the expected outcome is backend/API activity without a visible UI change
+- For backend/API activity without a visible UI change, pass the runtime action_type into classify_action_outcome and collect network evidence only if the result remains ambiguous
 Recommended Usage:
 1. Capture or define the expected outcome
@@ -918,26 +918,29 @@ Failure Handling:
         name: 'classify_action_outcome',
         description: `Classify the outcome of the most recent action into exactly one of: success, no_op, backend_failure, ui_failure, unknown.
-MUST be called after every action (tap, swipe, type_text, press_back, start_app, etc). Never skip.
-Use this with get_network_activity when the expected outcome is backend/API activity without a visible UI change.
-For backend/API activity, compare get_screen_fingerprint before and after the action and call get_network_activity immediately after the action instead of waiting for wait_for_screen_change.
+Use the runtime action result's \`action_type\` as \`actionType\` so the classifier can distinguish local-state actions from side-effect actions.
+Use this when the intended outcome is not already fully verified by the UI signal alone.
+For backend/API activity, compare get_screen_fingerprint before and after the action and call get_network_activity immediately after the action if the outcome is still ambiguous.
 HOW TO GATHER INPUTS before calling:
 1. Call wait_for_screen_change or compare get_screen_fingerprint before/after — set uiChanged accordingly.
 2. If you checked for a specific element with wait_for_ui, set expectedElementVisible.
-3. Do NOT call get_network_activity yet — omit networkRequests on the first call.
+3. Pass actionType from the action response when available.
+4. Only provide networkRequests if you already collected them or want to classify a side-effect action with backend evidence.
 RULES (applied in order — stop at first match):
 1. If uiChanged=true OR expectedElementVisible=true → outcome=success
-2. Otherwise this tool returns nextAction="call_get_network_activity" — you MUST call get_network_activity once, then call classify_action_outcome again with the results in networkRequests.
+2. If actionType is missing → outcome=unknown
 3. If any request has status=failure or retryable → outcome=backend_failure
-4. If no requests returned → outcome=no_op
-5. If all requests succeeded → outcome=ui_failure
-6. Otherwise → outcome=unknown
+4. If actionType maps to a local-state action → outcome=no_op; prefer state-based verification and avoid default network fallback
+5. If actionType maps to a side-effect action and no networkRequests were supplied → outcome=unknown
+6. If no requests returned → outcome=no_op
+7. If all requests succeeded → outcome=ui_failure
+8. Otherwise → outcome=unknown
 BEHAVIOUR after outcome:
 - success → continue
-- no_op → retry the action once or re-resolve the element
+- no_op → retry with richer state verification or re-resolve the element
 - backend_failure → stop and report the failing endpoint
 - ui_failure → stop and report failure
 - unknown → take one recovery step (e.g. capture_debug_snapshot), then stop`,
@@ -952,9 +955,13 @@ BEHAVIOUR after outcome:
                     type: 'boolean',
                     description: 'true if the element you expected to appear is now visible (from wait_for_ui). Omit if you did not check for a specific element.'
                 },
+                actionType: {
+                    type: 'string',
+                    description: 'The runtime action_type from the action response (for example tap, tap_element, swipe, type_text, press_back, start_app).'
+                },
                 networkRequests: {
                     type: 'array',
-                    description: 'Pass this only after calling get_network_activity as instructed by nextAction. Also use it when the expected outcome is backend/API activity without a visible UI change.',
+                    description: 'Optional network evidence collected after the action. Use it when the expected outcome is backend/API activity or when the UI signal is ambiguous.',
                     items: {
                         type: 'object',
                         properties: {
@@ -976,7 +983,7 @@ BEHAVIOUR after outcome:
         name: 'get_network_activity',
         description: `Returns structured network events captured from platform logs since the last action.
-Call this when classify_action_outcome returns nextAction="call_get_network_activity" or immediately after an action whose expected outcome is backend/API activity without a visible UI change.
+Call this immediately after an action when you want backend evidence for a side-effect flow, only if the result is still ambiguous.
 Do not call more than once per action.
 Events are filtered to significant (non-background) requests only.

package/dist/server/tool-handlers.js CHANGED Viewed

@@ -385,11 +385,13 @@ async function handleStopLogStream(args) {
 function handleClassifyActionOutcome(args) {
     const uiChanged = requireBooleanArg(args, 'uiChanged');
     const expectedElementVisible = getBooleanArg(args, 'expectedElementVisible');
+    const actionType = getStringArg(args, 'actionType');
     const networkRequests = getArrayArg(args, 'networkRequests');
     const hasLogErrors = getBooleanArg(args, 'hasLogErrors');
     const result = classifyActionOutcome({
         uiChanged,
         expectedElementVisible: expectedElementVisible ?? null,
+        actionType: actionType ?? null,
         networkRequests: networkRequests ?? null,
         hasLogErrors: hasLogErrors ?? null
     });

package/dist/server-core.js CHANGED Viewed

@@ -6,7 +6,7 @@ import { handleToolCall } from './server/tool-handlers.js';
 export { wrapResponse, toolDefinitions, handleToolCall };
 export const serverInfo = {
     name: 'mobile-debug-mcp',
-    version: '0.26.0'
+    version: '0.26.1'
 };
 export function createServer() {
     const server = new Server(serverInfo, {

package/docs/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,9 @@
 All notable changes to the **Mobile Debug MCP** project will be documented in this file.
+## [0.26.1]
+- Fixed overuse of `get_network_activity`
 ## [0.26.0]
 - RFC-003 wait/synchronization contract with `snapshot_revision`, `captured_at_ms`, and `loading_state`
 - Added `wait_for_ui_change` for stable in-place UI mutations

package/docs/ROADMAP.md CHANGED Viewed

@@ -1,6 +1,6 @@
-# Mobile Debug MCP Prioritized Roadmap
+# Mobile Debug MCP Roadmap
-## Prioritization Criteria
+## Planning Principles
 Ordered by:
@@ -26,33 +26,45 @@ Higher task success with fewer retries.
 ---
-# Completed
+# Roadmap Status Overview
-These priorities are done and kept here for history:
+## Completed Foundations
-- Priority 1 — Stronger State Verification
-- Priority 2 — Richer Element Identity
+| Capability | Status | Notes |
+|-----------|--------|-------|
+| Stronger State Verification | Complete | Foundational verification layer shipped |
+| Richer Element Identity | Complete | Identity and selector confidence foundations shipped |
+## Current Focus
+- Wait and Synchronization Reliability
+## Upcoming Work
+- Long Press Gesture
+- Better Compose / Custom Control Semantics
-Completion notes:
+## Later Horizon
-- State-aware verification is now implemented and wired through the tool surface.
-- Platform-native element metadata and selector-confidence hints are now part of the runtime contract.
+- Pinch to Zoom
+- Action Trace Correlation
 ---
-# Priority 1 — Stronger State Verification
+# Stronger State Verification
 ## Why first
 Highest leverage improvement.
-**Status:** Completed
+**Status:** Completed
+**Priority:** P1
 Most failures are not “can’t act,” they’re:
 - uncertain state
 - weak verification
 - retry loops caused by inference
-## Deliver
+## Scope
 - Direct readable control values
 - Expanded `expect_*` verification
 - Move from inference to state introspection
@@ -60,7 +72,7 @@ Most failures are not “can’t act,” they’re:
 ## Expected Impact
 Very high.
-## Done Criteria
+## Exit Criteria
 - Control state readable for core widgets (toggle, slider, input, dropdown)
 - New expect_* state verifiers implemented
 - Agents can verify state without visual inference in representative flows
@@ -79,19 +91,20 @@ Blocks or strengthens:
 ---
-# Priority 2 — Richer Element Identity
+# Richer Element Identity
 ## Why second
 Directly reduces selector brittleness.
-**Status:** Completed
+**Status:** Completed
+**Priority:** P2
 Improves:
 - targeting stability
 - repeatability
 - agent confidence
-## Deliver
+## Scope
 - Stable IDs / test tags prioritization
 - Selector confidence metadata
 - Preferred selector hierarchy
@@ -99,7 +112,7 @@ Improves:
 ## Expected Impact
 Very high.
-## Done Criteria
+## Exit Criteria
 - Stable selector preference order implemented
 - Test tags/resource IDs surfaced where available
 - Selector confidence metadata available
@@ -118,18 +131,21 @@ Blocks or strengthens:
 ---
-# Priority 3 — Wait and Synchronization Reliability
+# Wait and Synchronization Reliability
 ## Why third
 Reliable async synchronization is foundational for agent success and should precede gesture expansion.
+**Status:** Spec Ready
+**Priority:** P3
 Addresses failures where agents:
 - skip UI waits after actions
 - rely on network/log signals too early
 - struggle with in-place UI updates
 - misread stale UI snapshots
-## Deliver
+## Scope
 - UI-first synchronization policy guidance
 - wait_for_ui_change (hierarchy diff based waiting)
 - Structured loading state detection
@@ -139,7 +155,7 @@ Addresses failures where agents:
 ## Expected Impact
 Very high.
-## Done Criteria
+## Exit Criteria
 - wait_for_ui_change implemented
 - Loading state detection available for representative controls
 - Snapshot revision or staleness metadata exposed
@@ -163,11 +179,14 @@ Blocks or strengthens:
 ---
-# Priority 4 — Long Press Gesture
+# Long Press Gesture
 ## Why fourth
 High utility, relatively low complexity.
+**Status:** Planned
+**Priority:** P4
 Unlocks many currently awkward interactions:
 - context menus
@@ -177,7 +196,7 @@ Unlocks many currently awkward interactions:
 Broad usefulness.
-## Deliver
+## Scope
 New tool:
 ```json
@@ -191,7 +210,7 @@ Verification alignment:
 ## Expected Impact
 High.
-## Done Criteria
+## Exit Criteria
 - long_press tool implemented across supported platforms
 - Duration defaults and overrides supported
 - Verification patterns for long press outcomes defined
@@ -211,18 +230,21 @@ Strengthens:
 ---
-# Priority 5 — Better Compose / Custom Control Semantics
+# Better Compose / Custom Control Semantics
 ## Why fifth
 Important, but strengthened by priorities 1–4 first.
+**Status:** Planned
+**Priority:** P5
 Semantics become more useful once:
 - identity is stronger
 - verification is stronger
 - gestures are richer
 - synchronization is more reliable
-## Deliver
+## Scope
 - Composite control traits
 - Control role enrichment (adjustable, expandable, selectable_group)
 - Interaction contracts metadata
@@ -233,7 +255,7 @@ Semantics become more useful once:
 ## Expected Impact
 High.
-## Done Criteria
+## Exit Criteria
 - Semantic traits implemented for major custom control classes
 - Interaction contracts surfaced in snapshot model
 - Confidence model defined for derived semantics
@@ -253,11 +275,14 @@ Depends on:
 ---
-# Priority 6 — Pinch to Zoom
+# Pinch to Zoom
 ## Why sixth
 Valuable, but narrower than long press.
+**Status:** Planned
+**Priority:** P6
 Applies mainly to:
 - maps
 - images
@@ -266,7 +291,7 @@ Applies mainly to:
 Useful, but less universal.
-## Deliver
+## Scope
 ```json
 pinch_to_zoom(target, scale, center?)
@@ -279,7 +304,7 @@ Verification:
 ## Expected Impact
 Medium-high.
-## Done Criteria
+## Exit Criteria
 - pinch_to_zoom implemented
 - Zoom in/out flows supported
 - Verification primitives for viewport or zoom state available
@@ -297,22 +322,25 @@ Depends on:
 ---
-# Priority 7 — Action Trace Correlation
+# Action Trace Correlation
 ## Why seventh
 Very valuable for debugging,
 but less critical than improving control success first.
+**Status:** Planned
+**Priority:** P7
 Improves diagnosis more than task completion.
-## Deliver
+## Scope
 - Action correlation metadata
 - UI/network/log linkage
 ## Expected Impact
 Medium-high.
-## Done Criteria
+## Exit Criteria
 - Action correlation model defined
 - UI/network/log linkage captured for representative actions
 - Correlation metadata exposed to agents
@@ -331,7 +359,7 @@ Depends on:
 ---
-# Delivery Waves
+# Roadmap Sequence
 ## Dependency Summary
 Foundational sequence:
@@ -351,7 +379,7 @@ Layer 3 (Interaction Expansion)
 Layer 4 (Observability)
 - Priority 7 depends on 1,2,3
-## Wave 1 (Immediate)
+## Wave 1 (Current Focus)
 - Stronger State Verification
 - Richer Element Identity
 - Wait and Synchronization Reliability
@@ -361,7 +389,7 @@ Make core loop more reliable.
 ---
-## Wave 2
+## Wave 2 (Expansion)
 - Long Press
 - Better Compose Semantics
@@ -370,7 +398,7 @@ Expand interaction capability.
 ---
-## Wave 3
+## Wave 3 (Advanced)
 - Pinch to Zoom
 - Action Trace Correlation
@@ -379,7 +407,7 @@ Advanced gestures + observability.
 ---
-# Priority Stack Summary
+# Capability Sequence
 Execution Order:
 1. Stronger State Verification
@@ -397,7 +425,7 @@ Rationale:
 ---
-## Explicitly Deferred
+## Future Considerations
 Still out of scope:
 - Recovery planning logic

package/docs/rfcs/004-action-verification-routing.md ADDED Viewed

@@ -0,0 +1,342 @@
+# RFC 004: Verification Routing for Local and Side-Effect Actions
+## Status
+Draft
+## Summary
+This RFC corrects a specification flaw in action verification routing where agents may treat lack of obvious UI change as a trigger to inspect network activity by default.
+The current fallback can cause unnecessary network calls during purely local UI interactions (for example sliders, pickers, toggles, text entry), creating noise and reinforcing incorrect agent behavior.
+This RFC separates:
+- action verification
+- failure diagnosis
+- backend signal inspection
+And introduces context-aware routing based on action type.
+## Motivation
+Observed agent sessions showed `get_network_activity` being invoked during local UI manipulation solely because an action produced no coarse-grained UI diff.
+Current implicit reasoning resembles:
+```text
+if uiChanged == false:
+  inspect network activity
+```
+This is overly broad.
+For many interactions, absence of obvious snapshot change does not imply backend ambiguity. It often means verification used the wrong signals.
+Examples:
+- Slider value changed but tree structure did not.
+- Picker selection updated in-place.
+- Toggle changed checked state only.
+- Text field value changed without large snapshot delta.
+- Tab or accordion state changed through selection metadata.
+In these cases network inspection is diagnostic noise, not evidence.
+## Problem Statement
+The current model conflates:
+1. Verifying whether an action succeeded.
+2. Diagnosing why an action may have failed.
+These are distinct phases.
+As a result:
+- agents overuse network inspection
+- verification costs increase
+- local-state actions are treated as ambiguous too often
+- network hints can be elevated beyond their intended role
+## Goals
+This RFC:
+- Prevents default network fallbacks for local-state actions.
+- Makes verification primarily state-driven.
+- Restricts network activity inspection to side-effect actions where ambiguity remains.
+- Refines `classify_action_outcome` decision routing.
+## Non-Goals
+This RFC does not:
+- change raw snapshot precedence (raw remains authoritative)
+- redefine expect_* ownership of verification
+- make network activity mandatory evidence
+- expand semantic hints into executable truth
+## Action Categories
+### Category A: Local-State Actions
+Actions expected to modify client-side UI state.
+Examples:
+- tap toggle
+- drag slider
+- picker selection
+- text entry
+- scrolling
+- tab switching
+- expand/collapse
+- local navigation controls
+### Category B: Side-Effect Actions
+Actions that may trigger backend or asynchronous side effects.
+Examples:
+- submit
+- save
+- sync
+- search
+- refresh
+- login
+- purchase flows
+## Action Classification Source of Truth
+## Action Type Emission (Runtime Contract)
+`action_type` MUST be emitted by the runtime layer that produces or executes actions. It is not inferred by the agent.
+There are three valid sources of truth, in order of precedence:
+### 1. Tool Schema Annotation (preferred)
+If the action originates from a tool invocation, `action_type` MUST be defined in the tool’s schema definition.
+Example:
+```json
+{
+  "name": "toggle_switch",
+  "action_type": "local_state"
+}
+```
+or
+```json
+{
+  "name": "submit_form",
+  "action_type": "side_effect"
+}
+```
+This is the canonical source.
+### 2. Handler Output (runtime execution layer)
+If tool schema does not define `action_type`, the runtime handler that executes the action MUST attach it before returning the action result.
+Example:
+```json
+{
+  "action": "click",
+  "target": "save_button",
+  "action_type": "side_effect"
+}
+```
+This is valid only when schema-level annotation is absent.
+### 3. Fallback Mapping Table (last resort, deterministic only)
+If neither schema nor handler provides `action_type`, the system MUST use a deterministic mapping table maintained by the runtime.
+This table MUST be:
+- static (no runtime inference)
+- versioned
+- explicitly defined in implementation
+Example mapping:
+| action | action_type |
+|--------|------------|
+| tap_toggle | local_state |
+| enter_text | local_state |
+| submit | side_effect |
+| refresh | side_effect |
+If an action is not in the table, it MUST default to:
+```
+side_effect
+```
+### Hard Constraint
+Agents MUST NOT infer or override `action_type` based on UI state changes, snapshot diffs, or network activity.
+### Normative Interpretation
+`action_type` is part of the execution contract, not the reasoning layer.
+Action type MUST be explicitly defined by the action schema or tool output.
+Valid values:
+- local_state
+- side_effect
+Agents MUST NOT infer action type from UI changes.
+If action type is missing from both the tool schema and the handler output, the runtime MUST resolve it through the fallback mapping table (which defaults to side_effect for unmapped actions); agents MUST NOT guess a category themselves.
+## Revised Verification Routing
+### For Local-State Actions
+Verification priority:
+1. Expected state assertions
+2. Refreshed snapshot comparison
+3. Element property checks
+4. Targeted expect_* verification
+Signals may include:
+- value changes
+- selected state
+- checked state
+- focus changes
+- labels/text
+- enabled/disabled transitions
+- position/state metadata
+Network activity should not be used as default fallback.
+### For Side-Effect Actions
+Verification priority:
+1. Expected UI/state verification first
+2. Retry richer local verification if ambiguous
+3. Only then optionally inspect network or log signals
+Network signals are supporting hints, not primary proof of success.
+## Decision Logic Update
+Replace implied logic:
+```text
+if uiChanged == false:
+  get_network_activity()
+```
+With:
+```text
+if expected_state_verified:
+  success
+elif action_type == local_state:
+  retry using richer state verification
+elif action_type == side_effect and ambiguity_remains:
+  optionally inspect network activity
+else:
+  inconclusive
+```
+## Definition of Ambiguity
+Ambiguity exists only when:
+- expected state cannot be evaluated from UI snapshot, AND
+- no single deterministic state predicate can be computed from UI fields
+Ambiguity does NOT include:
+- absence of visual diff
+- absence of network activity
+- lack of large UI tree changes
+## Normative Rules
+### Rule 1
+Agents MUST NOT use network activity inspection as a default fallback for local-state actions solely because coarse UI diffs are absent.
+### Rule 2
+Agents MUST prefer explicit state verification over backend diagnostics whenever the action is expected to be locally observable.
+### Rule 3
+Network activity MAY be consulted only when:
+- the action plausibly triggers backend work, and
+- local verification remains ambiguous under the defined ambiguity criteria.
+### Rule 4
+Network activity evidence MUST be treated as auxiliary signal, not authoritative proof of action success.
+## Unified Diagnostic Signals
+Network activity and log inspection are equivalent diagnostic signals.
+Both:
+- are secondary to UI state verification
+- MUST NOT be used as default fallback for local-state actions
+- follow the same escalation rules defined in this RFC
+## Impact on classify_action_outcome
+`classify_action_outcome` should be interpreted as routing logic, not a mandatory network escalation path.
+For `uiChanged=false`, action category determines next step.
+No automatic implication:
+```text
+uiChanged=false => inspect network
+```
+## Expected Benefits
+- Fewer unnecessary tool calls
+- Cleaner verification traces
+- Reduced cargo-cult network probing
+- Better behavior for local UI interactions
+- Stronger separation between verification and diagnosis
+- More reliable agent reasoning
+## Compatibility
+This is a patch-level specification correction.
+It refines routing semantics but does not break:
+- existing expect_* semantics
+- snapshot response shape
+- raw-over-semantic precedence
+- action execution model
+## Implementation Notes
+Follow-up work may include:
+- prompt updates
+- regression examples for sliders/toggles/pickers
+- protocol examples showing correct routing
+- telemetry on reduced unnecessary network inspections
+## Open Questions
+Questions for review:
+1. Should action category be explicitly emitted as runtime metadata, or is heuristic inference acceptable only within the fallback mapping layer defined in the Action Type Emission contract?
+2. Should side-effect actions permit optional log inspection alongside network hints?
+3. Should local-state verification examples be added to core spec or examples appendix?
+## Decision Requested
+Adopt verification routing based on action type and remove implicit default escalation from missing UI diffs to network inspection.

package/docs/specs/mcp-tooling-spec-v1.md CHANGED Viewed

@@ -41,7 +41,7 @@ Outcome-specific guidance:
 - visible navigation expected -> `wait_for_screen_change` (optional) -> `expect_screen`
 - local UI change expected -> `wait_for_ui` (optional) -> `expect_element_visible`
 - readable element state expected -> `wait_for_ui` (optional) -> `expect_state`
-- backend/API activity expected without a visible UI change -> compare `get_screen_fingerprint` before/after, then call `get_network_activity` immediately after the action and `classify_action_outcome` with the observed requests
+- backend/API activity expected without a visible UI change -> compare `get_screen_fingerprint` before/after, then call `classify_action_outcome` with the runtime `action_type`; collect `get_network_activity` only if the result remains ambiguous
 For backend/API activity, `wait_for_screen_change` is not the right verification tool unless a visible transition is also expected.
@@ -294,11 +294,11 @@ Tool: `classify_action_outcome`
 Rules:
-- MAY use UI, network, and log signals
+- MAY use UI, action, network, and log signals
 - MUST be deterministic
 - MUST NOT replace `expect_*` tools
 - MUST be treated as a supplementary signal only
-- SHOULD be used with `get_network_activity` when the expected outcome is backend/API activity without a visible UI change
+- SHOULD be used with `get_network_activity` only when the outcome is still ambiguous after routing by `action_type`
 It is not a verification mechanism.

package/docs/tools/interact.md CHANGED Viewed

@@ -17,6 +17,7 @@ Important:
 - `wait_for_*` tools must not be used as the final verification of action success when an applicable `expect_*` tool exists.
 - action tools report execution success, not outcome correctness.
+- `classify_action_outcome` should receive the runtime `action_type` when you want routing to distinguish local-state and side-effect actions.
 ## tap / swipe / type_text / press_back
@@ -54,10 +55,10 @@ Preferred verification:
 - navigation outcome known -> `expect_screen`
 - local UI change known -> `expect_element_visible`
 - readable element state known -> `expect_state`
-- backend/API activity expected -> `classify_action_outcome` + `get_network_activity`
+- backend/API activity expected -> `classify_action_outcome` + optional `get_network_activity` if the UI signal remains ambiguous
-Use `wait_for_screen_change` only when a visible transition is the expected outcome. If a button should trigger an API request but the screen should stay the same, rely on network activity and classification instead.
-For backend-only actions, prefer comparing `get_screen_fingerprint` before/after and call `get_network_activity` immediately after the action; do not wait on `wait_for_screen_change` if no visible transition is expected.
+Use `wait_for_screen_change` only when a visible transition is the expected outcome. If a button should trigger an API request but the screen should stay the same, rely on `action_type` plus classification first.
+For backend-only actions, prefer comparing `get_screen_fingerprint` before/after and collect `get_network_activity` immediately after the action only if the result is still ambiguous; do not wait on `wait_for_screen_change` if no visible transition is expected.
 Use `wait_for_ui_change` when the screen stays in place but visible text or element state should change.
 ---
@@ -507,17 +508,18 @@ Notes:
 ## classify_action_outcome + get_network_activity
-Use this pair when the action is expected to trigger network/backend work and the screen may not visibly change.
+Use this pair when the action may trigger network/backend work and the screen may not visibly change.
 Pattern:
 1. perform the action
 2. call `classify_action_outcome` with `uiChanged` from `wait_for_screen_change` or a screen fingerprint comparison
-3. if the classifier asks for it, call `get_network_activity`
-4. call `classify_action_outcome` again with `networkRequests`
+3. pass the runtime `action_type` value as `actionType`
+4. collect `get_network_activity` only if the action is side-effect oriented and the UI signal remains ambiguous
+5. call `classify_action_outcome` again with `networkRequests` if you collected them
 Guidance:
 - `uiChanged=true` or `expectedElementVisible=true` means the action outcome is already verified
-- `nextAction="call_get_network_activity"` means the UI signal was inconclusive and the agent should inspect network activity
-- if network requests succeed but the UI stays unchanged, treat the outcome as a backend/API result rather than a screen transition
+- local-state actions should prefer refreshed snapshots, `expect_state`, or `expect_element_visible` over default network inspection
+- network activity is auxiliary evidence, not mandatory proof

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "mobile-debug-mcp",
-  "version": "0.26.0",
+  "version": "0.26.1",
   "description": "MCP server for mobile app debugging (Android + iOS), with focus on security and reliability",
   "type": "module",
   "bin": {

package/src/interact/classify.ts CHANGED Viewed

@@ -1,5 +1,6 @@
 export type ActionOutcome = 'success' | 'no_op' | 'backend_failure' | 'ui_failure' | 'unknown'
 export type NetworkRequestStatus = 'success' | 'failure' | 'retryable'
+export type ActionCategory = 'local_state' | 'side_effect'
 export interface NetworkRequest {
   endpoint: string
@@ -9,6 +10,8 @@ export interface NetworkRequest {
 export interface ClassifyActionOutcomeInput {
   uiChanged: boolean
   expectedElementVisible?: boolean | null
+  /** Concrete action_type from the runtime action result (for example: tap, type_text, start_app). */
+  actionType?: string | null
   /** null = get_network_activity has not been called yet */
   networkRequests?: NetworkRequest[] | null
   hasLogErrors?: boolean | null
@@ -17,8 +20,29 @@ export interface ClassifyActionOutcomeInput {
 export interface ClassifyActionOutcomeResult {
   outcome: ActionOutcome
   reasoning: string
-  /** Present when the caller must call get_network_activity before a final classification is possible */
-  nextAction?: 'call_get_network_activity'
+}
+const ACTION_CATEGORY_BY_TYPE: Record<string, ActionCategory> = {
+  tap: 'local_state',
+  tap_element: 'local_state',
+  swipe: 'local_state',
+  scroll_to_element: 'local_state',
+  type_text: 'local_state',
+  press_back: 'local_state',
+  start_app: 'side_effect',
+  restart_app: 'side_effect',
+  terminate_app: 'side_effect',
+  reset_app_data: 'side_effect',
+  install_app: 'side_effect',
+  build_app: 'side_effect',
+  build_and_install: 'side_effect'
+}
+function inferActionCategory(actionType?: string | null): ActionCategory | null {
+  if (typeof actionType !== 'string') return null
+  const normalized = actionType.trim().toLowerCase()
+  if (!normalized) return null
+  return ACTION_CATEGORY_BY_TYPE[normalized] ?? 'side_effect'
 }
 /**
@@ -26,39 +50,55 @@ export interface ClassifyActionOutcomeResult {
  * Same inputs always produce the same output.
  */
 export function classifyActionOutcome(input: ClassifyActionOutcomeInput): ClassifyActionOutcomeResult {
-  const { uiChanged, expectedElementVisible, networkRequests, hasLogErrors } = input
+  const { uiChanged, expectedElementVisible, actionType, networkRequests, hasLogErrors } = input
+  const actionCategory = inferActionCategory(actionType)
   // Step 1 — UI signal is positive
   if (uiChanged || expectedElementVisible === true) {
     return { outcome: 'success', reasoning: expectedElementVisible === true ? 'expected element is visible' : 'UI changed after action' }
   }
-  // Step 2 — UI did not change; network signal is required
-  if (networkRequests === null || networkRequests === undefined) {
+  // Step 2 — no action type means we cannot choose a safe routing path
+  if (actionCategory === null) {
     return {
       outcome: 'unknown',
-      reasoning: 'UI did not change; get_network_activity must be called before classification can proceed',
-      nextAction: 'call_get_network_activity'
+      reasoning: 'actionType was not supplied; pass the runtime action_type so the classifier can distinguish local-state and side-effect routing'
     }
   }
-  // Step 3 — any network failure
-  const failedRequest = networkRequests.find((r) => r.status === 'failure' || r.status === 'retryable')
+  const failedRequest = networkRequests?.find((r) => r.status === 'failure' || r.status === 'retryable')
   if (failedRequest) {
     return { outcome: 'backend_failure', reasoning: `network request ${failedRequest.endpoint} returned ${failedRequest.status}` }
   }
-  // Step 4 — no network requests at all
+  // Step 3 — local-state actions should be verified with state-specific signals first
+  if (actionCategory === 'local_state') {
+    const logNote = hasLogErrors ? ' (log errors present)' : ''
+    return {
+      outcome: 'no_op',
+      reasoning: `local-state action${logNote}; use expect_state, refreshed snapshot comparison, or expect_element_visible instead of defaulting to network inspection`
+    }
+  }
+  // Step 4 — side-effect actions may legitimately need network or log inspection
+  if (networkRequests === null || networkRequests === undefined) {
+    return {
+      outcome: 'unknown',
+      reasoning: 'side-effect action without network data; inspect network or log signals only if the outcome is still ambiguous'
+    }
+  }
+  // Step 5 — no network requests at all
   if (networkRequests.length === 0) {
     const logNote = hasLogErrors ? ' (log errors present)' : ''
-    return { outcome: 'no_op', reasoning: `no UI change and no network activity${logNote}` }
+    return { outcome: 'no_op', reasoning: `side-effect action and no network activity${logNote}` }
   }
-  // Step 5 — network requests exist and all succeeded
+  // Step 6 — network requests exist and all succeeded
   if (networkRequests.every((r) => r.status === 'success')) {
     return { outcome: 'ui_failure', reasoning: 'network requests succeeded but UI did not change' }
   }
-  // Step 6 — fallback
+  // Step 7 — fallback
   return { outcome: 'unknown', reasoning: 'signals are inconclusive' }
 }

package/src/server/tool-definitions.ts CHANGED Viewed

@@ -344,7 +344,7 @@ Capabilities:
 Constraints:
 - Does not verify correctness of the resulting state
 - Must not be used alone to confirm action success when an applicable expect_* tool exists
-- Use classify_action_outcome + get_network_activity when the expected outcome is backend/API activity without a visible UI change
+- For backend/API activity without a visible UI change, pass the runtime action_type into classify_action_outcome and collect network evidence only if the result remains ambiguous
 Recommended Usage:
 1. Capture or define the expected outcome
@@ -918,26 +918,29 @@ Failure Handling:
     name: 'classify_action_outcome',
     description: `Classify the outcome of the most recent action into exactly one of: success, no_op, backend_failure, ui_failure, unknown.
-MUST be called after every action (tap, swipe, type_text, press_back, start_app, etc). Never skip.
-Use this with get_network_activity when the expected outcome is backend/API activity without a visible UI change.
-For backend/API activity, compare get_screen_fingerprint before and after the action and call get_network_activity immediately after the action instead of waiting for wait_for_screen_change.
+Use the runtime action result's \`action_type\` as \`actionType\` so the classifier can distinguish local-state actions from side-effect actions.
+Use this when the intended outcome is not already fully verified by the UI signal alone.
+For backend/API activity, compare get_screen_fingerprint before and after the action and call get_network_activity immediately after the action if the outcome is still ambiguous.
 HOW TO GATHER INPUTS before calling:
 1. Call wait_for_screen_change or compare get_screen_fingerprint before/after — set uiChanged accordingly.
 2. If you checked for a specific element with wait_for_ui, set expectedElementVisible.
-3. Do NOT call get_network_activity yet — omit networkRequests on the first call.
+3. Pass actionType from the action response when available.
+4. Only provide networkRequests if you already collected them or want to classify a side-effect action with backend evidence.
 RULES (applied in order — stop at first match):
 1. If uiChanged=true OR expectedElementVisible=true → outcome=success
-2. Otherwise this tool returns nextAction="call_get_network_activity" — you MUST call get_network_activity once, then call classify_action_outcome again with the results in networkRequests.
+2. If actionType is missing → outcome=unknown
 3. If any request has status=failure or retryable → outcome=backend_failure
-4. If no requests returned → outcome=no_op
-5. If all requests succeeded → outcome=ui_failure
-6. Otherwise → outcome=unknown
+4. If actionType maps to a local-state action → outcome=no_op; prefer state-based verification and avoid default network fallback
+5. If actionType maps to a side-effect action and no networkRequests were supplied → outcome=unknown
+6. If no requests returned → outcome=no_op
+7. If all requests succeeded → outcome=ui_failure
+8. Otherwise → outcome=unknown
 BEHAVIOUR after outcome:
 - success → continue
-- no_op → retry the action once or re-resolve the element
+- no_op → retry with richer state verification or re-resolve the element
 - backend_failure → stop and report the failing endpoint
 - ui_failure → stop and report failure
 - unknown → take one recovery step (e.g. capture_debug_snapshot), then stop`,
@@ -952,9 +955,13 @@ BEHAVIOUR after outcome:
           type: 'boolean',
           description: 'true if the element you expected to appear is now visible (from wait_for_ui). Omit if you did not check for a specific element.'
         },
+        actionType: {
+          type: 'string',
+          description: 'The runtime action_type from the action response (for example tap, tap_element, swipe, type_text, press_back, start_app).'
+        },
         networkRequests: {
           type: 'array',
-          description: 'Pass this only after calling get_network_activity as instructed by nextAction. Also use it when the expected outcome is backend/API activity without a visible UI change.',
+          description: 'Optional network evidence collected after the action. Use it when the expected outcome is backend/API activity or when the UI signal is ambiguous.',
           items: {
             type: 'object',
             properties: {
@@ -976,7 +983,7 @@ BEHAVIOUR after outcome:
     name: 'get_network_activity',
     description: `Returns structured network events captured from platform logs since the last action.
-Call this when classify_action_outcome returns nextAction="call_get_network_activity" or immediately after an action whose expected outcome is backend/API activity without a visible UI change.
+Call this immediately after an action when you want backend evidence for a side-effect flow, only if the result is still ambiguous.
 Do not call more than once per action.
 Events are filtered to significant (non-background) requests only.

package/src/server/tool-handlers.ts CHANGED Viewed

@@ -448,11 +448,13 @@ async function handleStopLogStream(args: ToolCallArgs) {
 function handleClassifyActionOutcome(args: ToolCallArgs) {
   const uiChanged = requireBooleanArg(args, 'uiChanged')
   const expectedElementVisible = getBooleanArg(args, 'expectedElementVisible')
+  const actionType = getStringArg(args, 'actionType')
   const networkRequests = getArrayArg<ClassifyNetworkRequestArg>(args, 'networkRequests')
   const hasLogErrors = getBooleanArg(args, 'hasLogErrors')
   const result = classifyActionOutcome({
     uiChanged,
     expectedElementVisible: expectedElementVisible ?? null,
+    actionType: actionType ?? null,
     networkRequests: networkRequests ?? null,
     hasLogErrors: hasLogErrors ?? null
   })

package/src/server-core.ts CHANGED Viewed

@@ -13,7 +13,7 @@ export { wrapResponse, toolDefinitions, handleToolCall }
 export const serverInfo = {
   name: 'mobile-debug-mcp',
-  version: '0.26.0'
+  version: '0.26.1'
 }
 export function createServer() {

package/test/unit/interact/classify_action_outcome.test.ts CHANGED Viewed

@@ -7,7 +7,6 @@ function run() {
     const result = classifyActionOutcome({ uiChanged: true })
     assert.strictEqual(result.outcome, 'success')
     assert.ok(result.reasoning.length > 0)
-    assert.strictEqual(result.nextAction, undefined)
   }
   // Step 1 — expectedElementVisible → success
@@ -15,7 +14,6 @@ function run() {
     const result = classifyActionOutcome({ uiChanged: false, expectedElementVisible: true })
     assert.strictEqual(result.outcome, 'success')
     assert.strictEqual(result.reasoning, 'expected element is visible')
-    assert.strictEqual(result.nextAction, undefined)
   }
   // Step 1 — both uiChanged and expectedElementVisible → success
@@ -24,24 +22,50 @@ function run() {
     assert.strictEqual(result.outcome, 'success')
   }
-  // Step 2 — UI did not change, networkRequests not yet provided → nextAction required
+  // No actionType supplied → unknown
   {
     const result = classifyActionOutcome({ uiChanged: false })
     assert.strictEqual(result.outcome, 'unknown')
-    assert.strictEqual(result.nextAction, 'call_get_network_activity')
+    assert.ok(result.reasoning.includes('actionType was not supplied'))
   }
-  // Step 2 — explicit null networkRequests → nextAction required
+  // Local-state action routes to state verification rather than forced network probing
   {
-    const result = classifyActionOutcome({ uiChanged: false, expectedElementVisible: null, networkRequests: null })
+    const result = classifyActionOutcome({ uiChanged: false, actionType: 'tap' })
+    assert.strictEqual(result.outcome, 'no_op')
+    assert.ok(result.reasoning.includes('local-state action'))
+  }
+  // Local-state action with network data still prefers local-state semantics
+  {
+    const result = classifyActionOutcome({
+      uiChanged: false,
+      actionType: 'type_text',
+      networkRequests: []
+    })
+    assert.strictEqual(result.outcome, 'no_op')
+    assert.ok(result.reasoning.includes('local-state action'))
+  }
+  // Explicit side-effect action without networkRequests supplied → unknown
+  {
+    const result = classifyActionOutcome({ uiChanged: false, actionType: 'start_app' })
     assert.strictEqual(result.outcome, 'unknown')
-    assert.strictEqual(result.nextAction, 'call_get_network_activity')
+    assert.ok(result.reasoning.includes('side-effect action'))
+  }
+  // Side-effect action with empty networkRequests → no_op
+  {
+    const result = classifyActionOutcome({ uiChanged: false, actionType: 'start_app', networkRequests: [] })
+    assert.strictEqual(result.outcome, 'no_op')
+    assert.ok(result.reasoning.includes('side-effect action'))
   }
-  // Step 3 — failure status → backend_failure
+  // Network failure → backend_failure
   {
     const result = classifyActionOutcome({
       uiChanged: false,
+      actionType: 'start_app',
       networkRequests: [{ endpoint: '/login', status: 'failure' }]
     })
     assert.strictEqual(result.outcome, 'backend_failure')
@@ -49,10 +73,11 @@ function run() {
     assert.ok(result.reasoning.includes('failure'))
   }
-  // Step 3 — retryable status → backend_failure
+  // Retryable status → backend_failure
   {
     const result = classifyActionOutcome({
       uiChanged: false,
+      actionType: 'start_app',
       networkRequests: [
         { endpoint: '/api/submit', status: 'retryable' },
         { endpoint: '/api/other', status: 'success' }
@@ -62,25 +87,11 @@ function run() {
     assert.ok(result.reasoning.includes('/api/submit'))
   }
-  // Step 4 — empty network requests → no_op
-  {
-    const result = classifyActionOutcome({ uiChanged: false, networkRequests: [] })
-    assert.strictEqual(result.outcome, 'no_op')
-    assert.ok(result.reasoning.includes('no UI change'))
-    assert.ok(result.reasoning.includes('no network activity'))
-  }
-  // Step 4 — empty network requests with log errors → no_op with note
-  {
-    const result = classifyActionOutcome({ uiChanged: false, networkRequests: [], hasLogErrors: true })
-    assert.strictEqual(result.outcome, 'no_op')
-    assert.ok(result.reasoning.includes('log errors'))
-  }
-  // Step 5 — all requests succeeded but UI unchanged → ui_failure
+  // All requests succeeded and UI stayed unchanged → ui_failure
   {
     const result = classifyActionOutcome({
       uiChanged: false,
+      actionType: 'start_app',
       networkRequests: [
         { endpoint: '/api/save', status: 'success' },
         { endpoint: '/api/refresh', status: 'success' }
@@ -90,10 +101,18 @@ function run() {
     assert.ok(result.reasoning.includes('network requests succeeded'))
   }
+  // Empty network requests with log errors → no_op with note
+  {
+    const result = classifyActionOutcome({ uiChanged: false, actionType: 'start_app', networkRequests: [], hasLogErrors: true })
+    assert.strictEqual(result.outcome, 'no_op')
+    assert.ok(result.reasoning.includes('log errors'))
+  }
   // Step 1 takes priority over network signals — success even when failures present
   {
     const result = classifyActionOutcome({
       uiChanged: true,
+      actionType: 'start_app',
       networkRequests: [{ endpoint: '/api/log', status: 'failure' }]
     })
     assert.strictEqual(result.outcome, 'success')

package/test/unit/server/contract.test.ts CHANGED Viewed

@@ -68,15 +68,17 @@ async function run() {
   const classifyActionOutcome = toolDefinitions.find((tool) => tool.name === 'classify_action_outcome')
   assert(classifyActionOutcome, 'classify_action_outcome should be registered')
-  assert.match((classifyActionOutcome as any).description, /backend\/API activity without a visible UI change/i)
-  assert.match((classifyActionOutcome as any).description, /get_network_activity/i)
-  assert.match((classifyActionOutcome as any).description, /immediately after the action/i)
+  assert.match((classifyActionOutcome as any).description, /action_type/i)
+  assert.match((classifyActionOutcome as any).description, /local-state/i)
+  assert.match((classifyActionOutcome as any).description, /side-effect/i)
+  assert.strictEqual((classifyActionOutcome as any).inputSchema.properties.actionType.type, 'string')
+  assert.match((classifyActionOutcome as any).inputSchema.properties.networkRequests.description, /optional network evidence/i)
   const getNetworkActivity = toolDefinitions.find((tool) => tool.name === 'get_network_activity')
   assert(getNetworkActivity, 'get_network_activity should be registered')
-  assert.match((getNetworkActivity as any).description, /backend\/API activity without a visible UI change/i)
-  assert.doesNotMatch((getNetworkActivity as any).description, /Call this only when/i)
-  assert.match((getNetworkActivity as any).description, /immediately after an action/i)
+  assert.match((getNetworkActivity as any).description, /side-effect/i)
+  assert.doesNotMatch((getNetworkActivity as any).description, /nextAction/i)
+  assert.match((getNetworkActivity as any).description, /only if the result is still ambiguous/i)
   await assert.rejects(() => handleToolCall('unknown_tool'), /Unknown tool: unknown_tool/)