npm - mobile-debug-mcp - Versions diffs - 0.24.2 → 0.24.4 - Mend

mobile-debug-mcp 0.24.2 → 0.24.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

package/dist/interact/index.js +4 -2
package/dist/server/common.js +28 -2
package/dist/server/tool-definitions.js +13 -10
package/dist/server/tool-handlers.js +3 -4
package/docs/CHANGELOG.md +6 -0
package/docs/specs/baseline-spec-v0.md +312 -0
package/docs/specs/mcp-tooling-spec-v1.md +281 -0
package/docs/tools/interact.md +27 -3
package/docs/tools/manage.md +1 -1
package/package.json +1 -1
package/src/interact/index.ts +5 -3
package/src/server/common.ts +28 -2
package/src/server/tool-definitions.ts +13 -10
package/src/server/tool-handlers.ts +4 -4
package/src/types.ts +1 -1
package/test/unit/server/contract.test.ts +13 -0
package/test/unit/server/response_shapes.test.ts +28 -1

package/dist/interact/index.js CHANGED Viewed

@@ -185,9 +185,10 @@ export class ToolsInteract {
         return await interact.tap(x, y, resolved.id);
     }
     static async tapElementHandler({ elementId }) {
-        const timestamp = Date.now();
+        const timestampMs = Date.now();
+        const timestamp = new Date(timestampMs).toISOString();
         const actionType = 'tap_element';
-        const actionId = nextActionId(actionType, timestamp);
+        const actionId = nextActionId(actionType, timestampMs);
         const selector = { elementId };
         const resolved = ToolsInteract._resolvedUiElements.get(elementId);
         if (!resolved) {
@@ -225,6 +226,7 @@ export class ToolsInteract {
             action_id: actionId,
             timestamp,
             action_type: actionType,
+            ...(tree?.device ? { device: tree.device } : {}),
             target: {
                 selector,
                 resolved: resolvedTarget

package/dist/server/common.js CHANGED Viewed

@@ -49,9 +49,10 @@ export function inferScrollFailure(message) {
     return { failureCode: 'UNKNOWN', retryable: false };
 }
 export function buildActionExecutionResult({ actionType, device, selector, resolved, success, uiFingerprintBefore, uiFingerprintAfter, failure, details }) {
-    const timestamp = Date.now();
+    const timestampMs = Date.now();
+    const timestamp = new Date(timestampMs).toISOString();
     return {
-        action_id: nextActionId(actionType, timestamp),
+        action_id: nextActionId(actionType, timestampMs),
         timestamp,
         action_type: actionType,
         ...(device ? { device } : {}),
@@ -66,3 +67,28 @@ export function buildActionExecutionResult({ actionType, device, selector, resol
         ...(details ? { details } : {})
     };
 }
+export function wrapToolError(name, error) {
+    const message = error instanceof Error
+        ? error.message
+        : typeof error === 'object' && error !== null
+            ? (() => {
+                try {
+                    return JSON.stringify(error, null, 2);
+                }
+                catch {
+                    return '[unserializable error object]';
+                }
+            })()
+            : String(error);
+    return {
+        content: [{
+                type: 'text',
+                text: JSON.stringify({
+                    error: {
+                        tool: name,
+                        message
+                    }
+                }, null, 2)
+            }]
+    };
+}

package/dist/server/tool-definitions.js CHANGED Viewed

@@ -10,7 +10,7 @@ Inputs:
 - deviceId (optional)
 Output Structure:
-- action_id, timestamp, action_type
+ - action_id, timestamp (ISO 8601), action_type
 - target.selector = { appId }
 - success = true when launch was dispatched successfully
 - failure_code/retryable when launch dispatch fails
@@ -83,7 +83,7 @@ Inputs:
 - deviceId (optional)
 Output Structure:
-- action_id, timestamp, action_type
+ - action_id, timestamp (ISO 8601), action_type
 - target.selector = { appId }
 - success = true when the restart command completed
 - failure_code/retryable when restart dispatch fails
@@ -344,6 +344,7 @@ Capabilities:
 Constraints:
 - Does not verify correctness of the resulting state
 - Must not be used alone to confirm action success when an applicable expect_* tool exists
+- Use classify_action_outcome + get_network_activity when the expected outcome is backend/API activity without a visible UI change
 Recommended Usage:
 1. Capture or define the expected outcome
@@ -532,7 +533,7 @@ Inputs:
 - deviceId (optional)
 Output Structure:
-- action_id, timestamp, action_type
+ - action_id, timestamp (ISO 8601), action_type
 - target.selector = { x, y }
 - success = true when the tap was dispatched
 - failure_code/retryable when dispatch fails
@@ -587,7 +588,7 @@ Inputs:
 Output Structure:
 - action_id: unique timestamp-based action identifier
-- timestamp: epoch milliseconds for the action attempt
+- timestamp: ISO 8601 timestamp for the action attempt
 - action_type: "tap_element"
 - target.selector: original target handle ({ elementId })
 - target.resolved: minimal resolved element info used for the tap
@@ -640,7 +641,7 @@ Inputs:
 - platform/deviceId (optional)
 Output Structure:
-- action_id, timestamp, action_type
+- action_id, timestamp (ISO 8601), action_type
 - target.selector = { x1, y1, x2, y2, duration }
 - success = true when the swipe was dispatched
 - failure_code/retryable when dispatch fails
@@ -692,7 +693,7 @@ Inputs:
 - direction, maxScrolls, scrollAmount, deviceId (optional)
 Output Structure:
-- action_id, timestamp, action_type
+- action_id, timestamp (ISO 8601), action_type
 - target.selector = original selector
 - target.resolved = minimal resolved element info when found
 - success = true when scrolling produced a visible target element
@@ -746,7 +747,7 @@ Inputs:
 - platform/deviceId (optional)
 Output Structure:
-- action_id, timestamp, action_type
+- action_id, timestamp (ISO 8601), action_type
 - target.selector = { text }
 - success = true when text input was dispatched
 - failure_code/retryable when dispatch fails
@@ -795,7 +796,7 @@ Inputs:
 - platform/deviceId (optional)
 Output Structure:
-- action_id, timestamp, action_type
+- action_id, timestamp (ISO 8601), action_type
 - target.selector = { key: "back" }
 - success = true when the back action was dispatched
 - failure_code/retryable when dispatch fails
@@ -835,6 +836,8 @@ Failure Handling:
         description: `Classify the outcome of the most recent action into exactly one of: success, no_op, backend_failure, ui_failure, unknown.
 MUST be called after every action (tap, swipe, type_text, press_back, start_app, etc). Never skip.
+Use this with get_network_activity when the expected outcome is backend/API activity without a visible UI change.
+For backend/API activity, compare get_screen_fingerprint before and after the action and call get_network_activity immediately after the action instead of waiting for wait_for_screen_change.
 HOW TO GATHER INPUTS before calling:
 1. Call wait_for_screen_change or compare get_screen_fingerprint before/after — set uiChanged accordingly.
@@ -868,7 +871,7 @@ BEHAVIOUR after outcome:
                 },
                 networkRequests: {
                     type: 'array',
-                    description: 'Pass this only after calling get_network_activity as instructed by nextAction. Map each request to endpoint + status.',
+                    description: 'Pass this only after calling get_network_activity as instructed by nextAction. Also use it when the expected outcome is backend/API activity without a visible UI change.',
                     items: {
                         type: 'object',
                         properties: {
@@ -890,7 +893,7 @@ BEHAVIOUR after outcome:
         name: 'get_network_activity',
         description: `Returns structured network events captured from platform logs since the last action.
-Call this only when classify_action_outcome returns nextAction="call_get_network_activity".
+Call this when classify_action_outcome returns nextAction="call_get_network_activity" or immediately after an action whose expected outcome is backend/API activity without a visible UI change.
 Do not call more than once per action.
 Events are filtered to significant (non-background) requests only.

package/dist/server/tool-handlers.js CHANGED Viewed

@@ -4,7 +4,7 @@ import { ToolsObserve } from '../observe/index.js';
 import { classifyActionOutcome } from '../interact/classify.js';
 import { ToolsNetwork } from '../network/index.js';
 import { getSystemStatus } from '../system/index.js';
-import { buildActionExecutionResult, captureActionFingerprint, inferGenericFailure, inferScrollFailure, wrapResponse } from './common.js';
+import { buildActionExecutionResult, captureActionFingerprint, inferGenericFailure, inferScrollFailure, wrapResponse, wrapToolError } from './common.js';
 async function handleStartApp(args) {
     const { platform, appId, deviceId } = args;
     const uiFingerprintBefore = await captureActionFingerprint(platform, deviceId);
@@ -330,8 +330,7 @@ export async function handleToolCall(name, args = {}) {
         return await handler(args);
     }
     catch (error) {
-        return {
-            content: [{ type: 'text', text: `Error executing tool ${name}: ${error instanceof Error ? error.message : String(error)}` }]
-        };
+        console.error(`Error executing tool ${name}:`, error);
+        return wrapToolError(name, error);
     }
 }

package/docs/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,12 @@
 All notable changes to the **Mobile Debug MCP** project will be documented in this file.
+## [0.24.4]
+- Moving agents away from `wait_for_screen_change`
+## [0.24.3]
+- Improved output consistency
 ## [0.24.2]
 - Fixed Android install issue
 - Updated tools to have more detailed responses

package/docs/specs/baseline-spec-v0.md ADDED Viewed

@@ -0,0 +1,312 @@
+# Baseline Spec v0
+## 1. System Overview
+The MCP surface is defined in `src/server/tool-definitions.ts` and dispatched in `src/server/tool-handlers.ts`. Tools are grouped in code by module, not by an explicit runtime taxonomy: **manage**, **observe**, **interact**, **network/classification**, and **system**.
+Agents interact with tools by name through `handleToolCall(name, args)`. Most handlers return a **single text content block containing JSON** via `wrapResponse(...)`. Exceptions are observable in code:
+| Tool | MCP content shape |
+| --- | --- |
+| most tools | one text block with JSON |
+| `get_logs` | two text blocks: metadata JSON, then logs JSON |
+| `capture_screenshot` | one text block with JSON metadata, then one or more image blocks |
+| `build_and_install` | one NDJSON text block, then one JSON text block |
+| uncaught handler error | one plain text error string, not wrapped JSON |
+Observable execution flow for state-mutating action tools at the MCP boundary:
+1. resolve device/platform
+2. call `ToolsNetwork.notifyActionStart()`
+3. capture UI fingerprint before the action
+4. execute the platform action
+5. capture UI fingerprint after the action
+6. wrap the result into an action envelope
+That flow is applied to `start_app`, `restart_app`, `tap`, `swipe`, `scroll_to_element`, `type_text`, and `press_back`. `tap_element` builds a similar envelope inside `src/interact/index.ts` rather than through the shared wrapper.
+## 2. Tool Inventory
+### Manage / lifecycle
+| Tool | Purpose | Inputs | Outputs | Side effects |
+| --- | --- | --- | --- | --- |
+| `start_app` | Launch app on Android or iOS. | `{ platform: 'android'\|'ios', appId: string, deviceId?: string }` | `ActionExecutionResult` JSON with `device` and `details` (`launch_time_ms`, `device_id`, `output?`, `observed_app?`, `error?`). | Launches app, captures fingerprints, resets network window. |
+| `terminate_app` | Stop app process. | `{ platform: 'android'\|'ios', appId: string, deviceId?: string }` | `{ terminated: boolean, device: DeviceInfo }` | Terminates app. |
+| `restart_app` | Terminate then relaunch app. | `{ platform: 'android'\|'ios', appId: string, deviceId?: string }` | `ActionExecutionResult` JSON with `device` and restart `details` (`terminated_before_restart`, `terminate_error?`, `output?`, `observed_app?`, `error?`). | Stops and launches app, captures fingerprints, resets network window. |
+| `reset_app_data` | Clear app storage / simulator container data. | `{ platform: 'android'\|'ios', appId: string, deviceId?: string }` | `{ reset: boolean, device: DeviceInfo }` | Clears app state. |
+| `install_app` | Install built artifact or project output. | `{ platform: 'android'\|'ios', projectType: 'native'\|'kmp'\|'react-native'\|'flutter', appPath: string, deviceId?: string }` | `{ device: DeviceInfo, installed: boolean, output?: string, error?: string }` | Installs app; Android may push APK/AAB and run `pm install`; iOS may use `simctl` or `idb`. |
+| `build_app` | Build project and return artifact path. | `{ platform: 'android'\|'ios', projectType: ..., projectPath: string, variant?: string }` | Build result JSON from platform builder, including artifact path on success or `error`. | Runs Gradle or Xcode build. |
+| `build_and_install` | Build then install, streaming progress. | `{ platform: 'android'\|'ios', projectType: ..., projectPath: string, deviceId?: string, variant?: string }` | MCP response has NDJSON event block plus result JSON `{ success: boolean, artifactPath?: string, device?: DeviceInfo, output?: string, error?: string }`. | Builds, installs, emits progress events. |
+| `list_devices` | Enumerate available devices. | `{ platform?: 'android'\|'ios', appId?: string }` | `{ devices: DeviceInfo[] }` (runtime objects may also include `appInstalled`/`booted`). | Reads device lists. |
+### Observe / inspect
+| Tool | Purpose | Inputs | Outputs | Side effects |
+| --- | --- | --- | --- | --- |
+| `get_logs` | Fetch recent device logs. | `{ platform: 'android'\|'ios', appId?: string, deviceId?: string, pid?: number, tag?: string, level?: string, contains?: string, since_seconds?: number, limit?: number, lines?: number }` | Two text blocks: metadata `{ device, result: { count, filtered, crashLines, source, meta } }`, then `{ logs: [...] }`. | Reads platform logs. |
+| `capture_screenshot` | Capture current screenshot. | `{ platform: 'android'\|'ios', deviceId?: string }` | Text metadata block plus image block(s). | Captures screenshot; uses temp files. |
+| `capture_debug_snapshot` | Bundle screenshot, UI tree, screen, fingerprint, and logs. | `{ reason?: string, includeLogs?: boolean, logLines?: number, platform?: 'android'\|'ios', appId?: string, deviceId?: string, sessionId?: string }` | Wrapped JSON snapshot object with device metadata, screenshot metadata, UI tree, fingerprint, current screen, and logs/errors. | Captures multiple observations. |
+| `start_log_stream` | Start background structured log stream. | `{ platform?: 'android'\|'ios', packageName: string, level?: 'error'\|'warn'\|'info'\|'debug', deviceId?: string, sessionId?: string }` | `{ success: boolean, stream_started?: boolean, device_id?: string, pid?: number, error?: string }` | Starts long-lived log process, writes NDJSON file. |
+| `read_log_stream` | Read accumulated streamed logs. | `{ sessionId?: string }` | `{ entries: any[], crash_summary?: { crash_detected: boolean, exception?: string, sample?: string } }` | Reads stream file; no new device action. |
+| `stop_log_stream` | Stop background log stream. | `{ sessionId?: string }` | `{ success: boolean }` | Stops stream process and clears session entry. |
+| `get_ui_tree` | Return current UI hierarchy. | `{ platform: 'android'\|'ios', deviceId?: string }` | `GetUITreeResponse` with `device`, `elements`, `resolution`, optional `error`. | Dumps UI hierarchy; Android writes/pulls XML; iOS queries via `idb`. |
+| `get_current_screen` | Return visible Android activity. | `{ deviceId?: string }` | `GetCurrentScreenResponse` with `device`, `activity`, `package`, `shortActivity?`, `error?`. | Reads `dumpsys`; Android only. |
+| `get_screen_fingerprint` | Compute stable screen fingerprint from UI tree and current screen. | `{ platform?: 'android'\|'ios', deviceId?: string }` | `{ fingerprint: string\|null, activity?: string, error?: string }` | Reads UI tree and, on Android, current screen. |
+### Interact / wait / verify
+| Tool | Purpose | Inputs | Outputs | Side effects |
+| --- | --- | --- | --- | --- |
+| `wait_for_screen_change` | Wait until fingerprint differs from provided previous fingerprint. | `{ platform?: 'android'\|'ios', previousFingerprint: string, timeoutMs?: number, pollIntervalMs?: number, deviceId?: string }` | `{ success: boolean, previousFingerprint, newFingerprint?\|lastFingerprint?, elapsedMs, observed_screen: { fingerprint, activity }, reason?: 'timeout' }` | Polls fingerprints. |
+| `expect_screen` | Exact check against expected fingerprint or screen name. | `{ platform?: 'android'\|'ios', fingerprint?: string, screen?: string, deviceId?: string }` | `{ success, observed_screen, expected_screen, confidence, comparison: { basis, matched, reason } }` | Reads fingerprint/current screen. |
+| `expect_element_visible` | Binary visible check for selector. | `{ selector: { text?: string, resource_id?: string, accessibility_id?: string, contains?: boolean }, element_id?: string, timeout_ms?: number, poll_interval_ms?: number, platform?: 'android'\|'ios', deviceId?: string }` | `{ success, selector, element_id, expected_condition: 'visible', element?, observed, reason, failure_code?, retryable? }` | Polls UI tree through `wait_for_ui`. |
+| `wait_for_ui` | Deterministic UI wait and element resolution. | `{ selector?: { text?: string, resource_id?: string, accessibility_id?: string, contains?: boolean }, condition?: 'exists'\|'not_exists'\|'visible'\|'clickable', timeout_ms?: number, poll_interval_ms?: number, match?: { index?: number }, retry?: { max_attempts?: number, backoff_ms?: number }, platform?: 'android'\|'ios', deviceId?: string }` | Success: `{ status:'success', matched, element, metrics, requested, observed }`; failure: `{ status:'timeout', error:{code,message}, metrics, requested, observed }`. | Polls UI tree; resolves actionable ancestor for `clickable`. |
+| `find_element` | Heuristic semantic element search. | `{ query: string, exact?: boolean, timeoutMs?: number, platform?: 'android'\|'ios', deviceId?: string }` | `{ found: true, element, score, confidence }` or `{ found: false, error }` | Polls UI tree; no mutation. |
+### Action / mutation
+| Tool | Purpose | Inputs | Outputs | Side effects |
+| --- | --- | --- | --- | --- |
+| `tap` | Tap coordinates. | `{ x: number, y: number, platform?: 'android'\|'ios', deviceId?: string }` | `ActionExecutionResult` | Taps screen; captures fingerprints; resets network window. |
+| `tap_element` | Tap resolved UI element by `elementId`. | `{ elementId: string }` | Action-style JSON with `action_type: 'tap_element'`, target selector/resolved element, `success`, fingerprints, `failure_code?`, `retryable?`. | Reads cached element/UI context, validates element, taps it, resets network window. |
+| `swipe` | Swipe coordinates. | `{ platform?: 'android'\|'ios', x1, y1, x2, y2, duration, deviceId?: string }` | `ActionExecutionResult` | Swipes screen; captures fingerprints; resets network window. |
+| `scroll_to_element` | Repeatedly scroll until matching visible element is found. | `{ platform: 'android'\|'ios', selector: { text?: string, resourceId?: string, contentDesc?: string, className?: string }, direction?: 'down'\|'up', maxScrolls?: number, scrollAmount?: number, deviceId?: string }` | `ActionExecutionResult` | Repeated swipes plus UI tree checks; resets network window. |
+| `type_text` | Type text into focused field. | `{ platform?: 'android', text: string, deviceId?: string }` | `ActionExecutionResult` | Android text input; captures fingerprints; resets network window. |
+| `press_back` | Send Android Back key. | `{ platform?: 'android', deviceId?: string }` | `ActionExecutionResult` | Android back action; captures fingerprints; resets network window. |
+### Classification / network / system
+| Tool | Purpose | Inputs | Outputs | Side effects |
+| --- | --- | --- | --- | --- |
+| `classify_action_outcome` | Deterministic rule-based classifier over supplied signals. | `{ uiChanged: boolean, expectedElementVisible?: boolean, networkRequests?: { url?: string, status: 'success'\|'failure'\|'retryable' }[], hasLogErrors?: boolean }` | `{ outcome: 'success'\|'no_op'\|'backend_failure'\|'ui_failure'\|'unknown', reasoning: string, nextAction?: 'call_get_network_activity' }` | Pure computation. |
+| `get_network_activity` | Return normalized request events since last action window. | `{}` | `{ requests: NetworkRequestSummary[], count: number }` | Reads logs, advances internal `lastConsumedTimestamp`. |
+| `get_system_status` | Aggregate Android/iOS/Gradle readiness. | `{}` | `{ success, status: 'ready'\|'degraded'\|'blocked', adbAvailable, adbVersion, devices, deviceStates, logsAvailable, envValid, issues, appInstalled, iosAvailable, iosDevices, gradleJavaHome, gradleValid, gradleFilesChecked, gradleSuggestedFixes, summary }` | Reads toolchain/device state. |
+## 3. Action Tools (Mutation Tools)
+| Tool | Actual output shape | Success reporting | Failure structure | Retry logic |
+| --- | --- | --- | --- | --- |
+| `start_app` | `ActionExecutionResult` + `device` + `details` | `success` mirrors underlying launch success | `failure_code` inferred generically; raw launch `error` only appears in `details` | none |
+| `terminate_app` | `{ terminated: boolean, device }` | `terminated === true` | no standardized failure code; boolean only at MCP layer | none |
+| `restart_app` | `ActionExecutionResult` + `device` + restart `details` | `success` mirrors underlying restart success | `failure_code` inferred generically; terminate/start details kept in `details` | no retry; always does terminate then start |
+| `reset_app_data` | `{ reset: boolean, device }` | `reset === true` | no standardized failure code | none |
+| `install_app` | `{ device, installed, output?, error? }` | `installed === true` | unstructured `error` string; no action envelope | Android has internal fallback paths; iOS may fall back from `simctl` to `idb` |
+| `build_and_install` | NDJSON event stream + `{ success, artifactPath?, device?, output?, error? }` | final `success === true` | unstructured `error`; build/install phases encoded in NDJSON | build and install internals may retry depending on platform helpers |
+| `tap` | `ActionExecutionResult` | `success` means command executed | `failure_code`/`retryable` inferred from generic error text; raw error omitted | none |
+| `tap_element` | action-style JSON built in `src/interact/index.ts` | `success` means element was resolved and tap dispatched | structured `failure_code` from `ActionFailureCode`; includes `retryable` | none |
+| `swipe` | `ActionExecutionResult` | command executed | generic inferred `failure_code` | none |
+| `scroll_to_element` | `ActionExecutionResult` | **different semantics**: success means target element became visible during scroll loop | `failure_code` inferred by scroll-specific string matching | internal loop up to `maxScrolls` |
+| `type_text` | `ActionExecutionResult` | command executed | generic inferred `failure_code` | none |
+| `press_back` | `ActionExecutionResult` | command executed | generic inferred `failure_code` | none |
+**Observed inconsistency:** `start_app`/`restart_app` expose `device` and rich `details`; `tap`/`swipe`/`type_text`/`press_back` do not. `scroll_to_element` reports an outcome-oriented success, while the others mostly report execution success.
+## 4. Observation and Wait Tools
+### `wait_for_ui`
+- **Role:** both waits and resolves.
+- **Signals used:** only the current UI tree from `get_ui_tree`.
+- **Behavior:** filters elements by selector, supports `match.index`, evaluates `exists` / `not_exists` / `visible` / `clickable`, and resolves an actionable ancestor for `clickable`.
+- **Output:** descriptive, not binary. Returns `requested`, `observed`, `metrics`, and optionally `element`.
+- **Success model:** `status: 'success'`; otherwise `status: 'timeout'` with structured `error`.
+### `wait_for_screen_change`
+- **Role:** wait only.
+- **Signals used:** screen fingerprints from `get_screen_fingerprint`.
+- **Behavior:** polls until fingerprint differs from `previousFingerprint`, then performs a confirmation read for stability.
+- **Output:** binary `success` plus descriptive `observed_screen`, elapsed time, and either `newFingerprint` or `lastFingerprint`.
+### `find_element`
+- **Role:** resolve only.
+- **Signals used:** UI tree.
+- **Behavior:** heuristic scoring over text/content/resource/class; if best element is not interactable it tries to resolve a clickable ancestor.
+- **Output:** descriptive, scored result (`score`, `confidence`) or `{ found:false, error }`.
+### `get_ui_tree`
+- **Role:** inspect only.
+- **Signals used:** platform accessibility/UI dump.
+- **Output:** raw tree data with `elements`, `resolution`, and `device`.
+- **Notes:** Android and iOS each retry internally up to three attempts.
+### `get_current_screen`
+- **Role:** inspect only.
+- **Signals used:** Android activity manager / window dumps.
+- **Output:** current package/activity object.
+- **Notes:** Android-only.
+### `get_screen_fingerprint`
+- **Role:** inspect only.
+- **Signals used:** UI tree plus current screen on Android.
+- **Behavior:** normalizes a subset of visible, structurally significant elements and hashes them.
+- **Output:** `{ fingerprint, activity?, error? }`.
+- **Notes:** iOS fingerprint omits activity in the hash payload.
+### Log/snapshot observation
+- `get_logs` returns structured metadata plus raw/structured log entries.
+- `start_log_stream` / `read_log_stream` / `stop_log_stream` manage background NDJSON log capture.
+- `capture_screenshot` and `capture_debug_snapshot` provide point-in-time observation artifacts.
+## 5. Existing Verification Mechanisms
+| Mechanism | Success rule | Determinism | Ambiguity |
+| --- | --- | --- | --- |
+| `expect_screen` | exact fingerprint equality, else exact screen-name equality | binary and deterministic | if only `screen` is provided, Android may use either fingerprint-derived `activity` or `get_current_screen` label |
+| `expect_element_visible` | delegated `wait_for_ui(condition:'visible')` reaches success | binary wrapper over deterministic wait | failure collapses to `TIMEOUT` or `UNKNOWN` |
+| `wait_for_ui` used as verification | requested condition becomes true | deterministic per poll inputs | descriptive output, not a dedicated verification result |
+| `wait_for_screen_change` | fingerprint changes and stays stable for one confirmation pass | deterministic | verifies change, not correctness of destination |
+| `classify_action_outcome` | ordered rule evaluation over provided UI/network/log inputs | deterministic pure function | if `networkRequests` is omitted, it returns `unknown` with `nextAction: 'call_get_network_activity'`; `hasLogErrors` does not change the enum outcome |
+## 6. Action Result Semantics
+Across action tools, **success is not uniform**:
+1. **Execution success:** `tap`, `swipe`, `type_text`, `press_back`, `start_app`, `restart_app`, and `tap_element` mainly report that the command ran or the tap was dispatched.
+2. **Outcome success:** `scroll_to_element` reports success only if the target element was actually found during scrolling.
+3. **Boolean operation success:** `install_app`, `terminate_app`, and `reset_app_data` use tool-specific booleans (`installed`, `terminated`, `reset`) instead of the action envelope.
+Failure handling is **partly standardized**:
+- action-envelope tools use `failure_code` and `retryable`
+- manage tools often use plain booleans plus `error` strings
+- some handlers drop underlying diagnostics before the MCP response is built
+## 7. Failure Handling
+### Structured failure signals
+| Source | Structured signals |
+| --- | --- |
+| action envelope | `ELEMENT_NOT_FOUND`, `ELEMENT_NOT_INTERACTABLE`, `TIMEOUT`, `NAVIGATION_NO_CHANGE`, `AMBIGUOUS_TARGET`, `STALE_REFERENCE`, `UNKNOWN` |
+| `wait_for_ui` | `INVALID_SELECTOR`, `INVALID_CONDITION`, `PLATFORM_NOT_SUPPORTED`, `ELEMENT_NOT_FOUND`, `INTERNAL_ERROR` |
+| `expect_element_visible` | `failure_code: 'TIMEOUT'\|'UNKNOWN'`, `retryable` |
+| `classify_action_outcome` | `outcome: success\|no_op\|backend_failure\|ui_failure\|unknown` |
+| `get_network_activity` | per-request `status: success\|failure\|retryable` |
+### Unstructured failure signals
+- plain `error` strings from `install_app`, `build_app`, `build_and_install`, `find_element`, `start_log_stream`, many platform helpers
+- boolean-only failures from `terminate_app` and `reset_app_data`
+- top-level handler fallback: `Error executing tool <name>: ...` as plain text, not JSON
+### Retry / recovery logic present in implementation
+| Area | Observed logic |
+| --- | --- |
+| `wait_for_ui` | `retry.max_attempts` and `retry.backoff_ms` |
+| `scroll_to_element` | repeated swipes up to `maxScrolls` |
+| Android `install_app` | retries `pm install` with `-t` on test-only failure; has push + shell fallback |
+| iOS `install_app` | tries `simctl install`, may fall back to `idb` |
+| `get_ui_tree` | platform handlers retry up to three times |
+| `wait_for_screen_change` | one stability confirmation pass after a detected change |
+## 8. Execution Patterns (Observed)
+1. **Generic action wrapper**
+   `notifyActionStart()` → fingerprint before → platform action → fingerprint after → action envelope.
+2. **Resolved tap flow**
+   `wait_for_ui` returns `element.elementId` → `tap_element` uses cached element and current UI tree to validate it → tap → fingerprints before/after.
+3. **Visibility verification flow**
+   `expect_element_visible` is implemented as `wait_for_ui(... condition:'visible' ...)` plus a narrower binary result.
+4. **Screen verification flow**
+   `wait_for_screen_change` and `expect_screen` both depend on `get_screen_fingerprint`; `expect_screen` may additionally call `get_current_screen` on Android when matching by screen name.
+5. **Network correlation flow**
+   action tools that call `notifyActionStart()` create the time window used by `get_network_activity`; `classify_action_outcome` can then classify using supplied request summaries.
+6. **Snapshot/debug flow**
+   `capture_debug_snapshot` aggregates screenshot, current screen, fingerprint, UI tree, and logs in one call.
+## 9. Inconsistencies and Gaps
+1. **Response envelope mismatch:** most tools return wrapped JSON, but `get_logs`, `capture_screenshot`, and `build_and_install` use multi-block responses.
+2. **Unexpected-error shape mismatch:** uncaught handler failures become plain text strings, not structured JSON.
+3. **Action result mismatch:** some mutation tools use `ActionExecutionResult`; `install_app`, `terminate_app`, `reset_app_data`, and `build_and_install` do not.
+4. **Success semantics mismatch:** `scroll_to_element` success is outcome-based; most other action tools are execution-based.
+5. **Detail richness mismatch:** `start_app` and `restart_app` include `device` and rich `details`; other action-envelope tools usually omit raw error/details.
+6. **Failure-code derivation mismatch:** generic action wrappers infer `failure_code` by matching substrings in error text; `tap_element` assigns codes directly.
+7. **Dropped diagnostics:** handler-level MCP responses omit some underlying `diagnostics`/`error` detail, especially for `terminate_app`, `reset_app_data`, and `get_logs`.
+8. **`expect_element_visible` type/implementation mismatch:** the type allows `ELEMENT_NOT_FOUND`, but the implementation only emits `TIMEOUT` or `UNKNOWN`.
+9. **Platform mismatch:** `get_current_screen` is Android-only; `type_text` and `press_back` are Android-only; other tools are dual-platform.
+10. **Observation helper gap:** `waitForUICore` supports `ui`/`log`/`screen`/`idle` modes internally, but only the newer selector-based `wait_for_ui` is exposed as a tool.
+11. **Network-window coverage gap:** only tools that call `notifyActionStart()` reset the network activity window; `install_app`, `terminate_app`, and `reset_app_data` do not.
+12. **`classify_action_outcome` log input is advisory only:** `hasLogErrors` affects the reasoning text for `no_op` but never changes the enum outcome.
+13. **`build_and_install` has dead autodetect code:** handler requires `platform` and `projectType`, but later still contains unreachable fallback autodetection branches.
+14. **Runtime object shape drift:** `list_devices` may return extra runtime fields like `appInstalled` and `booted` beyond the base `DeviceInfo` shape.
+## 10. Minimal Canonical Model (Derived, Not Invented)
+### Common action shape already present
+```ts
+{
+  action_id: string,
+  timestamp: string,
+  action_type: string,
+  target: {
+    selector: Record<string, unknown>,
+    resolved: Record<string, unknown> | null
+  },
+  success: boolean,
+  failure_code?: string,
+  retryable?: boolean,
+  ui_fingerprint_before: string | null,
+  ui_fingerprint_after: string | null,
+  device?: DeviceInfo,
+  details?: Record<string, unknown>
+}
+```
+This shape is already used directly or closely approximated by:
+- `start_app`
+- `restart_app`
+- `tap`
+- `tap_element`
+- `swipe`
+- `scroll_to_element`
+- `type_text`
+- `press_back`
+### Common observation/verification pattern already present
+```ts
+{
+  requested|expected: ...,
+  observed: ...,
+  success|status: boolean | 'success' | 'timeout',
+  metrics?|confidence?|comparison?|reason?
+}
+```
+Examples:
+- `wait_for_ui` → `requested`, `observed`, `metrics`
+- `expect_screen` → `expected_screen`, `observed_screen`, `comparison`
+- `expect_element_visible` → `selector`, `observed`, `reason`
+- `wait_for_screen_change` → previous vs observed/new fingerprint
+### Common failure signals already present
+- action failure codes from `ActionFailureCode`
+- wait/expect codes (`INVALID_*`, `ELEMENT_NOT_FOUND`, `TIMEOUT`, `UNKNOWN`)
+- network request statuses (`success`, `failure`, `retryable`)
+- fallback unstructured `error` strings
+### Common flow already present
+- resolve device
+- perform platform operation
+- optionally capture fingerprints before/after
+- return structured JSON, usually in one text block
+- perform verification in separate tools rather than as part of most actions

package/docs/specs/mcp-tooling-spec-v1.md ADDED Viewed

@@ -0,0 +1,281 @@
+# MCP Tooling Specification — Spec v1 (Refined)
+## 1. Scope
+This specification defines the runtime contract for MCP tools used to interact with mobile applications.
+It standardizes:
+- action execution semantics
+- verification model
+- failure handling
+- response shape constraints
+This spec is incremental and aligned with the current implementation. It does not introduce new tools or require architectural redesign.
+## 2. Core Model
+The system is based on a strict separation:
+- Action tools perform execution
+- Verification tools determine outcome
+- `wait_for_*` tools resolve and synchronize
+- Observation tools inspect state
+## 3. Execution Model
+Canonical flow for verifiable interactions:
+`RESOLVE -> ACT -> WAIT (optional) -> EXPECT`
+This flow applies when outcome verification is required.
+It does not apply to:
+- pure inspection tools
+- observation-only flows
+- non-verifiable or exploratory actions
+Outcome-specific guidance:
+- visible navigation expected -> `wait_for_screen_change` (optional) -> `expect_screen`
+- local UI change expected -> `wait_for_ui` (optional) -> `expect_element_visible`
+- backend/API activity expected without a visible UI change -> capture `get_screen_fingerprint` before and after the action, call `get_network_activity` immediately after the action, then call `classify_action_outcome` with the fingerprint comparison result and the observed requests
+For backend/API activity, `wait_for_screen_change` is not the right verification tool unless a visible transition is also expected.
+## 4. Action Tools
+### 4.1 Definition
+Action tools mutate application state.
+Includes:
+`start_app`, `restart_app`, `tap`, `tap_element`, `swipe`, `scroll_to_element`, `type_text`, `press_back`
+### 4.2 Required Semantics
+- `success` MUST represent execution success only
+- execution success means the platform command was dispatched without error
+- `success` MUST NOT imply outcome success
+### 4.3 Action Envelope
+MUST be returned in this structure:
+```ts
+{
+  action_id: string,
+  timestamp: string,
+  action_type: string,
+  target: {
+    selector: object,
+    resolved: object | null
+  },
+  success: boolean,
+  ui_fingerprint_before: string | null,
+  ui_fingerprint_after: string | null,
+  failure_code?: string,
+  retryable?: boolean,
+  device?: DeviceInfo,
+  details?: object
+}
+```
+Rules:
+- `success` is at the top level, not nested
+- `target` contains only selection and resolution context
+- fingerprints represent observed pre/post UI state on a best-effort basis
+- `failure_code` is optional but MUST be used when a structured mapping exists
+### 4.4 Allowed Deviations
+Explicit temporary exceptions:
+- `install_app`, `terminate_app`, `reset_app_data` do not use this envelope
+- `scroll_to_element` may temporarily retain outcome-based success semantics
+- partial `failure_code` coverage is allowed
+- detail richness may vary across tools
+## 5. Verification Tools
+### 5.1 Definition
+Verification tools determine whether the intended outcome occurred.
+Primary:
+- `expect_screen`
+- `expect_element_visible`
+### 5.2 Required Semantics
+- MUST return `success` as a boolean
+- `success` MUST represent outcome truth
+- MUST be binary and deterministic
+Optional fields do not affect `success`:
+`observed`, `expected`, `comparison`, `reason`, `confidence`
+### 5.3 Authoritative Role
+Verification tools are the only authoritative source of outcome truth.
+Action tools MUST NOT be used to infer outcome success.
+### 5.4 Applicability Rules
+An `expect_*` tool is applicable when:
+- expected destination screen is known -> `expect_screen`
+- expected UI element state is known -> `expect_element_visible`
+- outcome is explicitly defined or testable
+Rules:
+- `wait_for_*` MAY be used before `expect_*` for synchronization
+- `wait_for_*` MUST NOT replace `expect_*` when an applicable `expect_*` tool exists
+- when no applicable `expect_*` tool exists, `expect_*` MAY be skipped
+## 6. wait_for_* Tools
+### 6.1 Definition
+`wait_for_*` tools provide deterministic resolution and synchronization.
+Examples:
+- `wait_for_ui`
+- `wait_for_screen_change`
+### 6.2 Rules
+- MAY resolve UI elements
+- MAY synchronize UI/system state
+- MUST NOT be treated as final verification when `expect_*` is applicable
+### 6.3 Semantics
+- `success` indicates condition met or resolution succeeded
+- `success` does NOT indicate outcome correctness
+## 7. Failure Semantics
+### 7.1 Canonical Codes
+- `ELEMENT_NOT_FOUND`
+- `ELEMENT_NOT_INTERACTABLE`
+- `TIMEOUT`
+- `NAVIGATION_NO_CHANGE`
+- `AMBIGUOUS_TARGET`
+- `STALE_REFERENCE`
+- `UNKNOWN`
+### 7.2 Rules
+- `failure_code` MUST be used when a structured mapping exists
+- `failure_code` MUST NOT be replaced by string errors
+- string errors MAY exist for diagnostics only
+- not all tools must emit all codes
+### 7.3 Scope
+Applies to:
+- action tools
+- verification tools
+- `wait_for_ui`-style tools
+## 8. Response Shape
+### 8.1 Default
+All responses MUST be a single JSON text block.
+### 8.2 Allowed Exceptions
+Multi-block responses are allowed only for:
+- `get_logs`
+- `capture_screenshot`
+- `build_and_install`
+### 8.3 Errors
+All handler/runtime errors MUST be JSON-wrapped.
+String-only errors are not allowed, including fallback handler errors.
+Note: string diagnostics may still appear inside structured JSON payloads where explicitly defined by a tool.
+## 9. Classification
+Tool: `classify_action_outcome`
+Rules:
+- MAY use UI, network, and log signals
+- MUST be deterministic
+- MUST NOT replace `expect_*` tools
+- MUST be treated as a supplementary signal only
+- SHOULD be used with `get_network_activity` when the expected outcome is backend/API activity without a visible UI change
+It is not a verification mechanism.
+## 10. Execution Patterns
+Canonical pattern:
+`wait_for_ui -> tap_element -> wait_for_screen_change (optional) -> expect_screen`
+Interpretation:
+- `tap_element.success` = executed
+- `wait_for_screen_change.success` = UI changed
+- `expect_screen.success` = correct outcome verified
+## 11. Known Deviations
+Explicitly allowed:
+- `install_app`, `terminate_app`, `reset_app_data` not using envelope
+- `build_and_install` streaming NDJSON
+- platform-specific tools
+- partial failure coverage
+- `scroll_to_element` outcome-based success (temporary exception)
+- extended runtime fields in `list_devices`
+## 12. Migration Rules
+Must change now:
+- uncaught errors must be JSON-wrapped
+Should align when touched:
+- `tap`, `swipe`, `type_text`, `press_back`
+- `start_app`, `restart_app`
+- `scroll_to_element`
+- `wait_for_ui`
+No change required:
+- `tap_element`
+- `expect_screen`
+- `expect_element_visible`
+- `wait_for_screen_change`
+## 13. Guiding Principles
+- Actions execute
+- Verification proves
+- Waiting synchronizes
+- Classification assists
+## Final Definition
+Action success equals execution success.
+Outcome success equals verification success.
+Verification tools are authoritative when the expected outcome is defined.

package/docs/tools/interact.md CHANGED Viewed

@@ -33,7 +33,7 @@ Example response:
 ```json
 {
   "action_id": "tap_1710000000000_1",
-  "timestamp": 1710000000000,
+  "timestamp": "2026-04-23T08:00:00.000Z",
   "action_type": "tap",
   "target": { "selector": { "x": 100, "y": 200 }, "resolved": null },
   "success": true,
@@ -53,6 +53,10 @@ Preferred verification:
 - navigation outcome known -> `expect_screen`
 - local UI change known -> `expect_element_visible`
+- backend/API activity expected -> `classify_action_outcome` + `get_network_activity`
+Use `wait_for_screen_change` only when a visible transition is the expected outcome. If a button should trigger an API request but the screen should stay the same, rely on network activity and classification instead.
+For backend-only actions, compare `get_screen_fingerprint` before and after the action, and call `get_network_activity` immediately after the action; do not wait on `wait_for_screen_change` if no visible transition is expected.
 ---
@@ -139,6 +143,7 @@ Notes:
 - Treats `null` fingerprints as transient and keeps polling.
 - Adds a stability confirmation before returning success to avoid transient animation frames.
 - Follow with `expect_screen` when the expected destination is known.
+- Do not use this as the main success check for backend/API activity that does not change the visible UI.
 ---
@@ -303,7 +308,7 @@ Success response:
 ```json
 {
   "action_id": "tap_element_1710000000000_1",
-  "timestamp": 1710000000000,
+  "timestamp": "2026-04-23T08:00:00.000Z",
   "action_type": "tap_element",
   "target": {
     "selector": { "elementId": "el_123" },
@@ -328,7 +333,7 @@ Failure response:
 ```json
 {
   "action_id": "tap_element_1710000000001_2",
-  "timestamp": 1710000000001,
+  "timestamp": "2026-04-23T08:00:00.001Z",
   "action_type": "tap_element",
   "target": { "selector": { "elementId": "el_123" }, "resolved": null },
   "success": false,
@@ -451,3 +456,22 @@ Notes:
 - The tool resolves the selector internally when needed.
 - On failure, `reason` and `observed` tell you whether the selector was missing entirely or present but not yet visible.
 - Use when the screen should remain on the same destination but a specific element should appear or become visible.
+---
+## classify_action_outcome + get_network_activity
+Use this pair when the action is expected to trigger network/backend work and the screen may not visibly change.
+Pattern:
+1. perform the action
+2. call `classify_action_outcome` with `uiChanged` taken from a `get_screen_fingerprint` before/after comparison (or from `wait_for_screen_change`, but only when a visible transition is also expected)
+3. if the classifier asks for it, call `get_network_activity`
+4. call `classify_action_outcome` again with `networkRequests`
+Guidance:
+- `uiChanged=true` or `expectedElementVisible=true` means the action outcome is already verified
+- `nextAction="call_get_network_activity"` means the UI signal was inconclusive and the agent should inspect network activity
+- if network requests succeed but the UI stays unchanged, treat the outcome as a backend/API result rather than a screen transition

package/docs/tools/manage.md CHANGED Viewed

@@ -121,7 +121,7 @@ start_app response example:
 ```json
 {
   "action_id": "start_app_1710000000000_1",
-  "timestamp": 1710000000000,
+  "timestamp": "2026-04-23T08:00:00.000Z",
   "action_type": "start_app",
   "device": { "platform": "android", "id": "emulator-5554", "osVersion": "14", "model": "Pixel", "simulator": true },
   "target": { "selector": { "appId": "com.example.app" }, "resolved": null },

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "mobile-debug-mcp",
-  "version": "0.24.2",
+  "version": "0.24.4",
   "description": "MCP server for mobile app debugging (Android + iOS), with focus on security and reliability",
   "type": "module",
   "bin": {

package/src/interact/index.ts CHANGED Viewed

@@ -146,7 +146,7 @@ export class ToolsInteract {
   private static _actionFailure(
     actionId: string,
-    timestamp: number,
+    timestamp: string,
     actionType: string,
     selector: Record<string, unknown> | null,
     resolved: ActionTargetResolved | null,
@@ -254,9 +254,10 @@ export class ToolsInteract {
   }
   static async tapElementHandler({ elementId }: { elementId: string }): Promise<TapElementResponse> {
-    const timestamp = Date.now()
+    const timestampMs = Date.now()
+    const timestamp = new Date(timestampMs).toISOString()
     const actionType = 'tap_element'
-    const actionId = nextActionId(actionType, timestamp)
+    const actionId = nextActionId(actionType, timestampMs)
     const selector = { elementId }
     const resolved = ToolsInteract._resolvedUiElements.get(elementId)
     if (!resolved) {
@@ -304,6 +305,7 @@ export class ToolsInteract {
       action_id: actionId,
       timestamp,
       action_type: actionType,
+      ...(tree?.device ? { device: tree.device } : {}),
       target: {
         selector,
         resolved: resolvedTarget

package/src/server/common.ts CHANGED Viewed

@@ -82,9 +82,10 @@ export function buildActionExecutionResult({
   failure?: { failureCode: ActionFailureCode; retryable: boolean }
   details?: Record<string, unknown>
 }): ActionExecutionResult {
-  const timestamp = Date.now()
+  const timestampMs = Date.now()
+  const timestamp = new Date(timestampMs).toISOString()
   return {
-    action_id: nextActionId(actionType, timestamp),
+    action_id: nextActionId(actionType, timestampMs),
     timestamp,
     action_type: actionType,
     ...(device ? { device } : {}),
@@ -99,3 +100,28 @@ export function buildActionExecutionResult({
     ...(details ? { details } : {})
   }
 }
+export function wrapToolError(name: string, error: unknown) {
+  const message = error instanceof Error
+    ? error.message
+    : typeof error === 'object' && error !== null
+      ? (() => {
+          try {
+            return JSON.stringify(error, null, 2)
+          } catch {
+            return '[unserializable error object]'
+          }
+        })()
+      : String(error)
+  return {
+    content: [{
+      type: 'text' as const,
+      text: JSON.stringify({
+        error: {
+          tool: name,
+          message
+        }
+      }, null, 2)
+    }]
+  }
+}

package/src/server/tool-definitions.ts CHANGED Viewed

@@ -10,7 +10,7 @@ Inputs:
 - deviceId (optional)
 Output Structure:
-- action_id, timestamp, action_type
+ - action_id, timestamp (ISO 8601), action_type
 - target.selector = { appId }
 - success = true when launch was dispatched successfully
 - failure_code/retryable when launch dispatch fails
@@ -83,7 +83,7 @@ Inputs:
 - deviceId (optional)
 Output Structure:
-- action_id, timestamp, action_type
+ - action_id, timestamp (ISO 8601), action_type
 - target.selector = { appId }
 - success = true when the restart command completed
 - failure_code/retryable when restart dispatch fails
@@ -344,6 +344,7 @@ Capabilities:
 Constraints:
 - Does not verify correctness of the resulting state
 - Must not be used alone to confirm action success when an applicable expect_* tool exists
+- Use classify_action_outcome + get_network_activity when the expected outcome is backend/API activity without a visible UI change
 Recommended Usage:
 1. Capture or define the expected outcome
@@ -532,7 +533,7 @@ Inputs:
 - deviceId (optional)
 Output Structure:
-- action_id, timestamp, action_type
+ - action_id, timestamp (ISO 8601), action_type
 - target.selector = { x, y }
 - success = true when the tap was dispatched
 - failure_code/retryable when dispatch fails
@@ -587,7 +588,7 @@ Inputs:
 Output Structure:
 - action_id: unique timestamp-based action identifier
-- timestamp: epoch milliseconds for the action attempt
+- timestamp: ISO 8601 timestamp for the action attempt
 - action_type: "tap_element"
 - target.selector: original target handle ({ elementId })
 - target.resolved: minimal resolved element info used for the tap
@@ -640,7 +641,7 @@ Inputs:
 - platform/deviceId (optional)
 Output Structure:
-- action_id, timestamp, action_type
+- action_id, timestamp (ISO 8601), action_type
 - target.selector = { x1, y1, x2, y2, duration }
 - success = true when the swipe was dispatched
 - failure_code/retryable when dispatch fails
@@ -692,7 +693,7 @@ Inputs:
 - direction, maxScrolls, scrollAmount, deviceId (optional)
 Output Structure:
-- action_id, timestamp, action_type
+- action_id, timestamp (ISO 8601), action_type
 - target.selector = original selector
 - target.resolved = minimal resolved element info when found
 - success = true when scrolling produced a visible target element
@@ -746,7 +747,7 @@ Inputs:
 - platform/deviceId (optional)
 Output Structure:
-- action_id, timestamp, action_type
+- action_id, timestamp (ISO 8601), action_type
 - target.selector = { text }
 - success = true when text input was dispatched
 - failure_code/retryable when dispatch fails
@@ -795,7 +796,7 @@ Inputs:
 - platform/deviceId (optional)
 Output Structure:
-- action_id, timestamp, action_type
+- action_id, timestamp (ISO 8601), action_type
 - target.selector = { key: "back" }
 - success = true when the back action was dispatched
 - failure_code/retryable when dispatch fails
@@ -835,6 +836,8 @@ Failure Handling:
     description: `Classify the outcome of the most recent action into exactly one of: success, no_op, backend_failure, ui_failure, unknown.
 MUST be called after every action (tap, swipe, type_text, press_back, start_app, etc). Never skip.
+Use this with get_network_activity when the expected outcome is backend/API activity without a visible UI change.
+For backend/API activity, compare get_screen_fingerprint before and after the action and call get_network_activity immediately after the action instead of waiting for wait_for_screen_change.
 HOW TO GATHER INPUTS before calling:
 1. Call wait_for_screen_change or compare get_screen_fingerprint before/after — set uiChanged accordingly.
@@ -868,7 +871,7 @@ BEHAVIOUR after outcome:
         },
         networkRequests: {
           type: 'array',
-          description: 'Pass this only after calling get_network_activity as instructed by nextAction. Map each request to endpoint + status.',
+          description: 'Pass this only after calling get_network_activity as instructed by nextAction. Also use it when the expected outcome is backend/API activity without a visible UI change.',
           items: {
             type: 'object',
             properties: {
@@ -890,7 +893,7 @@ BEHAVIOUR after outcome:
     name: 'get_network_activity',
     description: `Returns structured network events captured from platform logs since the last action.
-Call this only when classify_action_outcome returns nextAction="call_get_network_activity".
+Call this when classify_action_outcome returns nextAction="call_get_network_activity" or immediately after an action whose expected outcome is backend/API activity without a visible UI change.
 Do not call more than once per action.
 Events are filtered to significant (non-background) requests only.

package/src/server/tool-handlers.ts CHANGED Viewed

@@ -16,7 +16,8 @@ import {
   inferScrollFailure,
   ToolCallArgs,
   ToolHandler,
-  wrapResponse
+  wrapResponse,
+  wrapToolError
 } from './common.js'
 async function handleStartApp(args: ToolCallArgs) {
@@ -375,8 +376,7 @@ export async function handleToolCall(name: string, args: ToolCallArgs = {}) {
   try {
     return await handler(args)
   } catch (error) {
-    return {
-      content: [{ type: 'text' as const, text: `Error executing tool ${name}: ${error instanceof Error ? error.message : String(error)}` }]
-    }
+    console.error(`Error executing tool ${name}:`, error)
+    return wrapToolError(name, error)
   }
 }

package/src/types.ts CHANGED Viewed

@@ -173,7 +173,7 @@ export interface ActionTargetResolved {
 export interface ActionExecutionResult {
   action_id: string;
-  timestamp: number;
+  timestamp: string;
   action_type: string;
   device?: DeviceInfo;
   target: {

package/test/unit/server/contract.test.ts CHANGED Viewed

@@ -26,6 +26,7 @@ async function run() {
   assert(waitForScreenChange, 'wait_for_screen_change should be registered')
   assert.match((waitForScreenChange as any).description, /does not verify correctness of the resulting state/i)
   assert.match((waitForScreenChange as any).description, /follow with expect_screen/i)
+  assert.match((waitForScreenChange as any).description, /backend\/API activity without a visible UI change/i)
   const captureDebugSnapshot = toolDefinitions.find((tool) => tool.name === 'capture_debug_snapshot')
   assert(captureDebugSnapshot, 'capture_debug_snapshot should be registered')
@@ -60,6 +61,18 @@ async function run() {
   assert.match((expectElementVisible as any).description, /selector is the primary input/i)
   assert.match((expectElementVisible as any).description, /Returns structured binary success\/failure only/i)
+  const classifyActionOutcome = toolDefinitions.find((tool) => tool.name === 'classify_action_outcome')
+  assert(classifyActionOutcome, 'classify_action_outcome should be registered')
+  assert.match((classifyActionOutcome as any).description, /backend\/API activity without a visible UI change/i)
+  assert.match((classifyActionOutcome as any).description, /get_network_activity/i)
+  assert.match((classifyActionOutcome as any).description, /immediately after the action/i)
+  const getNetworkActivity = toolDefinitions.find((tool) => tool.name === 'get_network_activity')
+  assert(getNetworkActivity, 'get_network_activity should be registered')
+  assert.match((getNetworkActivity as any).description, /backend\/API activity without a visible UI change/i)
+  assert.doesNotMatch((getNetworkActivity as any).description, /Call this only when/i)
+  assert.match((getNetworkActivity as any).description, /immediately after an action/i)
   await assert.rejects(() => handleToolCall('unknown_tool'), /Unknown tool: unknown_tool/)
   console.log('server contract tests passed')

package/test/unit/server/response_shapes.test.ts CHANGED Viewed

@@ -47,7 +47,7 @@ async function run() {
     ;(ToolsInteract as any).tapElementHandler = async () => ({
       action_id: 'tap_element_1',
-      timestamp: 1234567890,
+      timestamp: '2026-04-23T08:00:00.000Z',
       action_type: 'tap_element',
       target: {
         selector: { elementId: 'el_ready' },
@@ -62,6 +62,7 @@ async function run() {
     const tapElementPayload = JSON.parse((tapElementResponse as any).content[0].text)
     assert.strictEqual(tapElementPayload.success, true)
     assert.strictEqual(tapElementPayload.action_type, 'tap_element')
+    assert.match(tapElementPayload.timestamp, /^\d{4}-\d{2}-\d{2}T/)
     assert.strictEqual(tapElementPayload.target.resolved.elementId, 'el_ready')
     assert.strictEqual(tapElementPayload.ui_fingerprint_before, 'fp_before')
@@ -71,6 +72,7 @@ async function run() {
     const tapPayload = JSON.parse((tapResponse as any).content[0].text)
     assert.strictEqual(tapPayload.success, true)
     assert.strictEqual(tapPayload.action_type, 'tap')
+    assert.match(tapPayload.timestamp, /^\d{4}-\d{2}-\d{2}T/)
     assert.deepStrictEqual(tapPayload.target.selector, { x: 1, y: 2 })
     assert.strictEqual(tapPayload.ui_fingerprint_before, 'fp_mock')
@@ -93,6 +95,7 @@ async function run() {
     const startAppPayload = JSON.parse((startAppResponse as any).content[0].text)
     assert.strictEqual(startAppPayload.success, true)
     assert.strictEqual(startAppPayload.action_type, 'start_app')
+    assert.match(startAppPayload.timestamp, /^\d{4}-\d{2}-\d{2}T/)
     assert.strictEqual(startAppPayload.device.id, 'emulator-5554')
     assert.deepStrictEqual(startAppPayload.target.selector, { appId: 'com.example.app' })
     assert.strictEqual(startAppPayload.details.launch_time_ms, 123)
@@ -128,6 +131,30 @@ async function run() {
     assert.strictEqual(expectElementPayload.element_id, 'el_ready')
     assert.strictEqual(expectElementPayload.expected_condition, 'visible')
+    ;(ToolsInteract as any).tapHandler = async () => {
+      throw new Error('boom')
+    }
+    const failingTapResponse = await handleToolCall('tap', { platform: 'android', x: 1, y: 2 })
+    assert.strictEqual((failingTapResponse as any).content.length, 1)
+    const failingTapPayload = JSON.parse((failingTapResponse as any).content[0].text)
+    assert.deepStrictEqual(failingTapPayload, {
+      error: {
+        tool: 'tap',
+        message: 'boom'
+      }
+    })
+    ;(ToolsInteract as any).tapHandler = async () => {
+      throw { code: 'E_CUSTOM', detail: { field: 'value' } }
+    }
+    const objectTapResponse = await handleToolCall('tap', { platform: 'android', x: 1, y: 2 })
+    const objectTapPayload = JSON.parse((objectTapResponse as any).content[0].text)
+    assert.strictEqual(objectTapPayload.error.tool, 'tap')
+    assert.match(objectTapPayload.error.message, /"code": "E_CUSTOM"/)
+    assert.match(objectTapPayload.error.message, /"field": "value"/)
     ;(ToolsObserve as any).captureScreenshotHandler = async () => ({
       device: { platform: 'ios', id: 'booted', osVersion: '18.0', model: 'Simulator', simulator: true },
       screenshot: Buffer.from('png-data').toString('base64'),