npm - mobile-debug-mcp - Versions diffs - 0.25.1 → 0.26.1 - Mend

mobile-debug-mcp 0.25.1 → 0.26.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (35)

package/dist/interact/classify.js +48 -11
package/dist/interact/index.js +113 -0
package/dist/observe/android.js +10 -1
package/dist/observe/index.js +19 -1
package/dist/observe/ios.js +15 -1
package/dist/observe/snapshot-metadata.js +88 -0
package/dist/server/tool-definitions.js +49 -14
package/dist/server/tool-handlers.js +12 -0
package/dist/server-core.js +1 -1
package/docs/CHANGELOG.md +9 -0
package/docs/ROADMAP.md +66 -38
package/docs/rfcs/003-wait-and-synchronization-reliability.md +296 -0
package/docs/rfcs/004-action-verification-routing.md +342 -0
package/docs/specs/mcp-tooling-spec-v1.md +11 -3
package/docs/tools/interact.md +31 -8
package/docs/tools/observe.md +4 -2
package/package.json +1 -1
package/skills/rfc-review/SKILL.md +52 -0
package/skills/rfc-review/references/rfc-review-checklist.md +12 -0
package/skills/rfc-review/references/rfc-review-template.md +28 -0
package/src/interact/classify.ts +53 -13
package/src/interact/index.ts +151 -0
package/src/observe/android.ts +11 -1
package/src/observe/index.ts +26 -1
package/src/observe/ios.ts +28 -13
package/src/observe/snapshot-metadata.ts +107 -0
package/src/server/tool-definitions.ts +49 -14
package/src/server/tool-handlers.ts +13 -0
package/src/server-core.ts +1 -1
package/src/types.ts +23 -0
package/test/unit/interact/classify_action_outcome.test.ts +44 -25
package/test/unit/interact/wait_for_ui_change.test.ts +76 -0
package/test/unit/server/contract.test.ts +8 -6
package/test/unit/server/response_shapes.test.ts +37 -3
package/docs/rfcs/003-wait-and-synchronization-reliability +0 -232

package/src/server/tool-definitions.ts CHANGED Viewed

@@ -240,7 +240,7 @@ Failure Handling:
   },
   {
     name: 'capture_debug_snapshot',
-    description: 'Capture a complete debug snapshot (raw observation layer plus optional derived semantic layer). Returns structured JSON.',
+    description: 'Capture a complete debug snapshot (raw observation layer plus optional derived semantic layer). Returns structured JSON with snapshot_revision, captured_at_ms, and loading_state when detectable.',
     inputSchema: {
       type: 'object',
       properties: {
@@ -291,7 +291,7 @@ Failure Handling:
   },
   {
     name: 'get_ui_tree',
-    description: 'Get the current UI hierarchy from an Android device or iOS simulator. Returns a structured JSON representation of the screen content.',
+    description: 'Get the current UI hierarchy from an Android device or iOS simulator. Returns a structured JSON representation of the screen content with snapshot metadata when available.',
     inputSchema: {
       type: 'object',
       properties: {
@@ -344,7 +344,7 @@ Capabilities:
 Constraints:
 - Does not verify correctness of the resulting state
 - Must not be used alone to confirm action success when an applicable expect_* tool exists
-- Use classify_action_outcome + get_network_activity when the expected outcome is backend/API activity without a visible UI change
+- For backend/API activity without a visible UI change, pass the runtime action_type into classify_action_outcome and collect network evidence only if the result remains ambiguous
 Recommended Usage:
 1. Capture or define the expected outcome
@@ -363,6 +363,34 @@ Recommended Usage:
       required: ['previousFingerprint']
     }
   },
+  {
+    name: 'wait_for_ui_change',
+    description: `Purpose:
+Wait for a non-navigation UI mutation or in-place update to become stable.
+Inputs:
+- expected_change (optional): hierarchy_diff, text_change, or state_change
+- timeout_ms (optional)
+- stability_window_ms (optional)
+Guidance:
+- Prefer wait_for_screen_change for navigation transitions.
+- Prefer wait_for_ui_change for in-place mutations and non-navigation updates.
+- Use the returned snapshot_revision as the observed synchronization point when available.
+Failure Handling:
+- TIMEOUT means the UI did not change in a stable way within the allotted time.`,
+    inputSchema: {
+      type: 'object',
+      properties: {
+        platform: { type: 'string', enum: ['android', 'ios'], description: 'Optional platform override (android|ios)' },
+        deviceId: { type: 'string', description: 'Optional device id/udid to target' },
+        expected_change: { type: 'string', enum: ['hierarchy_diff', 'text_change', 'state_change'], description: 'Optional type of UI change to wait for' },
+        timeout_ms: { type: 'number', description: 'Timeout in ms to wait for change (default 60000)', default: 60000 },
+        stability_window_ms: { type: 'number', description: 'How long the change must remain stable before success (default 250)', default: 250 }
+      }
+    }
+  },
   {
     name: 'expect_screen',
     description: `Purpose:
@@ -890,26 +918,29 @@ Failure Handling:
     name: 'classify_action_outcome',
     description: `Classify the outcome of the most recent action into exactly one of: success, no_op, backend_failure, ui_failure, unknown.
-MUST be called after every action (tap, swipe, type_text, press_back, start_app, etc). Never skip.
-Use this with get_network_activity when the expected outcome is backend/API activity without a visible UI change.
-For backend/API activity, compare get_screen_fingerprint before and after the action and call get_network_activity immediately after the action instead of waiting for wait_for_screen_change.
+Use the runtime action result's \`action_type\` as \`actionType\` so the classifier can distinguish local-state actions from side-effect actions.
+Use this when the intended outcome is not already fully verified by the UI signal alone.
+For backend/API activity, compare get_screen_fingerprint before and after the action and call get_network_activity immediately after the action if the outcome is still ambiguous.
 HOW TO GATHER INPUTS before calling:
 1. Call wait_for_screen_change or compare get_screen_fingerprint before/after — set uiChanged accordingly.
 2. If you checked for a specific element with wait_for_ui, set expectedElementVisible.
-3. Do NOT call get_network_activity yet — omit networkRequests on the first call.
+3. Pass actionType from the action response when available.
+4. Only provide networkRequests if you already collected them or want to classify a side-effect action with backend evidence.
 RULES (applied in order — stop at first match):
 1. If uiChanged=true OR expectedElementVisible=true → outcome=success
-2. Otherwise this tool returns nextAction="call_get_network_activity" — you MUST call get_network_activity once, then call classify_action_outcome again with the results in networkRequests.
+2. If actionType is missing → outcome=unknown
 3. If any request has status=failure or retryable → outcome=backend_failure
-4. If no requests returned → outcome=no_op
-5. If all requests succeeded → outcome=ui_failure
-6. Otherwise → outcome=unknown
+4. If actionType maps to a local-state action → outcome=no_op; prefer state-based verification and avoid default network fallback
+5. If actionType maps to a side-effect action and no networkRequests were supplied → outcome=unknown
+6. If no requests returned → outcome=no_op
+7. If all requests succeeded → outcome=ui_failure
+8. Otherwise → outcome=unknown
 BEHAVIOUR after outcome:
 - success → continue
-- no_op → retry the action once or re-resolve the element
+- no_op → retry with richer state verification or re-resolve the element
 - backend_failure → stop and report the failing endpoint
 - ui_failure → stop and report failure
 - unknown → take one recovery step (e.g. capture_debug_snapshot), then stop`,
@@ -924,9 +955,13 @@ BEHAVIOUR after outcome:
           type: 'boolean',
           description: 'true if the element you expected to appear is now visible (from wait_for_ui). Omit if you did not check for a specific element.'
         },
+        actionType: {
+          type: 'string',
+          description: 'The runtime action_type from the action response (for example tap, tap_element, swipe, type_text, press_back, start_app).'
+        },
         networkRequests: {
           type: 'array',
-          description: 'Pass this only after calling get_network_activity as instructed by nextAction. Also use it when the expected outcome is backend/API activity without a visible UI change.',
+          description: 'Optional network evidence collected after the action. Use it when the expected outcome is backend/API activity or when the UI signal is ambiguous.',
           items: {
             type: 'object',
             properties: {
@@ -948,7 +983,7 @@ BEHAVIOUR after outcome:
     name: 'get_network_activity',
     description: `Returns structured network events captured from platform logs since the last action.
-Call this when classify_action_outcome returns nextAction="call_get_network_activity" or immediately after an action whose expected outcome is backend/API activity without a visible UI change.
+Call this immediately after an action when you want backend evidence for a side-effect flow, only if the result is still ambiguous.
 Do not call more than once per action.
 Events are filtered to significant (non-background) requests only.

package/src/server/tool-handlers.ts CHANGED Viewed

@@ -288,6 +288,16 @@ async function handleWaitForUI(args: ToolCallArgs) {
   return wrapResponse(res)
 }
+async function handleWaitForUIChange(args: ToolCallArgs) {
+  const platform = getStringArg(args, 'platform') as PlatformArg | undefined
+  const deviceId = getStringArg(args, 'deviceId')
+  const timeout_ms = getNumberArg(args, 'timeout_ms') ?? 60000
+  const stability_window_ms = getNumberArg(args, 'stability_window_ms') ?? 250
+  const expected_change = getStringArg(args, 'expected_change') as 'hierarchy_diff' | 'text_change' | 'state_change' | undefined
+  const res = await ToolsInteract.waitForUIChangeHandler({ platform, deviceId, timeout_ms, stability_window_ms, expected_change })
+  return wrapResponse(res)
+}
 async function handleFindElement(args: ToolCallArgs) {
   const query = requireStringArg(args, 'query')
   const exact = getBooleanArg(args, 'exact') ?? false
@@ -438,11 +448,13 @@ async function handleStopLogStream(args: ToolCallArgs) {
 function handleClassifyActionOutcome(args: ToolCallArgs) {
   const uiChanged = requireBooleanArg(args, 'uiChanged')
   const expectedElementVisible = getBooleanArg(args, 'expectedElementVisible')
+  const actionType = getStringArg(args, 'actionType')
   const networkRequests = getArrayArg<ClassifyNetworkRequestArg>(args, 'networkRequests')
   const hasLogErrors = getBooleanArg(args, 'hasLogErrors')
   const result = classifyActionOutcome({
     uiChanged,
     expectedElementVisible: expectedElementVisible ?? null,
+    actionType: actionType ?? null,
     networkRequests: networkRequests ?? null,
     hasLogErrors: hasLogErrors ?? null
   })
@@ -473,6 +485,7 @@ export const toolHandlers: Record<string, ToolHandler> = {
   get_current_screen: handleGetCurrentScreen,
   get_screen_fingerprint: handleGetScreenFingerprint,
   wait_for_screen_change: handleWaitForScreenChange,
+  wait_for_ui_change: handleWaitForUIChange,
   expect_screen: handleExpectScreen,
   expect_element_visible: handleExpectElementVisible,
   expect_state: handleExpectState,

package/src/server-core.ts CHANGED Viewed

@@ -13,7 +13,7 @@ export { wrapResponse, toolDefinitions, handleToolCall }
 export const serverInfo = {
   name: 'mobile-debug-mcp',
-  version: '0.25.1'
+  version: '0.26.1'
 }
 export function createServer() {

package/src/types.ts CHANGED Viewed

@@ -109,6 +109,12 @@ export interface UIElementSemanticMetadata {
   is_container: boolean;
 }
+export interface LoadingState {
+  active: boolean;
+  signal: string;
+  source: string;
+}
 export interface CaptureAndroidScreenResponse {
   device: DeviceInfo;
   screenshot: string; // base64 encoded string
@@ -162,6 +168,9 @@ export interface GetUITreeResponse {
     height: number;
   };
   elements: UIElement[];
+  snapshot_revision: number;
+  captured_at_ms: number;
+  loading_state?: LoadingState | null;
   error?: string;
 }
@@ -183,12 +192,15 @@ export interface SnapshotSemanticResponse {
 export interface CaptureDebugSnapshotRawResponse {
   timestamp: number;
+  snapshot_revision: number;
+  captured_at_ms: number;
   reason: string;
   activity: string | null;
   fingerprint: string | null;
   screenshot: string | null;
   ui_tree: GetUITreeResponse | null;
   logs: StructuredLogEntry[];
+  loading_state?: LoadingState | null;
   device?: DeviceInfo;
   screenshot_error?: string;
   activity_error?: string;
@@ -326,6 +338,17 @@ export interface ExpectStateResponse {
   retryable?: boolean;
 }
+export interface WaitForUIChangeResponse {
+  success: boolean;
+  observed_change: 'hierarchy_diff' | 'text_change' | 'state_change' | null;
+  snapshot_revision?: number;
+  timeout: boolean;
+  elapsed_ms: number;
+  expected_change?: 'hierarchy_diff' | 'text_change' | 'state_change';
+  reason?: string;
+  loading_state?: LoadingState | null;
+}
 export interface SwipeResponse {
   device: DeviceInfo;
   success: boolean;

package/test/unit/interact/classify_action_outcome.test.ts CHANGED Viewed

@@ -7,7 +7,6 @@ function run() {
     const result = classifyActionOutcome({ uiChanged: true })
     assert.strictEqual(result.outcome, 'success')
     assert.ok(result.reasoning.length > 0)
-    assert.strictEqual(result.nextAction, undefined)
   }
   // Step 1 — expectedElementVisible → success
@@ -15,7 +14,6 @@ function run() {
     const result = classifyActionOutcome({ uiChanged: false, expectedElementVisible: true })
     assert.strictEqual(result.outcome, 'success')
     assert.strictEqual(result.reasoning, 'expected element is visible')
-    assert.strictEqual(result.nextAction, undefined)
   }
   // Step 1 — both uiChanged and expectedElementVisible → success
@@ -24,24 +22,50 @@ function run() {
     assert.strictEqual(result.outcome, 'success')
   }
-  // Step 2 — UI did not change, networkRequests not yet provided → nextAction required
+  // No actionType supplied → unknown
   {
     const result = classifyActionOutcome({ uiChanged: false })
     assert.strictEqual(result.outcome, 'unknown')
-    assert.strictEqual(result.nextAction, 'call_get_network_activity')
+    assert.ok(result.reasoning.includes('actionType was not supplied'))
   }
-  // Step 2 — explicit null networkRequests → nextAction required
+  // Local-state action routes to state verification rather than forced network probing
   {
-    const result = classifyActionOutcome({ uiChanged: false, expectedElementVisible: null, networkRequests: null })
+    const result = classifyActionOutcome({ uiChanged: false, actionType: 'tap' })
+    assert.strictEqual(result.outcome, 'no_op')
+    assert.ok(result.reasoning.includes('local-state action'))
+  }
+  // Local-state action with network data still prefers local-state semantics
+  {
+    const result = classifyActionOutcome({
+      uiChanged: false,
+      actionType: 'type_text',
+      networkRequests: []
+    })
+    assert.strictEqual(result.outcome, 'no_op')
+    assert.ok(result.reasoning.includes('local-state action'))
+  }
+  // Explicit side-effect action without networkRequests supplied → unknown
+  {
+    const result = classifyActionOutcome({ uiChanged: false, actionType: 'start_app' })
     assert.strictEqual(result.outcome, 'unknown')
-    assert.strictEqual(result.nextAction, 'call_get_network_activity')
+    assert.ok(result.reasoning.includes('side-effect action'))
+  }
+  // Side-effect action with empty networkRequests → no_op
+  {
+    const result = classifyActionOutcome({ uiChanged: false, actionType: 'start_app', networkRequests: [] })
+    assert.strictEqual(result.outcome, 'no_op')
+    assert.ok(result.reasoning.includes('side-effect action'))
   }
-  // Step 3 — failure status → backend_failure
+  // Network failure → backend_failure
   {
     const result = classifyActionOutcome({
       uiChanged: false,
+      actionType: 'start_app',
       networkRequests: [{ endpoint: '/login', status: 'failure' }]
     })
     assert.strictEqual(result.outcome, 'backend_failure')
@@ -49,10 +73,11 @@ function run() {
     assert.ok(result.reasoning.includes('failure'))
   }
-  // Step 3 — retryable status → backend_failure
+  // Retryable status → backend_failure
   {
     const result = classifyActionOutcome({
       uiChanged: false,
+      actionType: 'start_app',
       networkRequests: [
         { endpoint: '/api/submit', status: 'retryable' },
         { endpoint: '/api/other', status: 'success' }
@@ -62,25 +87,11 @@ function run() {
     assert.ok(result.reasoning.includes('/api/submit'))
   }
-  // Step 4 — empty network requests → no_op
-  {
-    const result = classifyActionOutcome({ uiChanged: false, networkRequests: [] })
-    assert.strictEqual(result.outcome, 'no_op')
-    assert.ok(result.reasoning.includes('no UI change'))
-    assert.ok(result.reasoning.includes('no network activity'))
-  }
-  // Step 4 — empty network requests with log errors → no_op with note
-  {
-    const result = classifyActionOutcome({ uiChanged: false, networkRequests: [], hasLogErrors: true })
-    assert.strictEqual(result.outcome, 'no_op')
-    assert.ok(result.reasoning.includes('log errors'))
-  }
-  // Step 5 — all requests succeeded but UI unchanged → ui_failure
+  // All requests succeeded and UI stayed unchanged → ui_failure
   {
     const result = classifyActionOutcome({
       uiChanged: false,
+      actionType: 'start_app',
       networkRequests: [
         { endpoint: '/api/save', status: 'success' },
         { endpoint: '/api/refresh', status: 'success' }
@@ -90,10 +101,18 @@ function run() {
     assert.ok(result.reasoning.includes('network requests succeeded'))
   }
+  // Empty network requests with log errors → no_op with note
+  {
+    const result = classifyActionOutcome({ uiChanged: false, actionType: 'start_app', networkRequests: [], hasLogErrors: true })
+    assert.strictEqual(result.outcome, 'no_op')
+    assert.ok(result.reasoning.includes('log errors'))
+  }
   // Step 1 takes priority over network signals — success even when failures present
   {
     const result = classifyActionOutcome({
       uiChanged: true,
+      actionType: 'start_app',
       networkRequests: [{ endpoint: '/api/log', status: 'failure' }]
     })
     assert.strictEqual(result.outcome, 'success')

package/test/unit/interact/wait_for_ui_change.test.ts ADDED Viewed

@@ -0,0 +1,76 @@
+import assert from 'assert'
+import { ToolsInteract } from '../../../src/interact/index.js'
+import { ToolsObserve } from '../../../src/observe/index.js'
+async function run() {
+  const originalGetUITreeHandler = (ToolsObserve as any).getUITreeHandler
+  try {
+    let calls = 0
+    ;(ToolsObserve as any).getUITreeHandler = async () => {
+      calls++
+      if (calls === 1) {
+        return {
+          device: { platform: 'android', id: 'mock', osVersion: '14', model: 'Pixel', simulator: true },
+          screen: 'Loading',
+          resolution: { width: 1080, height: 2400 },
+          elements: [{ text: 'Loading', type: 'TextView', bounds: [0, 0, 100, 40], visible: true }],
+          snapshot_revision: 1,
+          captured_at_ms: 1000
+        }
+      }
+      return {
+        device: { platform: 'android', id: 'mock', osVersion: '14', model: 'Pixel', simulator: true },
+        screen: 'Loaded',
+        resolution: { width: 1080, height: 2400 },
+        elements: [{ text: 'Loaded', type: 'TextView', bounds: [0, 0, 100, 40], visible: true }],
+        snapshot_revision: 2,
+        captured_at_ms: 2000
+      }
+    }
+    const success = await ToolsInteract.waitForUIChangeHandler({
+      platform: 'android',
+      deviceId: 'mock',
+      expected_change: 'text_change',
+      timeout_ms: 1500,
+      stability_window_ms: 1
+    })
+    assert.strictEqual(success.success, true)
+    assert.strictEqual(success.observed_change, 'text_change')
+    assert.strictEqual(success.snapshot_revision, 2)
+    assert.strictEqual(success.timeout, false)
+    ;(ToolsObserve as any).getUITreeHandler = async () => ({
+      device: { platform: 'android', id: 'mock', osVersion: '14', model: 'Pixel', simulator: true },
+      screen: 'Static',
+      resolution: { width: 1080, height: 2400 },
+      elements: [{ text: 'Static', type: 'TextView', bounds: [0, 0, 100, 40], visible: true }],
+      snapshot_revision: 9,
+      captured_at_ms: 3000
+    })
+    const timeout = await ToolsInteract.waitForUIChangeHandler({
+      platform: 'android',
+      deviceId: 'mock',
+      expected_change: 'state_change',
+      timeout_ms: 700,
+      stability_window_ms: 1
+    })
+    assert.strictEqual(timeout.success, false)
+    assert.strictEqual(timeout.observed_change, null)
+    assert.strictEqual(timeout.timeout, true)
+    console.log('wait_for_ui_change tests passed')
+  } finally {
+    ;(ToolsObserve as any).getUITreeHandler = originalGetUITreeHandler
+  }
+}
+run().catch((error) => {
+  console.error(error)
+  process.exit(1)
+})

package/test/unit/server/contract.test.ts CHANGED Viewed

@@ -68,15 +68,17 @@ async function run() {
   const classifyActionOutcome = toolDefinitions.find((tool) => tool.name === 'classify_action_outcome')
   assert(classifyActionOutcome, 'classify_action_outcome should be registered')
-  assert.match((classifyActionOutcome as any).description, /backend\/API activity without a visible UI change/i)
-  assert.match((classifyActionOutcome as any).description, /get_network_activity/i)
-  assert.match((classifyActionOutcome as any).description, /immediately after the action/i)
+  assert.match((classifyActionOutcome as any).description, /action_type/i)
+  assert.match((classifyActionOutcome as any).description, /local-state/i)
+  assert.match((classifyActionOutcome as any).description, /side-effect/i)
+  assert.strictEqual((classifyActionOutcome as any).inputSchema.properties.actionType.type, 'string')
+  assert.match((classifyActionOutcome as any).inputSchema.properties.networkRequests.description, /optional network evidence/i)
   const getNetworkActivity = toolDefinitions.find((tool) => tool.name === 'get_network_activity')
   assert(getNetworkActivity, 'get_network_activity should be registered')
-  assert.match((getNetworkActivity as any).description, /backend\/API activity without a visible UI change/i)
-  assert.doesNotMatch((getNetworkActivity as any).description, /Call this only when/i)
-  assert.match((getNetworkActivity as any).description, /immediately after an action/i)
+  assert.match((getNetworkActivity as any).description, /side-effect/i)
+  assert.doesNotMatch((getNetworkActivity as any).description, /nextAction/i)
+  assert.match((getNetworkActivity as any).description, /only if the result is still ambiguous/i)
   await assert.rejects(() => handleToolCall('unknown_tool'), /Unknown tool: unknown_tool/)

package/test/unit/server/response_shapes.test.ts CHANGED Viewed

@@ -8,6 +8,7 @@ import { ToolsObserve } from '../../../src/observe/index.js'
 async function run() {
   const originalInstallAppHandler = (ToolsManage as any).installAppHandler
   const originalWaitForUIHandler = (ToolsInteract as any).waitForUIHandler
+  const originalWaitForUIChangeHandler = (ToolsInteract as any).waitForUIChangeHandler
   const originalTapElementHandler = (ToolsInteract as any).tapElementHandler
   const originalTapHandler = (ToolsInteract as any).tapHandler
   const originalExpectScreenHandler = (ToolsInteract as any).expectScreenHandler
@@ -145,12 +146,16 @@ async function run() {
     ;(ToolsObserve as any).getUITreeHandler = async () => ({
       device: { platform: 'android', id: 'mock', osVersion: '14', model: 'Pixel', simulator: true },
       resolution: { width: 1080, height: 2400 },
+      screen: 'Notifications',
       elements: [{
         text: 'Notifications',
         depth: 0,
         center: { x: 50, y: 20 },
         state: { checked: true, selected: 'Notifications' }
-      }]
+      }],
+      snapshot_revision: 12,
+      captured_at_ms: 1710000000123,
+      loading_state: { active: true, signal: 'progress_indicator', source: 'ui_tree' }
     })
     ;(ToolsInteract as any).expectStateHandler = async () => ({
@@ -227,8 +232,12 @@ async function run() {
     ;(ToolsObserve as any).getUITreeHandler = async () => ({
       device: { platform: 'android', id: 'mock', osVersion: '14', model: 'Pixel', simulator: true },
+      screen: 'Login',
       resolution: { width: 1080, height: 2400 },
-      elements: [{ text: 'Login', depth: 0, center: { x: 50, y: 20 } }]
+      elements: [{ text: 'Login', depth: 0, center: { x: 50, y: 20 } }],
+      snapshot_revision: 12,
+      captured_at_ms: 1710000000123,
+      loading_state: { active: true, signal: 'progress_indicator', source: 'ui_tree' }
     })
     const uiTreeResponse = await handleToolCall('get_ui_tree', { platform: 'android' })
@@ -236,16 +245,21 @@ async function run() {
     assert.strictEqual(uiTreePayload.elements.length, 1)
     assert.strictEqual(uiTreePayload.resolution.height, 2400)
     assert.strictEqual(uiTreePayload.elements[0].text, 'Login')
+    assert.strictEqual(uiTreePayload.snapshot_revision, 12)
+    assert.strictEqual(uiTreePayload.loading_state.signal, 'progress_indicator')
     ;(ToolsObserve as any).captureDebugSnapshotHandler = async () => ({
       raw: {
         timestamp: 1710000000000,
+        snapshot_revision: 12,
+        captured_at_ms: 1710000000123,
         reason: 'manual',
         activity: 'com.example.MainActivity',
         fingerprint: 'fp_raw',
         screenshot: 'base64',
-        ui_tree: { screen: 'Home', elements: [] },
+        ui_tree: { screen: 'Home', elements: [], snapshot_revision: 12, captured_at_ms: 1710000000123, loading_state: { active: true, signal: 'spinner', source: 'snapshot' } },
         logs: [],
+        loading_state: { active: true, signal: 'spinner', source: 'snapshot' },
         device: { platform: 'android', id: 'mock', osVersion: '14', model: 'Pixel', simulator: true }
       },
       semantic: {
@@ -260,13 +274,33 @@ async function run() {
     const snapshotResponse = await handleToolCall('capture_debug_snapshot', { platform: 'android' })
     const snapshotPayload = JSON.parse((snapshotResponse as any).content[0].text)
     assert.strictEqual(snapshotPayload.raw.fingerprint, 'fp_raw')
+    assert.strictEqual(snapshotPayload.raw.snapshot_revision, 12)
+    assert.strictEqual(snapshotPayload.raw.loading_state.signal, 'spinner')
     assert.strictEqual(snapshotPayload.semantic.screen, 'Home')
     assert.strictEqual(snapshotPayload.semantic.confidence, 0.8)
+    ;(ToolsInteract as any).waitForUIChangeHandler = async () => ({
+      success: true,
+      observed_change: 'text_change',
+      snapshot_revision: 13,
+      timeout: false,
+      elapsed_ms: 1550,
+      expected_change: 'text_change',
+      loading_state: { active: false, signal: 'spinner', source: 'ui_tree' },
+      reason: 'UI change observed'
+    })
+    const waitForUIChangeResponse = await handleToolCall('wait_for_ui_change', { expected_change: 'text_change' })
+    const waitForUIChangePayload = JSON.parse((waitForUIChangeResponse as any).content[0].text)
+    assert.strictEqual(waitForUIChangePayload.success, true)
+    assert.strictEqual(waitForUIChangePayload.observed_change, 'text_change')
+    assert.strictEqual(waitForUIChangePayload.snapshot_revision, 13)
     console.log('server response-shape tests passed')
   } finally {
     ;(ToolsManage as any).installAppHandler = originalInstallAppHandler
     ;(ToolsInteract as any).waitForUIHandler = originalWaitForUIHandler
+    ;(ToolsInteract as any).waitForUIChangeHandler = originalWaitForUIChangeHandler
     ;(ToolsInteract as any).tapElementHandler = originalTapElementHandler
     ;(ToolsInteract as any).tapHandler = originalTapHandler
     ;(ToolsInteract as any).expectScreenHandler = originalExpectScreenHandler