npm - mobile-debug-mcp - Versions diffs - 0.26.0 → 0.26.2 - Mend

mobile-debug-mcp 0.26.0 → 0.26.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

package/dist/interact/classify.js +48 -11
package/dist/interact/index.js +26 -33
package/dist/server/common.js +14 -1
package/dist/server/tool-definitions.js +38 -15
package/dist/server/tool-handlers.js +9 -0
package/dist/server-core.js +1 -1
package/docs/CHANGELOG.md +6 -0
package/docs/ROADMAP.md +281 -88
package/docs/rfcs/004-action-verification-routing.md +342 -0
package/docs/rfcs/005-unified-action-execution-and-verification-model.md +216 -0
package/docs/rfcs/006-runtime-action-instrumentation-and-binding-layer.md +230 -0
package/docs/specs/mcp-tooling-spec-v1.md +7 -3
package/docs/tools/interact.md +14 -8
package/package.json +1 -1
package/src/interact/classify.ts +53 -13
package/src/interact/index.ts +27 -35
package/src/server/common.ts +22 -1
package/src/server/tool-definitions.ts +38 -15
package/src/server/tool-handlers.ts +9 -0
package/src/server-core.ts +1 -1
package/src/types.ts +2 -0
package/test/unit/interact/classify_action_outcome.test.ts +44 -25
package/test/unit/server/contract.test.ts +8 -6
package/test/unit/server/response_shapes.test.ts +8 -0

package/src/server/tool-definitions.ts CHANGED Viewed

@@ -11,7 +11,9 @@ Inputs:
 Output Structure:
  - action_id, timestamp (ISO 8601), action_type
-- target.selector = { appId }
+ - lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
+ - source_module: runtime source of the action envelope
+ - target.selector = { appId }
 - success = true when launch was dispatched successfully
 - failure_code/retryable when launch dispatch fails
 - ui_fingerprint_before/ui_fingerprint_after when available
@@ -84,7 +86,9 @@ Inputs:
 Output Structure:
  - action_id, timestamp (ISO 8601), action_type
-- target.selector = { appId }
+ - lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
+ - source_module: runtime source of the action envelope
+ - target.selector = { appId }
 - success = true when the restart command completed
 - failure_code/retryable when restart dispatch fails
 - ui_fingerprint_before/ui_fingerprint_after when available
@@ -344,7 +348,7 @@ Capabilities:
 Constraints:
 - Does not verify correctness of the resulting state
 - Must not be used alone to confirm action success when an applicable expect_* tool exists
-- Use classify_action_outcome + get_network_activity when the expected outcome is backend/API activity without a visible UI change
+- For backend/API activity without a visible UI change, pass the runtime action_type into classify_action_outcome and collect network evidence only if the result remains ambiguous
 Recommended Usage:
 1. Capture or define the expected outcome
@@ -617,7 +621,9 @@ Inputs:
 Output Structure:
  - action_id, timestamp (ISO 8601), action_type
-- target.selector = { x, y }
+ - lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
+ - source_module: runtime source of the action envelope
+ - target.selector = { x, y }
 - success = true when the tap was dispatched
 - failure_code/retryable when dispatch fails
 - ui_fingerprint_before/ui_fingerprint_after when available
@@ -673,6 +679,8 @@ Output Structure:
 - action_id: unique timestamp-based action identifier
 - timestamp: ISO 8601 timestamp for the action attempt
 - action_type: "tap_element"
+- lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
+- source_module: runtime source of the action envelope
 - target.selector: original target handle ({ elementId })
 - target.resolved: minimal resolved element info used for the tap
 - success: true when the tap was dispatched
@@ -725,6 +733,8 @@ Inputs:
 Output Structure:
 - action_id, timestamp (ISO 8601), action_type
+- lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
+- source_module: runtime source of the action envelope
 - target.selector = { x1, y1, x2, y2, duration }
 - success = true when the swipe was dispatched
 - failure_code/retryable when dispatch fails
@@ -777,6 +787,8 @@ Inputs:
 Output Structure:
 - action_id, timestamp (ISO 8601), action_type
+- lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
+- source_module: runtime source of the action envelope
 - target.selector = original selector
 - target.resolved = minimal resolved element info when found
 - success = true when scrolling produced a visible target element
@@ -831,6 +843,8 @@ Inputs:
 Output Structure:
 - action_id, timestamp (ISO 8601), action_type
+- lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
+- source_module: runtime source of the action envelope
 - target.selector = { text }
 - success = true when text input was dispatched
 - failure_code/retryable when dispatch fails
@@ -880,6 +894,8 @@ Inputs:
 Output Structure:
 - action_id, timestamp (ISO 8601), action_type
+- lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
+- source_module: runtime source of the action envelope
 - target.selector = { key: "back" }
 - success = true when the back action was dispatched
 - failure_code/retryable when dispatch fails
@@ -918,26 +934,29 @@ Failure Handling:
     name: 'classify_action_outcome',
     description: `Classify the outcome of the most recent action into exactly one of: success, no_op, backend_failure, ui_failure, unknown.
-MUST be called after every action (tap, swipe, type_text, press_back, start_app, etc). Never skip.
-Use this with get_network_activity when the expected outcome is backend/API activity without a visible UI change.
-For backend/API activity, compare get_screen_fingerprint before and after the action and call get_network_activity immediately after the action instead of waiting for wait_for_screen_change.
+Use the runtime action result's \`action_type\` as \`actionType\` so the classifier can distinguish local-state actions from side-effect actions.
+Use this when the intended outcome is not already fully verified by the UI signal alone.
+For backend/API activity, compare get_screen_fingerprint before and after the action and call get_network_activity immediately after the action if the outcome is still ambiguous.
 HOW TO GATHER INPUTS before calling:
 1. Call wait_for_screen_change or compare get_screen_fingerprint before/after — set uiChanged accordingly.
 2. If you checked for a specific element with wait_for_ui, set expectedElementVisible.
-3. Do NOT call get_network_activity yet — omit networkRequests on the first call.
+3. Pass actionType from the action response when available.
+4. Only provide networkRequests if you already collected them or want to classify a side-effect action with backend evidence.
 RULES (applied in order — stop at first match):
 1. If uiChanged=true OR expectedElementVisible=true → outcome=success
-2. Otherwise this tool returns nextAction="call_get_network_activity" — you MUST call get_network_activity once, then call classify_action_outcome again with the results in networkRequests.
+2. If actionType is missing → outcome=unknown
 3. If any request has status=failure or retryable → outcome=backend_failure
-4. If no requests returned → outcome=no_op
-5. If all requests succeeded → outcome=ui_failure
-6. Otherwise → outcome=unknown
+4. If actionType maps to a local-state action → outcome=no_op; prefer state-based verification and avoid default network fallback
+5. If actionType maps to a side-effect action and no networkRequests were supplied → outcome=unknown
+6. If no requests returned → outcome=no_op
+7. If all requests succeeded → outcome=ui_failure
+8. Otherwise → outcome=unknown
 BEHAVIOUR after outcome:
 - success → continue
-- no_op → retry the action once or re-resolve the element
+- no_op → retry with richer state verification or re-resolve the element
 - backend_failure → stop and report the failing endpoint
 - ui_failure → stop and report failure
 - unknown → take one recovery step (e.g. capture_debug_snapshot), then stop`,
@@ -952,9 +971,13 @@ BEHAVIOUR after outcome:
           type: 'boolean',
           description: 'true if the element you expected to appear is now visible (from wait_for_ui). Omit if you did not check for a specific element.'
         },
+        actionType: {
+          type: 'string',
+          description: 'The runtime action_type from the action response (for example tap, tap_element, swipe, type_text, press_back, start_app).'
+        },
         networkRequests: {
           type: 'array',
-          description: 'Pass this only after calling get_network_activity as instructed by nextAction. Also use it when the expected outcome is backend/API activity without a visible UI change.',
+          description: 'Optional network evidence collected after the action. Use it when the expected outcome is backend/API activity or when the UI signal is ambiguous.',
           items: {
             type: 'object',
             properties: {
@@ -976,7 +999,7 @@ BEHAVIOUR after outcome:
     name: 'get_network_activity',
     description: `Returns structured network events captured from platform logs since the last action.
-Call this when classify_action_outcome returns nextAction="call_get_network_activity" or immediately after an action whose expected outcome is backend/API activity without a visible UI change.
+Call this immediately after an action when you want backend evidence for a side-effect flow, only if the result is still ambiguous.
 Do not call more than once per action.
 Events are filtered to significant (non-background) requests only.

package/src/server/tool-handlers.ts CHANGED Viewed

@@ -47,6 +47,7 @@ async function handleStartApp(args: ToolCallArgs) {
   const uiFingerprintAfter = await captureActionFingerprint(platform, deviceId)
   return wrapResponse(buildActionExecutionResult({
     actionType: 'start_app',
+    sourceModule: 'server',
     device: res.device,
     selector: { appId },
     success: !!res.appStarted,
@@ -82,6 +83,7 @@ async function handleRestartApp(args: ToolCallArgs) {
   const uiFingerprintAfter = await captureActionFingerprint(platform, deviceId)
   return wrapResponse(buildActionExecutionResult({
     actionType: 'restart_app',
+    sourceModule: 'server',
     device: res.device,
     selector: { appId },
     success: !!res.appRestarted,
@@ -319,6 +321,7 @@ async function handleTap(args: ToolCallArgs) {
   const uiFingerprintAfter = await captureActionFingerprint(platform, deviceId)
   return wrapResponse(buildActionExecutionResult({
     actionType: 'tap',
+    sourceModule: 'server',
     selector: { x, y },
     success: !!res.success,
     uiFingerprintBefore,
@@ -348,6 +351,7 @@ async function handleSwipe(args: ToolCallArgs) {
   const uiFingerprintAfter = await captureActionFingerprint(platform, deviceId)
   return wrapResponse(buildActionExecutionResult({
     actionType: 'swipe',
+    sourceModule: 'server',
     selector: { x1, y1, x2, y2, duration },
     success: !!res.success,
     uiFingerprintBefore,
@@ -369,6 +373,7 @@ async function handleScrollToElement(args: ToolCallArgs) {
   const uiFingerprintAfter = await captureActionFingerprint(platform, deviceId)
   return wrapResponse(buildActionExecutionResult({
     actionType: 'scroll_to_element',
+    sourceModule: 'server',
     selector: selector ?? null,
     resolved: res?.success && res?.element ? {
       elementId: null,
@@ -395,6 +400,7 @@ async function handleTypeText(args: ToolCallArgs) {
   const uiFingerprintAfter = await captureActionFingerprint('android', deviceId)
   return wrapResponse(buildActionExecutionResult({
     actionType: 'type_text',
+    sourceModule: 'server',
     selector: { text },
     success: !!res.success,
     uiFingerprintBefore,
@@ -411,6 +417,7 @@ async function handlePressBack(args: ToolCallArgs) {
   const uiFingerprintAfter = await captureActionFingerprint('android', deviceId)
   return wrapResponse(buildActionExecutionResult({
     actionType: 'press_back',
+    sourceModule: 'server',
     selector: { key: 'back' },
     success: !!res.success,
     uiFingerprintBefore,
@@ -448,11 +455,13 @@ async function handleStopLogStream(args: ToolCallArgs) {
 function handleClassifyActionOutcome(args: ToolCallArgs) {
   const uiChanged = requireBooleanArg(args, 'uiChanged')
   const expectedElementVisible = getBooleanArg(args, 'expectedElementVisible')
+  const actionType = getStringArg(args, 'actionType')
   const networkRequests = getArrayArg<ClassifyNetworkRequestArg>(args, 'networkRequests')
   const hasLogErrors = getBooleanArg(args, 'hasLogErrors')
   const result = classifyActionOutcome({
     uiChanged,
     expectedElementVisible: expectedElementVisible ?? null,
+    actionType: actionType ?? null,
     networkRequests: networkRequests ?? null,
     hasLogErrors: hasLogErrors ?? null
   })

package/src/server-core.ts CHANGED Viewed

@@ -13,7 +13,7 @@ export { wrapResponse, toolDefinitions, handleToolCall }
 export const serverInfo = {
   name: 'mobile-debug-mcp',
-  version: '0.26.0'
+  version: '0.26.2'
 }
 export function createServer() {

package/src/types.ts CHANGED Viewed

@@ -258,6 +258,8 @@ export interface ActionExecutionResult {
   action_id: string;
   timestamp: string;
   action_type: string;
+  lifecycle_state?: 'pending_verification' | 'failed';
+  source_module?: 'server' | 'interact';
   device?: DeviceInfo;
   target: {
     selector: Record<string, unknown> | null;

package/test/unit/interact/classify_action_outcome.test.ts CHANGED Viewed

@@ -7,7 +7,6 @@ function run() {
     const result = classifyActionOutcome({ uiChanged: true })
     assert.strictEqual(result.outcome, 'success')
     assert.ok(result.reasoning.length > 0)
-    assert.strictEqual(result.nextAction, undefined)
   }
   // Step 1 — expectedElementVisible → success
@@ -15,7 +14,6 @@ function run() {
     const result = classifyActionOutcome({ uiChanged: false, expectedElementVisible: true })
     assert.strictEqual(result.outcome, 'success')
     assert.strictEqual(result.reasoning, 'expected element is visible')
-    assert.strictEqual(result.nextAction, undefined)
   }
   // Step 1 — both uiChanged and expectedElementVisible → success
@@ -24,24 +22,50 @@ function run() {
     assert.strictEqual(result.outcome, 'success')
   }
-  // Step 2 — UI did not change, networkRequests not yet provided → nextAction required
+  // No actionType supplied → unknown
   {
     const result = classifyActionOutcome({ uiChanged: false })
     assert.strictEqual(result.outcome, 'unknown')
-    assert.strictEqual(result.nextAction, 'call_get_network_activity')
+    assert.ok(result.reasoning.includes('actionType was not supplied'))
   }
-  // Step 2 — explicit null networkRequests → nextAction required
+  // Local-state action routes to state verification rather than forced network probing
   {
-    const result = classifyActionOutcome({ uiChanged: false, expectedElementVisible: null, networkRequests: null })
+    const result = classifyActionOutcome({ uiChanged: false, actionType: 'tap' })
+    assert.strictEqual(result.outcome, 'no_op')
+    assert.ok(result.reasoning.includes('local-state action'))
+  }
+  // Local-state action with network data still prefers local-state semantics
+  {
+    const result = classifyActionOutcome({
+      uiChanged: false,
+      actionType: 'type_text',
+      networkRequests: []
+    })
+    assert.strictEqual(result.outcome, 'no_op')
+    assert.ok(result.reasoning.includes('local-state action'))
+  }
+  // Explicit side-effect action without networkRequests supplied → unknown
+  {
+    const result = classifyActionOutcome({ uiChanged: false, actionType: 'start_app' })
     assert.strictEqual(result.outcome, 'unknown')
-    assert.strictEqual(result.nextAction, 'call_get_network_activity')
+    assert.ok(result.reasoning.includes('side-effect action'))
+  }
+  // Side-effect action with empty networkRequests → no_op
+  {
+    const result = classifyActionOutcome({ uiChanged: false, actionType: 'start_app', networkRequests: [] })
+    assert.strictEqual(result.outcome, 'no_op')
+    assert.ok(result.reasoning.includes('side-effect action'))
   }
-  // Step 3 — failure status → backend_failure
+  // Network failure → backend_failure
   {
     const result = classifyActionOutcome({
       uiChanged: false,
+      actionType: 'start_app',
       networkRequests: [{ endpoint: '/login', status: 'failure' }]
     })
     assert.strictEqual(result.outcome, 'backend_failure')
@@ -49,10 +73,11 @@ function run() {
     assert.ok(result.reasoning.includes('failure'))
   }
-  // Step 3 — retryable status → backend_failure
+  // Retryable status → backend_failure
   {
     const result = classifyActionOutcome({
       uiChanged: false,
+      actionType: 'start_app',
       networkRequests: [
         { endpoint: '/api/submit', status: 'retryable' },
         { endpoint: '/api/other', status: 'success' }
@@ -62,25 +87,11 @@ function run() {
     assert.ok(result.reasoning.includes('/api/submit'))
   }
-  // Step 4 — empty network requests → no_op
-  {
-    const result = classifyActionOutcome({ uiChanged: false, networkRequests: [] })
-    assert.strictEqual(result.outcome, 'no_op')
-    assert.ok(result.reasoning.includes('no UI change'))
-    assert.ok(result.reasoning.includes('no network activity'))
-  }
-  // Step 4 — empty network requests with log errors → no_op with note
-  {
-    const result = classifyActionOutcome({ uiChanged: false, networkRequests: [], hasLogErrors: true })
-    assert.strictEqual(result.outcome, 'no_op')
-    assert.ok(result.reasoning.includes('log errors'))
-  }
-  // Step 5 — all requests succeeded but UI unchanged → ui_failure
+  // All requests succeeded and UI stayed unchanged → ui_failure
   {
     const result = classifyActionOutcome({
       uiChanged: false,
+      actionType: 'start_app',
       networkRequests: [
         { endpoint: '/api/save', status: 'success' },
         { endpoint: '/api/refresh', status: 'success' }
@@ -90,10 +101,18 @@ function run() {
     assert.ok(result.reasoning.includes('network requests succeeded'))
   }
+  // Empty network requests with log errors → no_op with note
+  {
+    const result = classifyActionOutcome({ uiChanged: false, actionType: 'start_app', networkRequests: [], hasLogErrors: true })
+    assert.strictEqual(result.outcome, 'no_op')
+    assert.ok(result.reasoning.includes('log errors'))
+  }
   // Step 1 takes priority over network signals — success even when failures present
   {
     const result = classifyActionOutcome({
       uiChanged: true,
+      actionType: 'start_app',
       networkRequests: [{ endpoint: '/api/log', status: 'failure' }]
     })
     assert.strictEqual(result.outcome, 'success')

package/test/unit/server/contract.test.ts CHANGED Viewed

@@ -68,15 +68,17 @@ async function run() {
   const classifyActionOutcome = toolDefinitions.find((tool) => tool.name === 'classify_action_outcome')
   assert(classifyActionOutcome, 'classify_action_outcome should be registered')
-  assert.match((classifyActionOutcome as any).description, /backend\/API activity without a visible UI change/i)
-  assert.match((classifyActionOutcome as any).description, /get_network_activity/i)
-  assert.match((classifyActionOutcome as any).description, /immediately after the action/i)
+  assert.match((classifyActionOutcome as any).description, /action_type/i)
+  assert.match((classifyActionOutcome as any).description, /local-state/i)
+  assert.match((classifyActionOutcome as any).description, /side-effect/i)
+  assert.strictEqual((classifyActionOutcome as any).inputSchema.properties.actionType.type, 'string')
+  assert.match((classifyActionOutcome as any).inputSchema.properties.networkRequests.description, /optional network evidence/i)
   const getNetworkActivity = toolDefinitions.find((tool) => tool.name === 'get_network_activity')
   assert(getNetworkActivity, 'get_network_activity should be registered')
-  assert.match((getNetworkActivity as any).description, /backend\/API activity without a visible UI change/i)
-  assert.doesNotMatch((getNetworkActivity as any).description, /Call this only when/i)
-  assert.match((getNetworkActivity as any).description, /immediately after an action/i)
+  assert.match((getNetworkActivity as any).description, /side-effect/i)
+  assert.doesNotMatch((getNetworkActivity as any).description, /nextAction/i)
+  assert.match((getNetworkActivity as any).description, /only if the result is still ambiguous/i)
   await assert.rejects(() => handleToolCall('unknown_tool'), /Unknown tool: unknown_tool/)

package/test/unit/server/response_shapes.test.ts CHANGED Viewed

@@ -61,6 +61,8 @@ async function run() {
       action_id: 'tap_element_1',
       timestamp: '2026-04-23T08:00:00.000Z',
       action_type: 'tap_element',
+      lifecycle_state: 'pending_verification',
+      source_module: 'interact',
       target: {
         selector: { elementId: 'el_ready' },
         resolved: { elementId: 'el_ready', text: 'Ready', resource_id: null, accessibility_id: null, class: 'Button', bounds: [0, 0, 10, 10], index: 0 }
@@ -74,6 +76,8 @@ async function run() {
     const tapElementPayload = JSON.parse((tapElementResponse as any).content[0].text)
     assert.strictEqual(tapElementPayload.success, true)
     assert.strictEqual(tapElementPayload.action_type, 'tap_element')
+    assert.strictEqual(tapElementPayload.lifecycle_state, 'pending_verification')
+    assert.strictEqual(tapElementPayload.source_module, 'interact')
     assert.match(tapElementPayload.timestamp, /^\d{4}-\d{2}-\d{2}T/)
     assert.strictEqual(tapElementPayload.target.resolved.elementId, 'el_ready')
     assert.strictEqual(tapElementPayload.ui_fingerprint_before, 'fp_before')
@@ -84,6 +88,8 @@ async function run() {
     const tapPayload = JSON.parse((tapResponse as any).content[0].text)
     assert.strictEqual(tapPayload.success, true)
     assert.strictEqual(tapPayload.action_type, 'tap')
+    assert.strictEqual(tapPayload.lifecycle_state, 'pending_verification')
+    assert.strictEqual(tapPayload.source_module, 'server')
     assert.match(tapPayload.timestamp, /^\d{4}-\d{2}-\d{2}T/)
     assert.deepStrictEqual(tapPayload.target.selector, { x: 1, y: 2 })
     assert.strictEqual(tapPayload.ui_fingerprint_before, 'fp_mock')
@@ -107,6 +113,8 @@ async function run() {
     const startAppPayload = JSON.parse((startAppResponse as any).content[0].text)
     assert.strictEqual(startAppPayload.success, true)
     assert.strictEqual(startAppPayload.action_type, 'start_app')
+    assert.strictEqual(startAppPayload.lifecycle_state, 'pending_verification')
+    assert.strictEqual(startAppPayload.source_module, 'server')
     assert.match(startAppPayload.timestamp, /^\d{4}-\d{2}-\d{2}T/)
     assert.strictEqual(startAppPayload.device.id, 'emulator-5554')
     assert.deepStrictEqual(startAppPayload.target.selector, { appId: 'com.example.app' })