npm - mobile-debug-mcp - Versions diffs - 0.26.4 → 0.27.0 - Mend

mobile-debug-mcp 0.26.4 → 0.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

package/dist/interact/index.js +392 -192
package/dist/observe/ios.js +47 -3
package/dist/server/common.js +39 -0
package/dist/server-core.js +1 -1
package/dist/utils/android/utils.js +35 -3
package/docs/CHANGELOG.md +6 -0
package/docs/ROADMAP.md +114 -16
package/docs/rfcs/009-semantic-control-modeling-for-custom-and-composite-controls.md +238 -0
package/docs/rfcs/010-verification-stabilization-and-temporal-convergence.md +265 -0
package/docs/rfcs/011-recovery-and-replanning-for-failed-or-ambiguous-interaction-flows.md +321 -0
package/docs/rfcs/011.1-recovery-contract-types-and-runtime-wiring-spec.md +253 -0
package/docs/rfcs/012.md +203 -0
package/docs/specs/mcp-tooling-spec-v1.md +34 -0
package/docs/tools/interact.md +10 -0
package/package.json +1 -1
package/src/interact/index.ts +433 -194
package/src/observe/ios.ts +42 -3
package/src/server/common.ts +44 -1
package/src/server-core.ts +1 -1
package/src/types.ts +41 -1
package/src/utils/android/utils.ts +30 -3
package/test/unit/interact/adjust_control.test.ts +77 -1
package/test/unit/interact/verification_stabilization.test.ts +94 -0
package/test/unit/observe/find_element.test.ts +46 -0
package/test/unit/observe/state_extraction.test.ts +65 -2
package/test/unit/server/common.test.ts +36 -1

package/dist/observe/ios.js CHANGED Viewed

@@ -52,6 +52,12 @@ function normalizeIOSType(value) {
 function inferIOSRole(type, traits) {
     if (/slider|adjustable/.test(type) || traits.some((trait) => /adjustable|slider/.test(trait)))
         return 'slider';
+    if (/stepper/.test(type))
+        return 'stepper';
+    if (/picker|pop up button|dropdown/.test(type))
+        return 'dropdown';
+    if (/segmented control/.test(type))
+        return 'segmented_control';
     if (/button/.test(type) || traits.some((trait) => /button/.test(trait)))
         return 'button';
     if (/cell/.test(type))
@@ -99,11 +105,49 @@ function buildIOSSelector(type, label, value, stableId) {
         return { value: type, confidence: buildIOSSelectorConfidence('type') };
     return null;
 }
-function buildIOSSemantic(type, traits) {
-    return {
+function buildIOSSemantic(type, traits, role, value) {
+    const semantic = {
         is_clickable: traits.includes("UIAccessibilityTraitButton") || /adjustable|slider/.test(type) || type === "Button" || type === "Cell",
         is_container: /window|application|group|scroll view|collection view/.test(type)
     };
+    if (role === 'slider') {
+        semantic.semantic_role = 'slider';
+        semantic.adjustable = true;
+        semantic.supported_actions = ['adjust'];
+        semantic.state_shape = 'continuous';
+    }
+    else if (role === 'stepper') {
+        semantic.semantic_role = 'stepper';
+        semantic.adjustable = true;
+        semantic.supported_actions = ['increment', 'decrement'];
+        semantic.state_shape = 'discrete';
+    }
+    else if (role === 'dropdown') {
+        semantic.semantic_role = 'dropdown';
+        semantic.supported_actions = ['tap', 'expand'];
+        semantic.state_shape = 'semantic';
+    }
+    else if (role === 'segmented_control') {
+        semantic.semantic_role = 'segmented_control';
+        semantic.supported_actions = ['tap'];
+        semantic.state_shape = 'discrete';
+    }
+    else if (traits.some((trait) => /adjustable|slider/i.test(trait)) || /adjustable|slider/.test(type)) {
+        semantic.semantic_role = 'custom_adjustable';
+        semantic.adjustable = true;
+        semantic.supported_actions = ['adjust'];
+        semantic.state_shape = 'continuous';
+    }
+    else if (semantic.is_clickable) {
+        semantic.supported_actions = ['tap'];
+    }
+    if (semantic.state_shape === undefined && semantic.adjustable && value !== null) {
+        const numericValue = parseIOSNumber(value);
+        if (numericValue !== null && numericValue >= 0 && numericValue <= 1) {
+            semantic.state_shape = 'continuous';
+        }
+    }
+    return semantic;
 }
 function isIOSAdjustable(node, type, traits) {
     return /slider|adjustable|stepper/i.test(type) || traits.some((trait) => /adjustable|slider/i.test(trait));
@@ -162,8 +206,8 @@ export function traverseIDBNode(node, elements, parentIndex = -1, depth = 0) {
     const normalizedType = normalizeIOSType(type);
     const stableId = getIOSStableId(node);
     const selector = buildIOSSelector(type, label, value, stableId);
-    const semantic = buildIOSSemantic(normalizedType, traits);
     const role = inferIOSRole(normalizedType, traits);
+    const semantic = buildIOSSemantic(normalizedType, traits, role, value);
     const clickable = traits.includes("UIAccessibilityTraitButton") || type === "Button" || type === "Cell";
     const isUseful = clickable || (label && label.length > 0) || (value && value.length > 0) || type === "Application" || type === "Window";
     if (isUseful) {

package/dist/server/common.js CHANGED Viewed

@@ -1,4 +1,6 @@
 import { ToolsObserve } from '../observe/index.js';
+export const DEFAULT_MAX_RECOVERY_ATTEMPTS = 3;
+export const DEFAULT_MAX_RETRY_DEPTH = 3;
 export function wrapResponse(data) {
     return {
         content: [{
@@ -86,6 +88,8 @@ export function normalizeResolvedTarget(value = null) {
 export function inferGenericFailure(message) {
     if (message && /timeout/i.test(message))
         return { failureCode: 'TIMEOUT', retryable: true };
+    if (message && /semantic mismatch/i.test(message))
+        return { failureCode: 'SEMANTIC_MISMATCH', retryable: false };
     return { failureCode: 'UNKNOWN', retryable: false };
 }
 export function inferScrollFailure(message) {
@@ -106,6 +110,40 @@ export function determineActionLifecycleState({ success, failure }) {
         return ACTION_LIFECYCLE_STATE_BY_OUTCOME.success;
     return ACTION_LIFECYCLE_STATE_BY_OUTCOME.success;
 }
+function mapFailureCodeToFailureClass(code) {
+    switch (code) {
+        case 'ELEMENT_NOT_FOUND':
+        case 'AMBIGUOUS_TARGET':
+        case 'STALE_REFERENCE':
+            return 'TargetResolutionFailure';
+        case 'ELEMENT_NOT_INTERACTABLE':
+            return 'ExecutionFailure';
+        case 'TIMEOUT':
+        case 'ACTION_REJECTED':
+        case 'NAVIGATION_NO_CHANGE':
+        case 'UNKNOWN':
+            return 'ExecutionFailure';
+        case 'VERIFICATION_FAILED':
+        case 'EXPECT_STATE_MISMATCH':
+            return 'VerificationFailure';
+        case 'CONTROL_CONVERGENCE_FAILED':
+            return 'ControlConvergenceFailure';
+        case 'SEMANTIC_MISMATCH':
+            return 'SemanticMismatchFailure';
+    }
+}
+function buildRecoveryState(failureCode, retryable) {
+    return {
+        failure_class: mapFailureCodeToFailureClass(failureCode),
+        runtime_code: failureCode,
+        recovery_attempts: 0,
+        max_recovery_attempts: DEFAULT_MAX_RECOVERY_ATTEMPTS,
+        retry_depth: 0,
+        max_retry_depth: DEFAULT_MAX_RETRY_DEPTH,
+        is_terminal: false,
+        retry_allowed: retryable
+    };
+}
 export function buildActionExecutionResult({ actionType, device, selector, resolved, success, uiFingerprintBefore, uiFingerprintAfter, failure, details, sourceModule }) {
     const timestampMs = Date.now();
     const timestamp = new Date(timestampMs).toISOString();
@@ -122,6 +160,7 @@ export function buildActionExecutionResult({ actionType, device, selector, resol
         },
         success,
         ...(failure ? { failure_code: failure.failureCode, retryable: failure.retryable } : {}),
+        ...(failure ? { recovery: buildRecoveryState(failure.failureCode, failure.retryable) } : {}),
         ui_fingerprint_before: uiFingerprintBefore,
         ui_fingerprint_after: uiFingerprintAfter,
         ...(details ? { details } : {})

package/dist/server-core.js CHANGED Viewed

@@ -6,7 +6,7 @@ import { handleToolCall } from './server/tool-handlers.js';
 export { wrapResponse, toolDefinitions, handleToolCall };
 export const serverInfo = {
     name: 'mobile-debug-mcp',
-    version: '0.26.4'
+    version: '0.27.0'
 };
 export function createServer() {
     const server = new Server(serverInfo, {

package/dist/utils/android/utils.js CHANGED Viewed

@@ -362,6 +362,12 @@ function normalizeClassName(value) {
 function inferAndroidRole(className) {
     if (/seekbar|slider/.test(className))
         return 'slider';
+    if (/stepper|numberpicker/.test(className))
+        return 'stepper';
+    if (/spinner|dropdown/.test(className))
+        return 'dropdown';
+    if (/segment|tablayout/.test(className))
+        return 'segmented_control';
     if (/switch|toggle/.test(className))
         return 'switch';
     if (/checkbox/.test(className))
@@ -403,11 +409,37 @@ function buildAndroidSelector(text, contentDescription, resourceId, className) {
         return { value: className, confidence: buildAndroidSelectorConfidence('class') };
     return null;
 }
-function buildAndroidSemantic(clickable, className) {
-    return {
+function buildAndroidSemantic(clickable, className, role) {
+    const semantic = {
         is_clickable: clickable,
         is_container: /recyclerview|scroll|layout|viewgroup|frame/.test(className)
     };
+    if (role === 'slider') {
+        semantic.semantic_role = 'slider';
+        semantic.adjustable = true;
+        semantic.supported_actions = ['adjust'];
+        semantic.state_shape = 'continuous';
+    }
+    else if (role === 'stepper') {
+        semantic.semantic_role = 'stepper';
+        semantic.adjustable = true;
+        semantic.supported_actions = ['increment', 'decrement'];
+        semantic.state_shape = 'discrete';
+    }
+    else if (role === 'dropdown') {
+        semantic.semantic_role = 'dropdown';
+        semantic.supported_actions = ['tap', 'expand'];
+        semantic.state_shape = 'semantic';
+    }
+    else if (role === 'segmented_control') {
+        semantic.semantic_role = 'segmented_control';
+        semantic.supported_actions = ['tap'];
+        semantic.state_shape = 'discrete';
+    }
+    else if (clickable) {
+        semantic.supported_actions = ['tap'];
+    }
+    return semantic;
 }
 function isSliderLikeAndroid(node) {
     const className = String(node['@_class'] || '').toLowerCase();
@@ -487,7 +519,7 @@ export function traverseNode(node, elements, parentIndex = -1, depth = 0) {
         const stableId = resourceId ?? (typeof contentDescription === 'string' && contentDescription.trim().length > 0 ? contentDescription : null);
         const testTag = stableId;
         const selector = buildAndroidSelector(text, contentDescription, resourceId, normalizeClassName(className));
-        const semantic = buildAndroidSemantic(clickable, normalizeClassName(className));
+        const semantic = buildAndroidSemantic(clickable, normalizeClassName(className), role);
         const isUseful = clickable || (text && text.length > 0) || (contentDescription && contentDescription.length > 0);
         if (isUseful) {
             const element = {

package/docs/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,12 @@
 All notable changes to the **Mobile Debug MCP** project will be documented in this file.
+## [0.27.0]
+- Defines a structured recovery and replanning model for UI interaction failures, enabling the system to respond to execution uncertainty with bounded, deterministic recovery strategies.
+## [0.26.5]
+- Introduces a semantic control model to improve the identification and interaction with custom and composite UI controls.
 ## [0.26.4]
 - Improved slider accuracy

package/docs/ROADMAP.md CHANGED Viewed

@@ -44,17 +44,20 @@ Higher task success with fewer retries.
 - Stronger State Verification — Complete (Foundational verification layer shipped)
 - Richer Element Identity — Complete (Identity and selector confidence foundations shipped)
+- Better Compose / Custom Control Semantics — Complete (Semantic role enrichment and custom-adjustable inference shipped)
 ## Current Focus
 - Wait and Synchronization Reliability
 - Actionability Resolution
+- Verification Stabilization and Temporal Convergence
 ## Upcoming Work
+- Adjustable Control Precision Hardening
 - Environment Auto-Configuration and Toolchain Discovery
 - Adjustable Control Support
-- Better Compose / Custom Control Semantics
+- Verification Stabilization and Temporal Convergence
 - Signal-Oriented Diagnostic Filtering
 - Long Press Gesture
 # Stronger State Verification
@@ -243,6 +246,53 @@ Blocks or strengthens:
 ---
+# Verification Stabilization and Temporal Convergence
+## Rationale
+Real-world feedback exposed false-negative readiness failures caused by transient UI timing, even when the target state had actually converged.
+**Status:** Planned
+Addresses friction where agents:
+- fail readiness checks on transient timing races
+- act on stale snapshots
+- misclassify eventual success as timeout failure
+- encounter lag between UI convergence and verification success
+## Scope
+- Bounded recheck before readiness failure
+- Temporal debounce for transient state mismatches
+- Verify-until-stable semantics for readiness checks
+- Stability confirmation windows
+- Snapshot freshness and convergence heuristics
+## Expected Impact
+Very high.
+## Exit Criteria
+- False-negative readiness failures materially reduced
+- Stability confirmation logic implemented
+- Benchmark async flows validate improved convergence detection
+- Verification timing behavior documented in guardrails
+## Success Metrics
+- Higher first-pass verification success
+- Lower false timeout failures
+- Higher wait success rate
+- Fewer retries caused by premature failure classification
+## Dependencies
+Depends on:
+- Stronger State Verification
+- Wait and Synchronization Reliability
+Strengthens:
+- Actionability Resolution
+- Adjustable Control Support
+- Recovery and replanning readiness
+---
 # Actionability Resolution
 ## Rationale
@@ -338,6 +388,48 @@ Blocks or strengthens:
 ---
+# Adjustable Control Precision Hardening
+## Rationale
+Post-implementation feedback shows semantics exist, but fine-grained adjustable targeting and convergence still need hardening.
+**Status:** Planned
+Addresses friction around:
+- slider thumb targeting precision
+- tap vs drag adjustment strategy selection
+- snapping and quantized convergence behavior
+- repeated adjustment retries before landing on target value
+## Scope
+- Fine-grained slider targeting refinement
+- Drag vs tap adjustment strategy heuristics
+- Improved value snapping convergence
+- Control-specific adjustment fallback policies
+## Expected Impact
+High.
+## Exit Criteria
+- Benchmark slider flows reach target values with fewer retries
+- Adjustment strategy selection validated across representative controls
+- Reduced repeated-tap convergence failures
+## Success Metrics
+- Fewer retries for adjustable controls
+- Higher first-attempt target value success
+- Reduced control convergence failures
+## Dependencies
+Depends on:
+- Adjustable Control Support
+- Better Compose / Custom Control Semantics
+Strengthens:
+- Recovery readiness
+---
 # Signal-Oriented Diagnostic Filtering
 ## Rationale
@@ -435,7 +527,7 @@ Strengthens:
 ## Rationale
 Higher priority after agent feedback exposed custom control semantics as a core reliability gap, not a later optimization.
-**Status:** Spec Ready
+**Status:** Completed
 Semantics become more useful once:
 - identity is stronger
@@ -446,8 +538,8 @@ Semantics become more useful once:
 ## Scope
 - Composite control traits
-- Control role enrichment (adjustable, expandable, selectable_group)
-- Interaction contracts metadata
+- Control role enrichment (`slider`, `stepper`, `dropdown`, `segmented_control`, `custom_adjustable`)
+- Interaction contract metadata (`supported_actions`, `adjustable`, `state_shape`)
 - Custom widget gesture affordance hints
 - Semantic confidence annotations
 - Compose-aware selectors for waits (merged semantics and element relationships)
@@ -457,7 +549,7 @@ High.
 ## Exit Criteria
 - Semantic traits implemented for major custom control classes
-- Interaction contracts surfaced in snapshot model
+- Interaction contracts surfaced in observation and resolution paths
 - Confidence model defined for derived semantics
 - Custom control manipulation success validated in benchmark flows
@@ -574,11 +666,13 @@ Synchronization & Actionability
 Control Precision & Observability
 - Adjustable Control Support
+- Adjustable Control Precision Hardening
+- Better Compose / Custom Control Semantics
 - Signal-Oriented Diagnostic Filtering
+- Verification Stabilization and Temporal Convergence
 Interaction Expansion
 - Long Press Gesture
-- Better Compose / Custom Control Semantics
 - Pinch to Zoom
 Deep Observability
@@ -598,11 +692,13 @@ Make core loop reliable and reduce onboarding friction.
 ## Wave 2 (Control Precision + Diagnostics)
 - Adjustable Control Support
+- Adjustable Control Precision Hardening
 - Better Compose / Custom Control Semantics
 - Signal-Oriented Diagnostic Filtering
+- Verification Stabilization and Temporal Convergence
 Focus:
-Improve control precision, custom control semantics, and signal observability.
+Improve control precision, verification convergence, custom control reliability, and signal observability.
 ---
@@ -629,14 +725,16 @@ Roadmap Ordering:
 1. Stronger State Verification
 2. Richer Element Identity
 3. Wait and Synchronization Reliability
-4. Environment Auto-Configuration and Toolchain Discovery
-5. Actionability Resolution
-6. Adjustable Control Support
-7. Better Compose / Custom Control Semantics
-8. Signal-Oriented Diagnostic Filtering
-9. Long Press Gesture
-10. Pinch to Zoom
-11. Action Trace Correlation
+4. Verification Stabilization and Temporal Convergence
+5. Environment Auto-Configuration and Toolchain Discovery
+6. Actionability Resolution
+7. Adjustable Control Support
+8. Adjustable Control Precision Hardening
+9. Better Compose / Custom Control Semantics
+10. Signal-Oriented Diagnostic Filtering
+11. Long Press Gesture
+12. Pinch to Zoom
+13. Action Trace Correlation
 Rationale:
 - Early roadmap items harden state, targeting, synchronization, environment readiness, and action execution.
@@ -649,7 +747,7 @@ Rationale:
 ## Future Considerations
 Still out of scope:
-- Recovery planning logic
+- Full autonomous recovery planning (deferred until after verification stabilization)
 - Autonomous retry strategy
 - MCP-level agent orchestration
 - Autonomous recovery hinting (future consideration only)

package/docs/rfcs/009-semantic-control-modeling-for-custom-and-composite-controls.md ADDED Viewed

@@ -0,0 +1,238 @@
+# RFC 009 — Semantic Control Modeling for Custom and Composite Controls
+## 1. Summary
+This RFC defines a semantic control model for identifying, exposing, and interacting with custom and composite controls that are poorly represented through raw accessibility or platform UI trees.
+It introduces semantic enrichment for controls such as:
+- sliders
+- steppers
+- segmented controls
+- dropdowns
+- Compose/SwiftUI custom widgets
+- composite gesture-driven controls
+The goal is to improve target resolution, control interaction, and verification reliability for controls whose actionable semantics are not fully captured by raw snapshots.
+---
+## 2. Problem Statement
+Current interaction logic works well when platform semantics are explicit.
+It is weaker when controls appear as:
+- generic container views
+- unlabeled clickable wrappers
+- nested composite controls
+- custom Compose/SwiftUI components with weak accessibility exposure
+Observed problems include:
+- controls resolving as parent containers rather than actionable targets
+- missing slider-like controls in snapshots
+- weak distinction between discrete vs continuous controls
+- inability to infer supported interactions from control structure
+- unreliable verification of control state
+This causes brittle automation and coordinate fallback behavior.
+---
+## 3. Goals
+This RFC introduces a semantic layer that MUST:
+- infer higher-level control semantics from raw UI structures
+- enrich snapshots with semantic control metadata
+- improve actionable target selection (RFC 007)
+- improve adjustable control handling (RFC 008)
+- improve verification for semantic control state
+- reduce coordinate fallback usage
+---
+## 4. Non-Goals
+This RFC does NOT define:
+- replacement of raw accessibility trees
+- ML-based semantic inference
+- probabilistic control classification
+- new gesture primitives
+- autonomous planning behavior
+Semantic modeling is deterministic enrichment layered over raw signals.
+---
+## 5. Runtime Surfaces
+This RFC applies to existing runtime surfaces:
+- findElementHandler
+- _resolveActionableAncestor
+- _buildResolvedElement
+- tapElementHandler
+- scrollToElementHandler
+Semantic modeling augments these surfaces; it does not replace them.
+---
+## 6. Semantic Control Model
+Controls MAY progressively expose semantic metadata such as:
+```ts
+interface SemanticControl {
+  semantic_role:
+    | "slider"
+    | "stepper"
+    | "dropdown"
+    | "segmented_control"
+    | "custom_adjustable"
+    | "composite_control";
+  supported_actions: string[];
+  adjustable: boolean;
+  state_shape:
+    | "continuous"
+    | "discrete"
+    | "semantic";
+}
+```
+The control roles above represent an expected semantic model, not a claim that all such control classes are equally surfaced in the current runtime.
+Current runtime support may initially expose simpler semantic signals such as:
+- role hints
+- semantic labels
+- value_range metadata
+- selector confidence or related resolution signals
+Richer control roles are progressive extensions over time.
+---
+## 7. Semantic Inference Rules
+Inference MAY use signals such as:
+- accessibility role hints
+- value_range metadata
+- child composition patterns
+- repeated selectable child structures
+- platform traits (adjustable, selected, expanded)
+- known control heuristics
+Inference MUST be deterministic and explainable.
+Raw signals always win on conflict.
+Semantic inference confidence, where present, is advisory only and MUST NOT be treated as executable truth.
+---
+## 8. Resolution Integration (RFC 007)
+Semantic metadata SHOULD improve target resolution by:
+- preferring actionable child controls over generic containers
+- promoting semantically actionable descendants
+- disambiguating among multiple candidate matches
+Semantic signals are advisory enrichment, not executable truth.
+---
+## 9. Adjustable Control Integration (RFC 008)
+Where adjustable=true:
+Semantic metadata MAY expose:
+- supported adjustment mode
+- discrete vs continuous state model
+- expected verification strategy
+This improves convergence for value-setting workflows.
+---
+## 10. Verification Integration
+Verification MAY use semantic control metadata to improve:
+- value-state verification
+- discrete selection verification
+- semantic-state checks
+Formal verification still remains governed by RFC 005.
+---
+## 11. Output Contract (Progressive Extension)
+Current runtime may expose partial semantic outputs.
+Expected progressive shape (future extension model):
+```ts
+interface SemanticResolutionMetadata {
+  semantic_role?: string;
+  supported_actions?: string[];
+  adjustable?: boolean;
+  state_shape?: string;
+  confidence?: "low" | "medium" | "high";
+}
+```
+These fields are progressive enrichment and MUST NOT be assumed universally present.
+Implementations MAY expose only a subset of this model initially. Presence of a richer semantic role does not imply universal runtime support for all control classes.
+---
+## 12. Failure Modes
+Semantic modeling MAY fail due to:
+- insufficient raw signals
+- ambiguous composite structures
+- conflicting heuristics
+When semantic inference confidence is insufficient:
+- raw resolution flow MUST continue
+- semantic fields MAY be omitted
+- no semantic guessing should be forced
+---
+## 13. Success Metrics
+- fewer coordinate fallbacks
+- improved control discovery
+- improved actionable-target precision
+- improved slider/custom-control automation success
+- reduced semantic mismatch failures (RFC 011)
+---
+## 14. Relationship to Other RFCs
+RFC 005 — verification correctness model
+RFC 006 — runtime action execution
+RFC 007 — target resolution
+RFC 008 — adjustable control support
+RFC 011 — recovery uses semantic mismatch failures defined here
+---
+## 15. Summary
+This RFC adds deterministic semantic control enrichment for custom and composite controls, improving resolution, interaction reliability, and verification while remaining layered over existing runtime signals.