npm - mobile-debug-mcp - Versions diffs - 0.26.2 → 0.26.4 - Mend

mobile-debug-mcp 0.26.2 → 0.26.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

package/AGENTS.md +3 -0
package/dist/interact/index.js +600 -70
package/dist/observe/ios.js +1 -1
package/dist/server/tool-definitions.js +59 -1
package/dist/server/tool-handlers.js +25 -0
package/dist/server-core.js +1 -1
package/dist/utils/android/utils.js +2 -2
package/docs/CHANGELOG.md +6 -0
package/docs/ROADMAP.md +72 -16
package/docs/rfcs/007-actionability-resolution-and-executable-target-selection.md +277 -0
package/docs/rfcs/008-adjustable-control-support-and-semantic-value-manipulation.md +273 -0
package/docs/specs/mcp-tooling-spec-v1.md +1 -1
package/docs/tools/interact.md +30 -1
package/package.json +1 -1
package/src/interact/index.ts +761 -72
package/src/observe/ios.ts +1 -1
package/src/server/tool-definitions.ts +59 -1
package/src/server/tool-handlers.ts +26 -0
package/src/server-core.ts +1 -1
package/src/types.ts +90 -0
package/src/utils/android/utils.ts +2 -2
package/test/unit/interact/adjust_control.test.ts +365 -0
package/test/unit/observe/find_element.test.ts +5 -0
package/test/unit/observe/state_extraction.test.ts +24 -0
package/test/unit/server/contract.test.ts +8 -0
package/test/unit/server/response_shapes.test.ts +39 -0

package/dist/observe/ios.js CHANGED Viewed

@@ -106,7 +106,7 @@ function buildIOSSemantic(type, traits) {
     };
 }
 function isIOSAdjustable(node, type, traits) {
-    return /slider|adjustable|stepper|progress/i.test(type) || traits.some((trait) => /adjustable|slider|progress/i.test(trait));
+    return /slider|adjustable|stepper/i.test(type) || traits.some((trait) => /adjustable|slider/i.test(trait));
 }
 function extractIOSState(node, type, label, value, traits) {
     const state = {};

package/dist/server/tool-definitions.js CHANGED Viewed

@@ -555,6 +555,62 @@ Failure Handling:
             required: ['property', 'expected']
         }
     },
+    {
+        name: 'adjust_control',
+        description: `Purpose:
+Adjust a numeric control value with verification.
+This is the initial adjustable-control surface for slider-like controls and other controls that expose a numeric value or value_range.
+Inputs:
+- selector or element_id
+- property (defaults to "value")
+- targetValue
+- tolerance (optional)
+- maxAttempts (optional)
+- platform/deviceId (optional)
+Output Structure:
+- action_id, timestamp (ISO 8601), action_type
+- lifecycle_state: post-dispatch lifecycle state (pending_verification or failed)
+- source_module: runtime source of the action envelope
+- target_state / actual_state / within_tolerance / converged / attempts / adjustment_mode
+- target.selector = original selector or element handle
+- success = true when the control converges within tolerance
+Verification Guidance:
+- Prefer direct target placement when value_range is available; fall back to a drag only if the direct tap does not converge
+- Use expect_state for the control value readback
+- Treat coordinate fallback as degraded mode
+Failure Handling:
+- ELEMENT_NOT_FOUND → re-resolve the control
+- ELEMENT_NOT_INTERACTABLE → the control cannot be adjusted through the current runtime
+- TIMEOUT → the control did not converge within bounded retries
+- UNKNOWN → capture a snapshot and stop`,
+        inputSchema: {
+            type: 'object',
+            properties: {
+                selector: {
+                    type: 'object',
+                    properties: {
+                        text: { type: 'string' },
+                        resource_id: { type: 'string' },
+                        accessibility_id: { type: 'string' },
+                        contains: { type: 'boolean', default: false }
+                    }
+                },
+                element_id: { type: 'string', description: 'Optional previously resolved element identifier.' },
+                property: { type: 'string', description: 'Readable numeric state property to adjust.', default: 'value' },
+                targetValue: { type: 'number', description: 'Target numeric value.' },
+                tolerance: { type: 'number', description: 'Accepted numeric tolerance around the target value.', default: 0 },
+                maxAttempts: { type: 'number', description: 'Maximum adjustment attempts.', default: 3 },
+                platform: { type: 'string', enum: ['android', 'ios'], description: 'Optional platform override' },
+                deviceId: { type: 'string', description: 'Optional device serial/udid' }
+            },
+            required: ['targetValue']
+        }
+    },
     {
         name: 'wait_for_ui',
         description: `Purpose:
@@ -596,7 +652,9 @@ Recommended Usage:
     },
     {
         name: 'find_element',
-        description: 'Find a UI element by semantic query (text, content-desc, resource-id, class). Returns best match.',
+        description: `Find a UI element by semantic query (text, content-desc, resource-id, class).
+Returns the best match plus resolution metadata when available, including confidence, selection reason, and fallback alternates.`,
         inputSchema: {
             type: 'object',
             properties: {

package/dist/server/tool-handlers.js CHANGED Viewed

@@ -226,6 +226,30 @@ async function handleExpectState(args) {
     const res = await ToolsInteract.expectStateHandler({ selector: selector ?? undefined, element_id: element_id ?? undefined, property, expected, platform, deviceId });
     return wrapResponse(res);
 }
+async function handleAdjustControl(args) {
+    const selector = getObjectArg(args, 'selector');
+    const element_id = getStringArg(args, 'element_id');
+    const property = getStringArg(args, 'property') ?? 'value';
+    const targetValue = requireNumberArg(args, 'targetValue');
+    const tolerance = getNumberArg(args, 'tolerance') ?? 0;
+    const maxAttempts = getNumberArg(args, 'maxAttempts') ?? 3;
+    const platform = getStringArg(args, 'platform');
+    const deviceId = getStringArg(args, 'deviceId');
+    if (!selector && !element_id) {
+        throw new Error('Missing selector or element_id argument');
+    }
+    const res = await ToolsInteract.adjustControlHandler({
+        selector: selector ?? undefined,
+        element_id: element_id ?? undefined,
+        property,
+        targetValue,
+        tolerance,
+        maxAttempts,
+        platform,
+        deviceId
+    });
+    return wrapResponse(res);
+}
 async function handleWaitForUI(args) {
     const selector = getObjectArg(args, 'selector');
     const condition = getStringArg(args, 'condition') ?? 'exists';
@@ -431,6 +455,7 @@ export const toolHandlers = {
     expect_screen: handleExpectScreen,
     expect_element_visible: handleExpectElementVisible,
     expect_state: handleExpectState,
+    adjust_control: handleAdjustControl,
     wait_for_ui: handleWaitForUI,
     find_element: handleFindElement,
     tap: handleTap,

package/dist/server-core.js CHANGED Viewed

@@ -6,7 +6,7 @@ import { handleToolCall } from './server/tool-handlers.js';
 export { wrapResponse, toolDefinitions, handleToolCall };
 export const serverInfo = {
     name: 'mobile-debug-mcp',
-    version: '0.26.2'
+    version: '0.26.4'
 };
 export function createServer() {
     const server = new Server(serverInfo, {

package/dist/utils/android/utils.js CHANGED Viewed

@@ -360,7 +360,7 @@ function normalizeClassName(value) {
     return typeof value === 'string' ? value.trim().toLowerCase() : '';
 }
 function inferAndroidRole(className) {
-    if (/seekbar|slider|progress/.test(className))
+    if (/seekbar|slider/.test(className))
         return 'slider';
     if (/switch|toggle/.test(className))
         return 'switch';
@@ -411,7 +411,7 @@ function buildAndroidSemantic(clickable, className) {
 }
 function isSliderLikeAndroid(node) {
     const className = String(node['@_class'] || '').toLowerCase();
-    return /seekbar|slider|range|progress/i.test(className);
+    return /seekbar|slider|range/i.test(className);
 }
 function extractAndroidState(node) {
     const checked = parseBooleanAttr(node['@_checked']);

package/docs/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,12 @@
 All notable changes to the **Mobile Debug MCP** project will be documented in this file.
+## [0.26.4]
+- Improved slider accuracy
+## [0.26.3]
+- updates the `find_element` tool to return detailed resolution metadata, including confidence scores, selection reasons, and fallback alternates
 ## [0.26.2]
 - unified action execution and verification model

package/docs/ROADMAP.md CHANGED Viewed

@@ -52,10 +52,62 @@ Higher task success with fewer retries.
 ## Upcoming Work
+- Environment Auto-Configuration and Toolchain Discovery
 - Adjustable Control Support
+- Better Compose / Custom Control Semantics
 - Signal-Oriented Diagnostic Filtering
 - Long Press Gesture
-- Better Compose / Custom Control Semantics
+# Stronger State Verification
+# Richer Element Identity
+# Wait and Synchronization Reliability
+# Environment Auto-Configuration and Toolchain Discovery
+## Rationale
+Reduce onboarding friction and improve developer experience by minimizing manual setup dependencies.
+**Status:** Planned
+Addresses friction around:
+- manual idb installation
+- manual adb path configuration
+- manual xcrun path configuration
+- environment drift across machines
+- setup failures blocking first use
+## Scope
+- Automatic discovery of adb
+- Automatic discovery of xcrun
+- idb detection and guided bootstrap support
+- Startup toolchain validation
+- Environment health diagnostics / doctor-style checks
+- Minimal-manual-configuration defaults
+## Expected Impact
+High.
+## Exit Criteria
+- adb and xcrun auto-discovery implemented
+- Missing dependencies surfaced with guided remediation
+- Startup environment validation available
+- Manual path configuration eliminated or minimized for standard setups
+- First-run setup validated on representative developer environments
+## Success Metrics
+- Reduced setup friction during onboarding
+- Lower environment configuration failures
+- Faster time-to-first-successful-session
+- Reduced support/debugging caused by local setup issues
+## Dependencies
+Depends on:
+- Stronger State Verification
+- Richer Element Identity
+Strengthens:
+- Actionability Resolution
+- Broader user adoption readiness
+---
 ## Later Horizon
@@ -160,6 +212,7 @@ Addresses failures where agents:
 - wait_for_ui_change (hierarchy diff based waiting)
 - Structured loading state detection
 - Snapshot revision / staleness metadata
+- Focused snapshot views / incremental snapshot diffs
 - Compose-aware wait robustness improvements
 ## Expected Impact
@@ -169,6 +222,7 @@ Very high.
 - wait_for_ui_change implemented
 - Loading state detection available for representative controls
 - Snapshot revision or staleness metadata exposed
+- Focused or diff-oriented snapshots validated in benchmark flows
 - UI-first sync guidance added to spec guardrails
 - In-place update waits validated on benchmark flows
@@ -379,9 +433,9 @@ Strengthens:
 # Better Compose / Custom Control Semantics
 ## Rationale
-Important, but strengthened by earlier capabilities first.
+Higher priority after agent feedback exposed custom control semantics as a core reliability gap, not a later optimization.
-**Status:** Planned
+**Status:** Spec Ready
 Semantics become more useful once:
 - identity is stronger
@@ -419,7 +473,6 @@ Depends on:
 - Wait and Synchronization Reliability
 - Actionability Resolution
 - Adjustable Control Support
-- Signal-Oriented Diagnostic Filtering
 - Long Press Gesture
 ---
@@ -515,6 +568,7 @@ Foundation
 - Richer Element Identity
 Synchronization & Actionability
+- Environment Auto-Configuration and Toolchain Discovery
 - Wait and Synchronization Reliability
 - Actionability Resolution
@@ -535,27 +589,28 @@ Deep Observability
 - Richer Element Identity
 - Wait and Synchronization Reliability
 - Actionability Resolution
+- Environment Auto-Configuration and Toolchain Discovery
 Focus:
-Make core loop more reliable.
+Make core loop reliable and reduce onboarding friction.
 ---
 ## Wave 2 (Control Precision + Diagnostics)
 - Adjustable Control Support
+- Better Compose / Custom Control Semantics
 - Signal-Oriented Diagnostic Filtering
 Focus:
-Improve control precision and signal observability.
+Improve control precision, custom control semantics, and signal observability.
 ---
 ## Wave 3 (Interaction Expansion)
 - Long Press Gesture
-- Better Compose / Custom Control Semantics
 Focus:
-Expand interaction capability.
+Expand interaction capability after core control reliability is improved.
 ---
@@ -574,16 +629,17 @@ Roadmap Ordering:
 1. Stronger State Verification
 2. Richer Element Identity
 3. Wait and Synchronization Reliability
-4. Actionability Resolution
-5. Adjustable Control Support
-6. Signal-Oriented Diagnostic Filtering
-7. Long Press Gesture
-8. Better Compose / Custom Control Semantics
-9. Pinch to Zoom
-10. Action Trace Correlation
+4. Environment Auto-Configuration and Toolchain Discovery
+5. Actionability Resolution
+6. Adjustable Control Support
+7. Better Compose / Custom Control Semantics
+8. Signal-Oriented Diagnostic Filtering
+9. Long Press Gesture
+10. Pinch to Zoom
+11. Action Trace Correlation
 Rationale:
-- Early roadmap items harden state, targeting, synchronization, action execution.
+- Early roadmap items harden state, targeting, synchronization, environment readiness, and action execution.
 - Mid roadmap items improve control precision and signal observability.
 - Later interaction-focused items expand interaction coverage.
 - Final observability work deepens debugging observability.

package/docs/rfcs/007-actionability-resolution-and-executable-target-selection.md ADDED Viewed

@@ -0,0 +1,277 @@
+# RFC 007 — Actionability Resolution and Executable Target Selection
+## 1. Summary
+This RFC defines how the system resolves which discovered UI element should receive an action before dispatch.
+It addresses ambiguity between:
+- visible elements vs actionable elements
+- leaf nodes vs clickable containers
+- semantic targets vs coordinate fallbacks
+- multiple candidate targets with uncertain executability
+Goal:
+Improve first-attempt action correctness by resolving the best executable target prior to action dispatch.
+This RFC defines the `Resolved` stage semantics referenced in RFC 005 and operationalized by RFC 006.
+It is grounded in the existing element-resolution flow and extends current resolution behavior rather than assuming a wholly new resolver architecture.
+---
+## 2. Problem Statement
+Current interaction failures often arise before execution.
+The agent may discover the intended UI concept, but not the correct executable target.
+Examples:
+- tapping label text instead of clickable container
+- sliders not surfacing semantic handles
+- generic Compose containers hiding true affordances
+- multiple matching targets without ranking logic
+Observed failure modes:
+- false taps
+- submit ambiguity
+- coordinate guessing
+- retry loops
+- brittle fallback behavior
+This is a target-resolution problem, not an execution problem.
+---
+## 3. Design Goals
+Resolution MUST:
+- Prefer executable targets over merely visible matches
+- Reduce ambiguous target selection
+- Support confidence-based ranking
+- Build on existing runtime resolution surfaces before introducing new resolution metadata
+- Use structural and semantic resolution signals
+- Minimize coordinate fallback usage
+- Integrate with verification expectations from RFC 005
+---
+## 4. Actionability Model
+Candidate targets are evaluated using actionability signals.
+### Structural signals
+- clickable
+- enabled
+- focusable
+- bounds
+- parent action ownership
+### Semantic signals
+- control role
+- label association
+- affordance hints
+- selectable or adjustable semantics
+### Interaction signals
+- reliable target patterns
+- control-specific heuristics
+- gesture compatibility
+---
+## 4.1 Current Runtime Resolution Surfaces
+This RFC builds on current runtime resolution paths, including:
+- `findElementHandler` for candidate discovery
+- `_resolveActionableAncestor` for executable ancestor promotion
+- `tapElementHandler` for resolved element dispatch
+- `scrollToElementHandler` for scroll-mediated target acquisition
+These existing handlers are the current implementation substrate for the Resolved stage.
+This RFC extends and systematizes those behaviors; it does not assume replacement of those paths.
+---
+## 5. Target Candidate Ranking
+When multiple targets match, candidates are ranked.
+Illustrative confidence model:
+resolution_confidence =
+ interactability_score
+ + semantic_match_score
+ + structural_reliability_score
+Highest-confidence executable target is preferred.
+The confidence model is illustrative: only the rule precedence it implies is normative. Implementations may use simpler heuristics as long as they preserve the resolution-ordering guarantees. Any scoring mechanism is implementation-defined and need not be surfaced externally.
+---
+## 6. Resolution Rules
+### Rule A — Prefer actionable containers over passive leaf nodes
+Prefer:
+- clickable container
+Over:
+- passive child text nodes
+Example:
+Prefer button container over "Generate Session" label node.
+---
+### Rule B — Prefer semantic controls over coordinate fallbacks
+Use semantic control targets whenever possible.
+Use coordinate fallback only when:
+- no semantic target exists
+- adjustable control semantics are absent
+- fallback confidence is acceptable
+---
+### Rule C — Prefer explicit affordance ownership
+If child and parent differ:
+prefer the node owning the action handler.
+---
+## 7. Ambiguity Handling
+When multiple plausible targets remain:
+System SHOULD:
+- rank candidates
+- expose confidence
+- preserve alternates for fallback reasoning
+Low-confidence targets may trigger:
+- guarded execution
+- alternate resolution attempt
+- explicit recovery path
+---
+## 8. Adjustable Control Resolution
+Special handling for:
+- sliders
+- steppers
+- drag controls
+Support:
+- adjustable-role recognition
+- control-bound discovery
+- value-aware interaction targeting
+This RFC defines target resolution.
+Value-setting behavior remains governed by Adjustable Control Support.
+---
+## 9. Compose / Custom Control Resolution
+Support derived actionability for:
+- merged Compose semantics
+- composite controls
+- inferred interaction contracts
+This RFC depends on and strengthens Better Compose / Custom Control Semantics.
+---
+## 10. Resolution Output Model (Current + Future Extension)
+This model is non-normative and represents a progressive enrichment direction rather than a required runtime contract.
+Resolution may evolve toward the following enriched output shape. Current runtime implementations may expose only resolved-target output plus limited supporting metadata.
+At minimum, current implementations are expected to produce a resolved target. Confidence, alternates, fallback metadata, and reason codes may be introduced incrementally.
+Illustrative future-complete shape:
+{
+  "resolved_target": "...",
+  "confidence": 0.92,
+  "fallback_available": true,
+  "resolution_reason": "clickable_parent_preferred"
+}
+---
+## 11. Verification Integration
+Resolution is incomplete without verification expectations.
+Resolved output should be derived directly from the existing element-resolution flow before adding richer metadata layers.
+A resolved target should carry its expected post-action signal.
+Examples:
+- navigation transition expected
+- menu expected
+- control value change expected
+This feeds RFC 005 verification.
+---
+## 12. Success Metrics
+Track:
+- reduced false-tap failures
+- lower retarget retries
+- higher first-attempt action success
+- reduced coordinate fallback usage
+- improved custom control interaction success
+---
+## 13. Dependencies
+Depends on:
+- Stronger State Verification
+- Richer Element Identity
+- Wait and Synchronization Reliability
+Strengthens:
+- Adjustable Control Support
+- Better Compose / Custom Control Semantics
+---
+## 14. Relationship to Other RFCs
+RFC 005
+Defines what Resolved means in lifecycle semantics.
+RFC 006
+Defines how runtime interprets action execution.
+RFC 007
+Defines how a target becomes Resolved.
+Specifically, it formalizes the current discovery → actionable ancestor resolution → dispatch preparation flow already present in runtime handlers.
+Together:
+- RFC 005 — action correctness
+- RFC 006 — runtime execution binding
+- RFC 007 — executable target resolution
+---
+## 15. Summary
+This RFC reduces failures caused by acting on the wrong thing, even when the right thing was discovered.
+It improves:
+- action precision
+- control reliability
+- Compose interaction robustness
+- agent success with fewer retries
+It addresses one of the largest remaining sources of interaction brittleness.