npm - mobile-debug-mcp - Versions diffs - 0.21.5 → 0.23.0 - Mend

mobile-debug-mcp 0.21.5 → 0.23.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (90) hide show

package/dist/utils/resolve-device.js CHANGED Viewed

@@ -10,15 +10,27 @@ function parseNumericVersion(v) {
     const minor = parseInt(m[2] || "0", 10) || 0;
     return major + minor / 100;
 }
+let androidDeviceLister = listAndroidDevices; // Android lister that listDevices calls; replaceable through _setDeviceListersForTests
+let iosDeviceLister = listIOSDevices; // iOS lister that listDevices calls; replaceable through _setDeviceListersForTests
+export function _setDeviceListersForTests(overrides) { // Test-only seam: swaps the module-level listers used by listDevices; a swap stays in effect until _resetDeviceListersForTests is called
+    if (overrides.listAndroidDevices) // replace the Android lister only when a truthy override is supplied
+        androidDeviceLister = overrides.listAndroidDevices;
+    if (overrides.listIOSDevices) // replace the iOS lister only when a truthy override is supplied
+        iosDeviceLister = overrides.listIOSDevices;
+} // NOTE(review): overrides is dereferenced without a guard, so callers must pass an object (an empty object changes nothing)
+export function _resetDeviceListersForTests() { // Test-only: undoes any replacement made by _setDeviceListersForTests
+    androidDeviceLister = listAndroidDevices; // back to the default Android lister
+    iosDeviceLister = listIOSDevices; // back to the default iOS lister
+}
 export async function listDevices(platform, appId) {
     if (!platform || platform === "android") {
-        const android = await listAndroidDevices(appId);
+        const android = await androidDeviceLister(appId);
         if (platform === "android")
             return android;
-        const ios = await listIOSDevices(appId);
+        const ios = await iosDeviceLister(appId);
         return [...android, ...ios];
     }
-    return listIOSDevices(appId);
+    return iosDeviceLister(appId);
 }
 export async function resolveTargetDevice(opts) {
     const { platform, appId, prefer, deviceId } = opts;

package/docs/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,15 @@
 All notable changes to the **Mobile Debug MCP** project will be documented in this file.
+## [0.23.0]
+- Added network monitoring
+- Added
+## [0.22.0]
+- Added a portable `test-authoring` skill package and documented the repository's vendor-neutral skill format
+- Added `AGENTS.md` as a top-level cold-start guide for autonomous agents entering the public repository
+- Added `tap_element` for deterministic action execution on previously resolved `wait_for_ui` elements
 ## [0.21.5]
 - Fixed incorrect timeout
@@ -52,7 +61,7 @@ All notable changes to the **Mobile Debug MCP** project will be documented in th
 ## [0.16.0]
 - Added `wait_for_screen_change` interact tool: polls the platform-specific `get_screen_fingerprint` until it differs from a provided `previousFingerprint`, with configurable `timeoutMs` and `pollIntervalMs` and an optional stability confirmation poll to avoid reacting to transient UI flickers. Implemented at the interact layer and delegates fingerprinting to the observe implementations (Android/iOS).
-- Added unit tests covering immediate change, transient null fingerprints, stability confirmation and timeout behavior: `test/interact/unit/wait_for_screen_change.test.ts`.
+- Added unit tests covering immediate change, transient null fingerprints, stability confirmation and timeout behavior: `test/unit/interact/wait_for_screen_change.test.ts`.
 ## [0.15.0]
 - Reorganised repository for cohesion: merged tool handlers into feature entrypoints (src/observe, src/interact, src/manage) and moved platform helpers and CLI tooling into src/utils/{android,ios,cli}.

package/docs/tools/interact.md CHANGED Viewed

@@ -50,7 +50,7 @@ Notes:
 - Visibility check uses element.bounds intersecting the device resolution when available; falls back to the element.visible flag if bounds/resolution are missing.
 - The tool fingerprints the visible UI between scrolls; if the fingerprint doesn't change after a swipe the tool stops early assuming end-of-list.
 - Android swipe uses `adb shell input swipe` with screen percentage coordinates. iOS swipe uses the `idb ui swipe` command; note that `idb` swipe does not accept a duration argument.
-- Unit tests are located at `test/unit/observe/scroll_to_element.test.ts` and device runners at `test/device/observe/`.
+- Unit tests are located at `test/unit/observe/scroll_to_element.test.ts`, automated device smoke checks under `test/device/automated/...`, and manual device runners under `test/device/manual/...`.
 ---
@@ -129,57 +129,96 @@ Notes:
 - `tapCoordinates` are the recommended center point to use for `tap` calls.
 - `confidence` mirrors the internal scoring (0..1) and is suitable for telemetry or logging to decide whether to proceed with an automated action.
 - The tool favours actionable (clickable/focusable) targets; when a matching node is not directly actionable, it finds the smallest containing clickable ancestor.
-- Unit tests for edge cases (parent-clickable child-text, resource-id matches, fuzzy matching) are under `test/observe/unit/find_element.test.ts`.
+- Unit tests for edge cases (parent-clickable child-text, resource-id matches, fuzzy matching) are under `test/unit/observe/find_element.test.ts`.
 ---
 ## wait_for_ui
 Purpose:
-- Wait for a condition to occur on the device: UI element appearance, a log line, a screen fingerprint change, or an idle/stable screen state.
-Supported types and behavior:
-- ui: Delegates to `find_element` to perform a semantic search of the UI tree. Returns the matched element descriptor (including tapCoordinates) when found.
-- log: Reads the active log stream (via `start_log_stream`/`readLogStreamHandler`) and falls back to a snapshot of recent logs (`getLogsHandler`). Matches when the query substring appears in a new log line after a captured baseline.
-- screen: Compares screen fingerprints (visual checks) against an initial baseline and returns when fingerprint changes. If `query` is provided it will attempt a `find_element` on the new screen to validate the expected content.
-- idle: Waits until the screen fingerprint remains stable for a short stability window (default 1000ms).
+- Deterministically wait for a UI selector match and return the matched element metadata.
 Input (ToolsInteract.waitForUIHandler):
 ```
-{ "type": "ui|log|screen|idle", "query": "optional string", "timeoutMs": 5000, "pollIntervalMs": 200, "platform": "android|ios", "deviceId": "optional device id" }
+{
+  "selector": { "text": "optional", "resource_id": "optional", "accessibility_id": "optional", "contains": false },
+  "condition": "exists|not_exists|visible|clickable",
+  "timeout_ms": 60000,
+  "poll_interval_ms": 300,
+  "match": { "index": 0 },
+  "retry": { "max_attempts": 1, "backoff_ms": 0 },
+  "platform": "android|ios",
+  "deviceId": "optional device id"
+}
 ```
 Success response highlights:
-- success: true
-- type: requested type
-- matched: true
-- details: human-friendly explanation
-- timestamp: epoch ms
-- element: (for ui/screen when matched) actionable element metadata with tapCoordinates
-- log: (for log) matched log message and raw entry
-- newFingerprint: (for screen) new fingerprint value
+- status: `success`
+- matched: number of matches found in the current poll
+- element: matched element metadata including `elementId`
+- metrics: latency, poll count, attempts
 Failure/timeout response:
-- success: false
-- error or reason: explanation
-- type: requested type
-- timeoutMs: value used
+- status: `timeout`
+- error: structured error with `code` and `message`
+- metrics: latency, poll count, attempts
 Notes & tips:
-- Defaults (timeoutMs=5000, pollIntervalMs=200) balance responsiveness with device query overhead; adjust in tests or scripts as needed.
-- For UI-sensitive flows prefer type='ui' rather than relying solely on visual fingerprint changes, as some UI updates don't alter the fingerprint.
+- `wait_for_ui` is responsible for **resolution only**.
+- Successful responses now include an `elementId` that can be passed to `tap_element`.
+- This enables the deterministic loop: **observe -> act -> verify**.
 Tests:
-- Unit: `test/interact/unit/wait_for_ui.test.ts`
-- Device runner: `test/interact/device/wait_for_ui_device.ts` (requires devices/emulators and adb/xcrun in PATH)
+- Unit: `test/unit/interact/wait_for_ui_contract.test.ts` and `test/unit/interact/wait_for_ui_selector_matching.test.ts`
+- Device: automated checks live under `test/device/automated/...` and manual/debug runners under `test/device/manual/...`; running them requires devices/emulators and `adb`/`xcrun` in PATH
 Example:
 ```
-// Wait up to 5s for a button labeled "Generate Session" on Android
-ToolsInteract.waitForUIHandler({ type: 'ui', query: 'Generate Session', timeoutMs: 5000, platform: 'android' })
+ToolsInteract.waitForUIHandler({
+  selector: { text: 'Generate Session' },
+  condition: 'clickable',
+  timeout_ms: 5000,
+  platform: 'android'
+})
 ```
 Troubleshooting:
-- If wait_for_ui(log) never matches, ensure log streaming is started for the target package and baseline logs captured correctly.
-- If wait_for_ui(screen) times out despite visible UI change, try type='ui' to validate content-level changes.
+- If `wait_for_ui` times out, confirm the selector is precise and that the current UI tree exposes the expected text, resource ID, or accessibility ID.
+## tap_element
+Purpose:
+- Execute a tap against a UI element that has already been resolved by `wait_for_ui`.
+Input:
+```
+{ "elementId": "el_..." }
+```
+Behavior:
+- validates that the element still exists in the current UI context
+- validates that the element is visible
+- validates that the element is enabled
+- performs the tap using the resolved element bounds
+Success response:
+```
+{ "success": true, "elementId": "el_123", "action": "tap" }
+```
+Failure response:
+```
+{
+  "success": false,
+  "elementId": "el_123",
+  "action": "tap",
+  "error": { "code": "element_not_found|element_not_visible|element_not_enabled", "message": "..." }
+}
+```
+Notes:
+- `tap_element` does **not** accept selectors.
+- `tap_element` does **not** perform lookup, waiting, retries, or ambiguity resolution.
+- Migration pattern for selector-based flows is:
+  1. `wait_for_ui(selector)`
+  2. `tap_element(elementId)`

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "mobile-debug-mcp",
-  "version": "0.21.5",
+  "version": "0.23.0",
   "description": "MCP server for mobile app debugging (Android + iOS), with focus on security and reliability",
   "type": "module",
   "bin": {
@@ -13,8 +13,8 @@
     "healthcheck": "tsx ./src/cli/idb/check-idb.ts",
     "install-idb": "tsx ./src/cli/idb/install-idb.ts",
     "preflight-ios": "tsx ./src/cli/ios/preflight-ios.ts",
-    "test:unit": "SKIP_DEVICE_TESTS=1 tsx test/unit/index.ts",
-    "test:integration": "npm run build && tsx test/device/index.ts",
+    "test:unit": "tsx test/unit/index.ts",
+    "test:integration": "npm run test:device",
     "test:device": "npm run build && tsx test/device/index.ts",
     "test": "npm run test:unit",
     "lint": "eslint --ext .ts,.js src test --quiet",

package/skills/README.md ADDED Viewed

@@ -0,0 +1,35 @@
+# Skills
+This repository stores agent skills as **plain Markdown packages** so they can be consumed by different agent systems, including Copilot, Codex, Claude, or custom internal agents.
+## Portability rules
+1. Keep the entrypoint in `skills/<skill-name>/SKILL.md`.
+2. Use simple, readable sections instead of vendor-specific syntax or hidden metadata.
+3. Keep `SKILL.md` short and task-oriented.
+4. Put detailed guidance in `skills/<skill-name>/references/*.md`.
+5. Describe behavior in terms of **inputs, outputs, workflow, and constraints**, not a specific product's tool API.
+6. When a skill depends on repository conventions, link to the exact repo paths and commands the agent should inspect or run.
+## Recommended SKILL.md structure
+- Title
+- `name`
+- `version`
+- `summary`
+- Purpose
+- Activation conditions
+- Surface area (actions)
+- Core guidance
+- Inputs & outputs
+- Failure handling
+- Progressive disclosure
+- References
+## Repository-specific expectation
+Skills in this repo should be:
+- **generic enough** to be followed by different agents
+- **specific enough** to encode this repository's working conventions
+- **lightweight** so agents can load them quickly and fetch references only when needed

package/skills/test-authoring/SKILL.md ADDED Viewed

@@ -0,0 +1,57 @@
+# Test Authoring skill
+name: test-authoring
+version: 0.1.0
+summary: Reusable guidance for adding or updating tests in this repository without tying the workflow to a single agent product.
+# Purpose
+Help an agent create, extend, or reorganize tests while following this repository's current conventions for unit tests, automated device smoke tests, and manual device helpers.
+# Activation conditions
+Activate when an agent needs to:
+- add a new test for existing code
+- update tests after changing tool behavior or response contracts
+- move tests into the correct unit, automated device, or manual device location
+- improve coverage for handlers, server contracts, or shared utilities
+# Surface area (actions)
+- choose-test-type
+- place-test-file
+- mirror-existing-pattern
+- add-or-update-fixtures
+- validate-test-command
+- document-test-scope
+# Core guidance
+1. Prefer the **smallest reliable test type**: unit test first, automated device smoke test only when real platform integration matters, manual helper only when automation is not practical.
+2. Follow the repository's current automated test style: self-running `tsx` scripts with explicit assertions and non-zero exit on failure.
+3. Put automated unit tests under `test/unit/...`; the unit runner automatically executes every `*.test.ts` file there.
+4. Put automated device smoke tests under `test/device/automated/...`; put human-invoked helpers under `test/device/manual/...`.
+5. Reuse existing mocking seams and test hooks before creating new ones.
+6. When testing user-facing tools, protect **response shape**, required fields, and documented defaults.
+7. Keep tests deterministic: isolate process-global mocks, prefer fake binaries or injected helpers over shared mutable module state.
+8. Validate with the repository commands that already exist instead of inventing new runners.
+# Inputs & outputs
+- choose-test-type(input: { changedPaths[], behaviorType, requiresRealDevice?: boolean }) -> { recommendedType: 'unit'|'device-automated'|'device-manual', rationale, targetPath }
+- place-test-file(input: { featureArea, recommendedType }) -> { filePath, namingPattern, runner }
+- mirror-existing-pattern(input: { targetPath }) -> { referenceFiles[], patternSummary }
+- validate-test-command(input: { scope: 'unit'|'device'|'repo' }) -> { commands[] }
+- document-test-scope(input: { testPath, purpose }) -> { docsToUpdate[] }
+# Failure handling
+- If the correct test type is ambiguous, prefer unit coverage unless the behavior depends on real device/simulator integration.
+- If a test requires brittle global monkeypatching, isolate it in a subprocess-friendly way or add a narrow injection seam.
+- If device automation is too environment-dependent, keep the helper under `test/device/manual/...` and add a smaller automated smoke test around a stable contract.
+# Progressive disclosure
+- Keep this file short.
+- Load detailed references only when deciding placement, matching existing style, or validating new tests.
+# References
+- `references/repo-test-layout.md` — where tests belong and what each tree means
+- `references/test-authoring-workflow.md` — step-by-step workflow for creating or changing tests
+- `references/test-quality-checklist.md` — what a good test in this repo should prove before it is considered done
+# License
+Same as repository (MIT).

package/skills/test-authoring/references/repo-test-layout.md ADDED Viewed

@@ -0,0 +1,47 @@
+# Repository test layout
+Use this reference when deciding **where** a new test belongs.
+## Automated unit tests
+- Location: `test/unit/...`
+- Naming: `*.test.ts`
+- Execution: `npm run test:unit`
+- Runner behavior: `test/unit/index.ts` discovers every `*.test.ts` file and runs each one in an isolated `tsx` subprocess
+Best for:
+- handler logic
+- schema and contract defaults
+- response-shape validation
+- shared utility behavior
+- deterministic edge cases
+## Automated device smoke tests
+- Location: `test/device/automated/...`
+- Naming: usually `*.smoke.ts` or `*.integration.ts`
+- Execution: `npm run test:device`
+- Runner behavior: `test/device/index.ts` runs only the automated subtree
+Best for:
+- real Android/iOS tool wiring
+- smoke-level confidence that a command works end-to-end
+- checks that need real `adb`, `xcrun`, simulator, or device behavior
+## Manual device helpers
+- Location: `test/device/manual/...`
+- Purpose: scripts that humans or agents can invoke directly for ad hoc validation
+- Not part of the default automated test commands
+Best for:
+- workflows that require app-specific arguments
+- environment-specific debugging
+- high-friction scenarios not suitable for default CI-like execution
+## Placement rules
+1. If the behavior can be validated with mocks, fake binaries, or narrow test hooks, prefer `test/unit/...`.
+2. If the behavior depends on real platform tooling, add an automated smoke test under `test/device/automated/...`.
+3. If the behavior requires manual setup, project-specific app IDs, or ad hoc arguments, add a helper under `test/device/manual/...`.
+4. Avoid leaving obsolete tests in the wrong tree; move or delete them so tree meaning stays clear.

package/skills/test-authoring/references/test-authoring-workflow.md ADDED Viewed

@@ -0,0 +1,73 @@
+# Test authoring workflow
+Use this workflow when adding or updating tests in this repository.
+## 1. Identify the behavior boundary
+Classify the change:
+- pure utility or parser logic
+- handler / response contract logic
+- server registration or schema wiring
+- real platform integration
+This usually determines the cheapest useful test type.
+## 2. Inspect nearby examples
+Before writing a new test, inspect tests already covering the same area:
+- `test/unit/server/...` for tool registration and response-shape tests
+- `test/unit/interact/...` for handler tests and UI wait behavior
+- `test/unit/manage/...` for build/install flows and fake toolchain binaries
+- `test/unit/observe/...` for observe response contracts
+- `test/unit/utils/...` for shared helper coverage
+- `test/device/automated/...` for smoke test wrappers
+- `test/device/manual/...` for JSON-producing helper scripts used by smoke tests
+## 3. Match the existing style
+For automated tests in this repo:
+- use a self-running script
+- use `assert` or explicit checks
+- print a small success message
+- `process.exit(1)` on failure
+For unit tests that need isolation:
+- patch only what is required
+- restore original values in `finally`
+- prefer injected seams or fake binaries over broad module mutation
+## 4. Prefer existing seams
+Before adding a new hook, search for:
+- existing test-only setters/resetters
+- environment-variable based indirection
+- fake executable patterns already used in system/manage tests
+Only add a new seam when the current code cannot be tested safely otherwise.
+## 5. Protect the contract, not just the happy path
+When testing tools or handlers, cover:
+- expected success shape
+- required fields
+- key defaults
+- at least one failure or invalid-input path
+## 6. Validate with repo commands
+Use only the existing validation entrypoints:
+- `npm run build`
+- `npm run lint`
+- `npm run test:unit`
+- `SKIP_DEVICE_TESTS=1 npm run test:device` for automated device wrappers when real devices are not part of the current validation run
+## 7. Update docs only when structure or expectations changed
+If the test layout, naming, or default commands change, update repo docs. If you only add another test following existing rules, docs may not need changes.

package/skills/test-authoring/references/test-quality-checklist.md ADDED Viewed

@@ -0,0 +1,39 @@
+# Test quality checklist
+A new test in this repo is high quality when it does most of the following:
+## Contract quality
+- proves the right behavior, not just that code executed
+- asserts on meaningful fields rather than only truthy values
+- protects documented defaults where drift would matter
+- covers at least one failure path when the behavior can fail
+## Determinism
+- does not depend on arbitrary timing unless the behavior is inherently time-based
+- restores mocks, env vars, and monkeypatches in `finally`
+- avoids leaking state into later tests
+- runs reliably in an isolated subprocess
+## Placement quality
+- lives in the correct tree (`test/unit`, `test/device/automated`, or `test/device/manual`)
+- uses file naming consistent with the surrounding tests
+- does not leave obsolete duplicate coverage in a different tree
+## Maintainability
+- reuses existing helpers, fake-binary patterns, or test seams
+- keeps fixtures as small as possible
+- makes the failure obvious from the assertion message or output
+- stays focused on one behavior cluster instead of becoming a broad scenario script
+## Completion bar
+Before considering the work done, the author should be able to say:
+1. The test is in the correct place.
+2. The test matches the current repo style.
+3. The test would fail if the protected behavior regressed.
+4. The normal repository validation commands still pass.

package/src/interact/classify.ts ADDED Viewed

@@ -0,0 +1,64 @@
+export type ActionOutcome = 'success' | 'no_op' | 'backend_failure' | 'ui_failure' | 'unknown' // labels carried in ClassifyActionOutcomeResult.outcome
+export type NetworkRequestStatus = 'success' | 'failure' | 'retryable' // classifyActionOutcome treats 'failure' and 'retryable' alike: either one yields 'backend_failure'
+export interface NetworkRequest { // one observed network request, as consumed by classifyActionOutcome
+  endpoint: string // quoted verbatim in the 'backend_failure' reasoning text
+  status: NetworkRequestStatus // a single 'failure' or 'retryable' request is enough to classify the action as 'backend_failure'
+}
+export interface ClassifyActionOutcomeInput { // signals read by classifyActionOutcome
+  uiChanged: boolean // true short-circuits to 'success' before any network signal is examined
+  expectedElementVisible?: boolean | null // only a strict true counts as a positive signal; false, null and undefined are treated alike
+  /** null (or omitted) = get_network_activity has not been called yet; unless a UI signal is already positive, the classifier then returns 'unknown' with nextAction set */
+  networkRequests?: NetworkRequest[] | null
+  hasLogErrors?: boolean | null // never changes the outcome; when truthy it only appends ' (log errors present)' to the 'no_op' reasoning
+}
+export interface ClassifyActionOutcomeResult { // value returned by classifyActionOutcome
+  outcome: ActionOutcome // the classification label
+  reasoning: string // short explanation of which rule produced the outcome
+  /** Present when the caller must call get_network_activity before a final classification is possible; set only on the 'unknown' result returned when networkRequests is null or undefined */
+  nextAction?: 'call_get_network_activity'
+}
+/**
+ * Pure deterministic classifier. Applies rules in fixed order.
+ * Same inputs always produce the same output.
+ *
+ * Rule order (the first matching rule wins):
+ *  1. `uiChanged` is truthy or `expectedElementVisible === true` -> 'success'.
+ *  2. `networkRequests` is null or undefined -> 'unknown', with
+ *     `nextAction: 'call_get_network_activity'`.
+ *  3. Any request has status 'failure' or 'retryable' -> 'backend_failure'
+ *     (the first such request is named in the reasoning).
+ *  4. `networkRequests` is empty -> 'no_op'.
+ *  5. Every request has status 'success' -> 'ui_failure'.
+ *  6. Otherwise -> 'unknown'.
+ *
+ * `hasLogErrors` never changes the outcome; it only adds a note to the
+ * 'no_op' reasoning text.
+ *
+ * @param input - UI, network and log signals gathered after the action.
+ * @returns The outcome label, a reasoning string and, for rule 2 only, the
+ *   follow-up action the caller should take.
+ */
+export function classifyActionOutcome(input: ClassifyActionOutcomeInput): ClassifyActionOutcomeResult {
+  const { uiChanged, expectedElementVisible, networkRequests, hasLogErrors } = input
+  // Step 1 — UI signal is positive; when both signals are positive the reasoning reports the visible element rather than the UI change
+  if (uiChanged || expectedElementVisible === true) {
+    return { outcome: 'success', reasoning: expectedElementVisible === true ? 'expected element is visible' : 'UI changed after action' }
+  }
+  // Step 2 — UI did not change; network signal is required (null and undefined both mean "not collected yet")
+  if (networkRequests === null || networkRequests === undefined) {
+    return {
+      outcome: 'unknown',
+      reasoning: 'UI did not change; get_network_activity must be called before classification can proceed',
+      nextAction: 'call_get_network_activity'
+    }
+  }
+  // Step 3 — any network failure; 'retryable' counts as a failure here and only the first offending request is reported
+  const failedRequest = networkRequests.find((r) => r.status === 'failure' || r.status === 'retryable')
+  if (failedRequest) {
+    return { outcome: 'backend_failure', reasoning: `network request ${failedRequest.endpoint} returned ${failedRequest.status}` }
+  }
+  // Step 4 — no network requests at all; log errors are mentioned in the reasoning but do not change the outcome
+  if (networkRequests.length === 0) {
+    const logNote = hasLogErrors ? ' (log errors present)' : ''
+    return { outcome: 'no_op', reasoning: `no UI change and no network activity${logNote}` }
+  }
+  // Step 5 — network requests exist and all succeeded
+  if (networkRequests.every((r) => r.status === 'success')) {
+    return { outcome: 'ui_failure', reasoning: 'network requests succeeded but UI did not change' }
+  }
+  // Step 6 — fallback; given steps 3-5 this is reached only when a request carries a status outside 'success' | 'failure' | 'retryable' at runtime
+  return { outcome: 'unknown', reasoning: 'signals are inconclusive' }
+}