mobile-debug-mcp 0.13.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # Mobile Dev Tools
1
+ # Mobile Debug Tools
2
2
 
3
3
  A minimal, secure MCP server for AI-assisted mobile development. Build, install, and inspect Android/iOS apps from an MCP-compatible client.
4
4
 
@@ -33,7 +33,7 @@ I have a crash on the app, can you diagnose it, fix and validate using the mcp t
33
33
 
34
34
  ## Docs
35
35
 
36
- - Tools: [Tools](docs/TOOLS.md) — full input/response examples
36
+ - Tools: [Tools](docs/tools/TOOLS.md) — full input/response examples
37
37
  - Changelog: [Changelog](docs/CHANGELOG.md)
38
38
 
39
39
  ## License
@@ -1,5 +1,6 @@
1
1
  import { execAdb, getAndroidDeviceMetadata, getDeviceInfo } from "./utils.js";
2
2
  import { AndroidObserve } from "./observe.js";
3
+ import { scrollToElementShared } from "../tools/scroll_to_element.js";
3
4
  export class AndroidInteract {
4
5
  observe = new AndroidObserve();
5
6
  async waitForElement(text, timeout, deviceId) {
@@ -76,4 +77,15 @@ export class AndroidInteract {
76
77
  return { device: deviceInfo, success: false, error: e instanceof Error ? e.message : String(e) };
77
78
  }
78
79
  }
80
+ async scrollToElement(selector, direction = 'down', maxScrolls = 10, scrollAmount = 0.7, deviceId) {
81
+ return await scrollToElementShared({
82
+ selector,
83
+ direction,
84
+ maxScrolls,
85
+ scrollAmount,
86
+ deviceId,
87
+ fetchTree: async () => await this.observe.getUITree(deviceId),
88
+ swipe: async (x1, y1, x2, y2, duration, devId) => await this.swipe(x1, y1, x2, y2, duration, devId)
89
+ });
90
+ }
79
91
  }
@@ -1,6 +1,7 @@
1
1
  import { spawn } from "child_process";
2
2
  import { getIOSDeviceMetadata, getIdbCmd, isIDBInstalled } from "./utils.js";
3
3
  import { iOSObserve } from "./observe.js";
4
+ import { scrollToElementShared } from "../tools/scroll_to_element.js";
4
5
  export class iOSInteract {
5
6
  observe = new iOSObserve();
6
7
  async waitForElement(text, timeout, deviceId = "booted") {
@@ -66,4 +67,54 @@ export class iOSInteract {
66
67
  return { device, success: false, x, y, error: e instanceof Error ? e.message : String(e) };
67
68
  }
68
69
  }
70
+ async swipe(x1, y1, x2, y2, duration, deviceId = "booted") {
71
+ const device = await getIOSDeviceMetadata(deviceId);
72
+ // Use shared helper to detect idb
73
+ const idbExists = await isIDBInstalled();
74
+ if (!idbExists) {
75
+ return {
76
+ device,
77
+ success: false,
78
+ start: [x1, y1],
79
+ end: [x2, y2],
80
+ duration,
81
+ error: "iOS swipe requires 'idb' (iOS Device Bridge)."
82
+ };
83
+ }
84
+ try {
85
+ const targetUdid = (device.id && device.id !== 'booted') ? device.id : undefined;
86
+ // idb 'ui swipe' does not accept a duration parameter; use coordinates only
87
+ const args = ['ui', 'swipe', x1.toString(), y1.toString(), x2.toString(), y2.toString()];
88
+ if (targetUdid) {
89
+ args.push('--udid', targetUdid);
90
+ }
91
+ await new Promise((resolve, reject) => {
92
+ const proc = spawn(getIdbCmd(), args);
93
+ let stderr = '';
94
+ proc.stderr.on('data', d => stderr += d.toString());
95
+ proc.on('close', code => {
96
+ if (code === 0)
97
+ resolve();
98
+ else
99
+ reject(new Error(`idb ui swipe failed: ${stderr}`));
100
+ });
101
+ proc.on('error', err => reject(err));
102
+ });
103
+ return { device, success: true, start: [x1, y1], end: [x2, y2], duration };
104
+ }
105
+ catch (e) {
106
+ return { device, success: false, start: [x1, y1], end: [x2, y2], duration, error: e instanceof Error ? e.message : String(e) };
107
+ }
108
+ }
109
+ async scrollToElement(selector, direction = 'down', maxScrolls = 10, scrollAmount = 0.7, deviceId = 'booted') {
110
+ return await scrollToElementShared({
111
+ selector,
112
+ direction,
113
+ maxScrolls,
114
+ scrollAmount,
115
+ deviceId,
116
+ fetchTree: async () => await this.observe.getUITree(deviceId),
117
+ swipe: async (x1, y1, x2, y2, duration, devId) => await this.swipe(x1, y1, x2, y2, duration, devId)
118
+ });
119
+ }
69
120
  }
package/dist/server.js CHANGED
@@ -325,8 +325,8 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
325
325
  properties: {
326
326
  platform: {
327
327
  type: "string",
328
- enum: ["android"],
329
- description: "Platform to swipe on (currently only android supported)"
328
+ enum: ["android", "ios"],
329
+ description: "Platform to swipe on (android or ios)"
330
330
  },
331
331
  x1: { type: "number", description: "Start X coordinate" },
332
332
  y1: { type: "number", description: "Start Y coordinate" },
@@ -341,6 +341,30 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
341
341
  required: ["x1", "y1", "x2", "y2", "duration"]
342
342
  }
343
343
  },
344
+ {
345
+ name: "scroll_to_element",
346
+ description: "Scroll the current screen until a target UI element becomes visible, then return its details.",
347
+ inputSchema: {
348
+ type: "object",
349
+ properties: {
350
+ platform: { type: "string", enum: ["android", "ios"], description: "Platform to operate on (required)" },
351
+ selector: {
352
+ type: "object",
353
+ properties: {
354
+ text: { type: "string" },
355
+ resourceId: { type: "string" },
356
+ contentDesc: { type: "string" },
357
+ className: { type: "string" }
358
+ }
359
+ },
360
+ direction: { type: "string", enum: ["down", "up"], default: "down" },
361
+ maxScrolls: { type: "number", default: 10 },
362
+ scrollAmount: { type: "number", default: 0.7 },
363
+ deviceId: { type: "string", description: "Device UDID (iOS) or Serial (Android). Defaults to booted/connected." }
364
+ },
365
+ required: ["platform", "selector"]
366
+ }
367
+ },
344
368
  {
345
369
  name: "type_text",
346
370
  description: "Type text into the currently focused input field on an Android device.",
@@ -527,8 +551,13 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
527
551
  return wrapResponse(res);
528
552
  }
529
553
  if (name === "swipe") {
530
- const { x1, y1, x2, y2, duration, deviceId } = (args || {});
531
- const res = await ToolsInteract.swipeHandler({ x1, y1, x2, y2, duration, deviceId });
554
+ const { platform = 'android', x1, y1, x2, y2, duration, deviceId } = (args || {});
555
+ const res = await ToolsInteract.swipeHandler({ platform, x1, y1, x2, y2, duration, deviceId });
556
+ return wrapResponse(res);
557
+ }
558
+ if (name === "scroll_to_element") {
559
+ const { platform, selector, direction, maxScrolls, scrollAmount, deviceId } = (args || {});
560
+ const res = await ToolsInteract.scrollToElementHandler({ platform, selector, direction, maxScrolls, scrollAmount, deviceId });
532
561
  return wrapResponse(res);
533
562
  }
534
563
  if (name === "type_text") {
@@ -2,31 +2,24 @@ import { resolveTargetDevice } from '../utils/resolve-device.js';
2
2
  import { AndroidInteract } from '../android/interact.js';
3
3
  import { iOSInteract } from '../ios/interact.js';
4
4
  export class ToolsInteract {
5
+ static async getInteractionService(platform, deviceId) {
6
+ const effectivePlatform = platform || 'android';
7
+ const resolved = await resolveTargetDevice({ platform: effectivePlatform, deviceId });
8
+ const interact = effectivePlatform === 'android' ? new AndroidInteract() : new iOSInteract();
9
+ return { interact: interact, resolved, platform: effectivePlatform };
10
+ }
5
11
  static async waitForElementHandler({ platform, text, timeout, deviceId }) {
6
12
  const effectiveTimeout = timeout ?? 10000;
7
- if (platform === 'android') {
8
- const resolved = await resolveTargetDevice({ platform: 'android', deviceId });
9
- return await new AndroidInteract().waitForElement(text, effectiveTimeout, resolved.id);
10
- }
11
- else {
12
- const resolved = await resolveTargetDevice({ platform: 'ios', deviceId });
13
- return await new iOSInteract().waitForElement(text, effectiveTimeout, resolved.id);
14
- }
13
+ const { interact, resolved } = await ToolsInteract.getInteractionService(platform, deviceId);
14
+ return await interact.waitForElement(text, effectiveTimeout, resolved.id);
15
15
  }
16
16
  static async tapHandler({ platform, x, y, deviceId }) {
17
- const effectivePlatform = platform || 'android';
18
- if (effectivePlatform === 'android') {
19
- const resolved = await resolveTargetDevice({ platform: 'android', deviceId });
20
- return await new AndroidInteract().tap(x, y, resolved.id);
21
- }
22
- else {
23
- const resolved = await resolveTargetDevice({ platform: 'ios', deviceId });
24
- return await new iOSInteract().tap(x, y, resolved.id);
25
- }
17
+ const { interact, resolved } = await ToolsInteract.getInteractionService(platform, deviceId);
18
+ return await interact.tap(x, y, resolved.id);
26
19
  }
27
- static async swipeHandler({ x1, y1, x2, y2, duration, deviceId }) {
28
- const resolved = await resolveTargetDevice({ platform: 'android', deviceId });
29
- return await new AndroidInteract().swipe(x1, y1, x2, y2, duration, resolved.id);
20
+ static async swipeHandler({ platform = 'android', x1, y1, x2, y2, duration, deviceId }) {
21
+ const { interact, resolved } = await ToolsInteract.getInteractionService(platform, deviceId);
22
+ return await interact.swipe(x1, y1, x2, y2, duration, resolved.id);
30
23
  }
31
24
  static async typeTextHandler({ text, deviceId }) {
32
25
  const resolved = await resolveTargetDevice({ platform: 'android', deviceId });
@@ -36,4 +29,8 @@ export class ToolsInteract {
36
29
  const resolved = await resolveTargetDevice({ platform: 'android', deviceId });
37
30
  return await new AndroidInteract().pressBack(resolved.id);
38
31
  }
32
+ static async scrollToElementHandler({ platform, selector, direction = 'down', maxScrolls = 10, scrollAmount = 0.7, deviceId }) {
33
+ const { interact, resolved } = await ToolsInteract.getInteractionService(platform, deviceId);
34
+ return await interact.scrollToElement(selector, direction, maxScrolls, scrollAmount, resolved.id);
35
+ }
39
36
  }
@@ -0,0 +1,98 @@
1
+ export async function scrollToElementShared(opts) {
2
+ const { selector, direction = 'down', maxScrolls = 10, scrollAmount = 0.7, deviceId, fetchTree, swipe, stabilizationDelayMs = 350 } = opts;
3
+ const matchElement = (el) => {
4
+ if (!el)
5
+ return false;
6
+ if (selector.text !== undefined && selector.text !== el.text)
7
+ return false;
8
+ if (selector.resourceId !== undefined && selector.resourceId !== el.resourceId)
9
+ return false;
10
+ if (selector.contentDesc !== undefined && selector.contentDesc !== el.contentDescription)
11
+ return false;
12
+ if (selector.className !== undefined && selector.className !== el.type)
13
+ return false;
14
+ return true;
15
+ };
16
+ const isVisible = (el, resolution) => {
17
+ if (!el)
18
+ return false;
19
+ if (el.visible === false)
20
+ return false;
21
+ if (!el.bounds || !resolution || !resolution.width || !resolution.height)
22
+ return (el.visible === undefined ? true : !!el.visible);
23
+ const [left, top, right, bottom] = el.bounds;
24
+ const withinY = bottom > 0 && top < resolution.height;
25
+ const withinX = right > 0 && left < resolution.width;
26
+ return withinX && withinY;
27
+ };
28
+ const findVisibleMatch = (elements, resolution) => {
29
+ if (!Array.isArray(elements))
30
+ return null;
31
+ for (const e of elements) {
32
+ if (matchElement(e) && isVisible(e, resolution))
33
+ return e;
34
+ }
35
+ return null;
36
+ };
37
+ // Initial check
38
+ let tree = await fetchTree();
39
+ if (tree.error)
40
+ return { success: false, reason: tree.error, scrollsPerformed: 0 };
41
+ let found = findVisibleMatch(tree.elements, tree.resolution);
42
+ if (found) {
43
+ return { success: true, element: { text: found.text, resourceId: found.resourceId, bounds: found.bounds }, scrollsPerformed: 0 };
44
+ }
45
+ const fingerprintOf = (t) => {
46
+ try {
47
+ return JSON.stringify((t.elements || []).map((e) => ({ text: e.text, resourceId: e.resourceId, bounds: e.bounds })));
48
+ }
49
+ catch {
50
+ return '';
51
+ }
52
+ };
53
+ let prevFingerprint = fingerprintOf(tree);
54
+ const width = (tree.resolution && tree.resolution.width) ? tree.resolution.width : 0;
55
+ const height = (tree.resolution && tree.resolution.height) ? tree.resolution.height : 0;
56
+ const centerX = Math.round(width / 2) || 50;
57
+ const clampPct = (v) => Math.max(0.05, Math.min(0.95, v));
58
+ const computeCoords = () => {
59
+ const defaultStart = direction === 'down' ? 0.8 : 0.2;
60
+ const startPct = clampPct(defaultStart);
61
+ const endPct = clampPct(defaultStart + (direction === 'down' ? -scrollAmount : scrollAmount));
62
+ const x1 = centerX;
63
+ const x2 = centerX;
64
+ const y1 = Math.round((height || 100) * startPct);
65
+ const y2 = Math.round((height || 100) * endPct);
66
+ return { x1, y1, x2, y2 };
67
+ };
68
+ const duration = 300;
69
+ let scrollsPerformed = 0;
70
+ for (let i = 0; i < maxScrolls; i++) {
71
+ const { x1, y1, x2, y2 } = computeCoords();
72
+ try {
73
+ await swipe(x1, y1, x2, y2, duration, deviceId);
74
+ }
75
+ catch (e) {
76
+ // Log swipe failures to aid debugging but don't fail the overall flow
77
+ try {
78
+ console.warn(`scrollToElement swipe failed: ${e instanceof Error ? e.message : String(e)}`);
79
+ }
80
+ catch { }
81
+ }
82
+ scrollsPerformed++;
83
+ await new Promise(resolve => setTimeout(resolve, stabilizationDelayMs));
84
+ tree = await fetchTree();
85
+ if (tree.error)
86
+ return { success: false, reason: tree.error, scrollsPerformed: scrollsPerformed };
87
+ found = findVisibleMatch(tree.elements, tree.resolution);
88
+ if (found) {
89
+ return { success: true, element: { text: found.text, resourceId: found.resourceId, bounds: found.bounds }, scrollsPerformed };
90
+ }
91
+ const fp = fingerprintOf(tree);
92
+ if (fp === prevFingerprint) {
93
+ return { success: false, reason: 'UI unchanged after scroll; likely end of list', scrollsPerformed: scrollsPerformed };
94
+ }
95
+ prevFingerprint = fp;
96
+ }
97
+ return { success: false, reason: 'Element not found after scrolling', scrollsPerformed: scrollsPerformed };
98
+ }
package/docs/CHANGELOG.md CHANGED
@@ -2,6 +2,13 @@
2
2
 
3
3
  All notable changes to the **Mobile Debug MCP** project will be documented in this file.
4
4
 
5
+ ## [0.14.0]
6
+ - Added `scroll_to_element` tool: platform-aware helper that scrolls until a UI element matching a selector is visible. Supports Android and iOS with configurable options: direction, maxScrolls, and scrollAmount. Includes unit tests and device runners under `test/device/` for manual E2E validation.
7
+ - Moved scroll logic into platform-specific implementations (`src/android/interact.ts`, `src/ios/interact.ts`) and delegated from `src/tools/interact.ts` to centralise platform behaviour.
8
+ - Fixed iOS `idb` swipe arguments and improved visibility detection by using element bounds and device resolution to avoid treating off-screen elements as visible.
9
+ - Consolidated unit tests for `scroll_to_element` into `test/unit/observe/scroll_to_element.test.ts`, and removed older duplicate test files.
10
+
11
+
5
12
  ## [0.13.0]
6
13
  - Fixed a crash in the `start_app` tool by adding validation to ensure `appId` and `platform` are provided.
7
14
 
@@ -4,8 +4,8 @@ This repository groups tool docs into three areas aligned with the codebase: man
4
4
 
5
5
  See:
6
6
 
7
- - docs/manage.md — build, install and device management tools
8
- - docs/observe.md — logs, screenshots and UI inspection tools
9
- - docs/interact.md — UI interaction tools (tap, swipe, type, wait)
7
+ - [manage](manage.md) — build, install and device management tools
8
+ - [observe](observe.md) — logs, screenshots and UI inspection tools
9
+ - [interact](interact.md) — UI interaction tools (tap, swipe, type, wait)
10
10
 
11
11
  For per-tool deep dives, open the linked files above.
@@ -41,3 +41,34 @@ Notes:
41
41
  - swipe: `adb shell input swipe x1 y1 x2 y2 duration`.
42
42
  - type_text: `adb shell input text` (spaces encoded as %s) — may fail for special characters.
43
43
  - press_back: `adb shell input keyevent 4`.
44
+
45
+ ---
46
+
47
+ ## scroll_to_element
48
+
49
+ Description:
50
+ - Scrolls the UI until an element matching the provided selector becomes visible, or until a maximum number of scroll attempts is reached.
51
+ - Delegates platform behaviour to Android and iOS implementations for reliable swipes and UI-tree checks.
52
+
53
+ Input example:
54
+ ```
55
+ { "platform": "android", "selector": { "text": "Offscreen Test Element" }, "direction": "down", "maxScrolls": 10, "scrollAmount": 0.7, "deviceId": "emulator-5554" }
56
+ ```
57
+
58
+ Response example (found):
59
+ ```
60
+ { "success": true, "element": { /* text, resourceId, bounds */ }, "scrollsPerformed": 2 }
61
+ ```
62
+
63
+ Response example (failure - unchanged UI):
64
+ ```
65
+ { "success": false, "reason": "UI unchanged after scroll; likely end of list", "scrollsPerformed": 3 }
66
+ ```
67
+
68
+ Notes:
69
+ - Matching is exact on provided selector fields (text, resourceId, contentDesc, className).
70
+ - Visibility check uses element.bounds intersecting the device resolution when available; falls back to the element.visible flag if bounds/resolution are missing.
71
+ - The tool fingerprints the UI tree (text, resourceId and bounds of each element) between scrolls; if the fingerprint doesn't change after a swipe, the tool stops early, assuming end-of-list.
72
+ - Android swipe uses `adb shell input swipe` with pixel coordinates computed from screen-percentage positions. iOS swipe uses the `idb ui swipe` command; note that the duration is not passed to `idb`, so only the start and end coordinates take effect on iOS.
73
+ - Unit tests are located at `test/unit/observe/scroll_to_element.test.ts` and device runners at `test/device/observe/`.
74
+
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mobile-debug-mcp",
3
- "version": "0.13.0",
3
+ "version": "0.14.0",
4
4
  "description": "MCP server for mobile app debugging (Android + iOS), with focus on security and reliability",
5
5
  "type": "module",
6
6
  "bin": {
@@ -1,6 +1,7 @@
1
1
  import { WaitForElementResponse, TapResponse, SwipeResponse, TypeTextResponse, PressBackResponse } from "../types.js"
2
2
  import { execAdb, getAndroidDeviceMetadata, getDeviceInfo } from "./utils.js"
3
3
  import { AndroidObserve } from "./observe.js"
4
+ import { scrollToElementShared } from "../tools/scroll_to_element.js"
4
5
 
5
6
 
6
7
  export class AndroidInteract {
@@ -88,4 +89,16 @@ export class AndroidInteract {
88
89
  }
89
90
  }
90
91
 
92
+ async scrollToElement(selector: { text?: string, resourceId?: string, contentDesc?: string, className?: string }, direction: 'down' | 'up' = 'down', maxScrolls = 10, scrollAmount = 0.7, deviceId?: string) {
93
+ return await scrollToElementShared({
94
+ selector,
95
+ direction,
96
+ maxScrolls,
97
+ scrollAmount,
98
+ deviceId,
99
+ fetchTree: async () => await this.observe.getUITree(deviceId),
100
+ swipe: async (x1: number, y1: number, x2: number, y2: number, duration: number, devId?: string) => await this.swipe(x1, y1, x2, y2, duration, devId)
101
+ })
102
+ }
103
+
91
104
  }
@@ -1,7 +1,8 @@
1
1
  import { spawn } from "child_process"
2
- import { WaitForElementResponse, TapResponse } from "../types.js"
2
+ import { WaitForElementResponse, TapResponse, SwipeResponse } from "../types.js"
3
3
  import { getIOSDeviceMetadata, getIdbCmd, isIDBInstalled } from "./utils.js"
4
4
  import { iOSObserve } from "./observe.js"
5
+ import { scrollToElementShared } from "../tools/scroll_to_element.js"
5
6
 
6
7
  export class iOSInteract {
7
8
  private observe = new iOSObserve();
@@ -75,4 +76,58 @@ export class iOSInteract {
75
76
  return { device, success: false, x, y, error: e instanceof Error ? e.message : String(e) };
76
77
  }
77
78
  }
79
+
80
+ async swipe(x1: number, y1: number, x2: number, y2: number, duration: number, deviceId: string = "booted"): Promise<SwipeResponse> {
81
+ const device = await getIOSDeviceMetadata(deviceId);
82
+ // Use shared helper to detect idb
83
+ const idbExists = await isIDBInstalled();
84
+
85
+ if (!idbExists) {
86
+ return {
87
+ device,
88
+ success: false,
89
+ start: [x1, y1],
90
+ end: [x2, y2],
91
+ duration,
92
+ error: "iOS swipe requires 'idb' (iOS Device Bridge)."
93
+ }
94
+ }
95
+
96
+ try {
97
+ const targetUdid = (device.id && device.id !== 'booted') ? device.id : undefined;
98
+ // idb 'ui swipe' does not accept a duration parameter; use coordinates only
99
+ const args: string[] = ['ui', 'swipe', x1.toString(), y1.toString(), x2.toString(), y2.toString()];
100
+ if (targetUdid) {
101
+ args.push('--udid', targetUdid);
102
+ }
103
+
104
+ await new Promise<void>((resolve, reject) => {
105
+ const proc = spawn(getIdbCmd(), args);
106
+ let stderr = '';
107
+ proc.stderr.on('data', d => stderr += d.toString());
108
+ proc.on('close', code => {
109
+ if (code === 0) resolve();
110
+ else reject(new Error(`idb ui swipe failed: ${stderr}`));
111
+ });
112
+ proc.on('error', err => reject(err));
113
+ });
114
+
115
+ return { device, success: true, start: [x1, y1], end: [x2, y2], duration };
116
+ } catch (e) {
117
+ return { device, success: false, start: [x1, y1], end: [x2, y2], duration, error: e instanceof Error ? e.message : String(e) };
118
+ }
119
+ }
120
+
121
+ async scrollToElement(selector: { text?: string, resourceId?: string, contentDesc?: string, className?: string }, direction: 'down' | 'up' = 'down', maxScrolls = 10, scrollAmount = 0.7, deviceId: string = 'booted') {
122
+ return await scrollToElementShared({
123
+ selector,
124
+ direction,
125
+ maxScrolls,
126
+ scrollAmount,
127
+ deviceId,
128
+ fetchTree: async () => await this.observe.getUITree(deviceId),
129
+ swipe: async (x1: number, y1: number, x2: number, y2: number, duration: number, devId?: string) => await this.swipe(x1, y1, x2, y2, duration, devId)
130
+ })
131
+ }
78
132
  }
133
+
package/src/server.ts CHANGED
@@ -346,8 +346,8 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
346
346
  properties: {
347
347
  platform: {
348
348
  type: "string",
349
- enum: ["android"],
350
- description: "Platform to swipe on (currently only android supported)"
349
+ enum: ["android","ios"],
350
+ description: "Platform to swipe on (android or ios)"
351
351
  },
352
352
  x1: { type: "number", description: "Start X coordinate" },
353
353
  y1: { type: "number", description: "Start Y coordinate" },
@@ -362,6 +362,30 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
362
362
  required: ["x1", "y1", "x2", "y2", "duration"]
363
363
  }
364
364
  },
365
+ {
366
+ name: "scroll_to_element",
367
+ description: "Scroll the current screen until a target UI element becomes visible, then return its details.",
368
+ inputSchema: {
369
+ type: "object",
370
+ properties: {
371
+ platform: { type: "string", enum: ["android", "ios"], description: "Platform to operate on (required)" },
372
+ selector: {
373
+ type: "object",
374
+ properties: {
375
+ text: { type: "string" },
376
+ resourceId: { type: "string" },
377
+ contentDesc: { type: "string" },
378
+ className: { type: "string" }
379
+ }
380
+ },
381
+ direction: { type: "string", enum: ["down", "up"], default: "down" },
382
+ maxScrolls: { type: "number", default: 10 },
383
+ scrollAmount: { type: "number", default: 0.7 },
384
+ deviceId: { type: "string", description: "Device UDID (iOS) or Serial (Android). Defaults to booted/connected." }
385
+ },
386
+ required: ["platform", "selector"]
387
+ }
388
+ },
365
389
  {
366
390
  name: "type_text",
367
391
  description: "Type text into the currently focused input field on an Android device.",
@@ -568,8 +592,14 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
568
592
  }
569
593
 
570
594
  if (name === "swipe") {
571
- const { x1, y1, x2, y2, duration, deviceId } = (args || {}) as any
572
- const res = await ToolsInteract.swipeHandler({ x1, y1, x2, y2, duration, deviceId })
595
+ const { platform = 'android', x1, y1, x2, y2, duration, deviceId } = (args || {}) as any
596
+ const res = await ToolsInteract.swipeHandler({ platform, x1, y1, x2, y2, duration, deviceId })
597
+ return wrapResponse(res)
598
+ }
599
+
600
+ if (name === "scroll_to_element") {
601
+ const { platform, selector, direction, maxScrolls, scrollAmount, deviceId } = (args || {}) as any
602
+ const res = await ToolsInteract.scrollToElementHandler({ platform, selector, direction, maxScrolls, scrollAmount, deviceId })
573
603
  return wrapResponse(res)
574
604
  }
575
605
 
@@ -4,31 +4,27 @@ import { iOSInteract } from '../ios/interact.js'
4
4
 
5
5
  export class ToolsInteract {
6
6
 
7
+ private static async getInteractionService(platform?: 'android' | 'ios', deviceId?: string) {
8
+ const effectivePlatform = platform || 'android'
9
+ const resolved = await resolveTargetDevice({ platform: effectivePlatform as 'android' | 'ios', deviceId })
10
+ const interact = effectivePlatform === 'android' ? new AndroidInteract() : new iOSInteract()
11
+ return { interact: interact as any, resolved, platform: effectivePlatform }
12
+ }
13
+
7
14
  static async waitForElementHandler({ platform, text, timeout, deviceId }: { platform: 'android' | 'ios', text: string, timeout?: number, deviceId?: string }) {
8
15
  const effectiveTimeout = timeout ?? 10000
9
- if (platform === 'android') {
10
- const resolved = await resolveTargetDevice({ platform: 'android', deviceId })
11
- return await new AndroidInteract().waitForElement(text, effectiveTimeout, resolved.id)
12
- } else {
13
- const resolved = await resolveTargetDevice({ platform: 'ios', deviceId })
14
- return await new iOSInteract().waitForElement(text, effectiveTimeout, resolved.id)
15
- }
16
+ const { interact, resolved } = await ToolsInteract.getInteractionService(platform, deviceId)
17
+ return await interact.waitForElement(text, effectiveTimeout, resolved.id)
16
18
  }
17
19
 
18
20
  static async tapHandler({ platform, x, y, deviceId }: { platform?: 'android' | 'ios', x: number, y: number, deviceId?: string }) {
19
- const effectivePlatform = platform || 'android'
20
- if (effectivePlatform === 'android') {
21
- const resolved = await resolveTargetDevice({ platform: 'android', deviceId })
22
- return await new AndroidInteract().tap(x, y, resolved.id)
23
- } else {
24
- const resolved = await resolveTargetDevice({ platform: 'ios', deviceId })
25
- return await new iOSInteract().tap(x, y, resolved.id)
26
- }
21
+ const { interact, resolved } = await ToolsInteract.getInteractionService(platform, deviceId)
22
+ return await interact.tap(x, y, resolved.id)
27
23
  }
28
24
 
29
- static async swipeHandler({ x1, y1, x2, y2, duration, deviceId }: { x1: number, y1: number, x2: number, y2: number, duration: number, deviceId?: string }) {
30
- const resolved = await resolveTargetDevice({ platform: 'android', deviceId })
31
- return await new AndroidInteract().swipe(x1, y1, x2, y2, duration, resolved.id)
25
+ static async swipeHandler({ platform = 'android', x1, y1, x2, y2, duration, deviceId }: { platform?: 'android' | 'ios', x1: number, y1: number, x2: number, y2: number, duration: number, deviceId?: string }) {
26
+ const { interact, resolved } = await ToolsInteract.getInteractionService(platform, deviceId)
27
+ return await interact.swipe(x1, y1, x2, y2, duration, resolved.id)
32
28
  }
33
29
 
34
30
  static async typeTextHandler({ text, deviceId }: { text: string, deviceId?: string }) {
@@ -41,5 +37,10 @@ export class ToolsInteract {
41
37
  return await new AndroidInteract().pressBack(resolved.id)
42
38
  }
43
39
 
40
+ static async scrollToElementHandler({ platform, selector, direction = 'down', maxScrolls = 10, scrollAmount = 0.7, deviceId }: { platform: 'android' | 'ios', selector: { text?: string, resourceId?: string, contentDesc?: string, className?: string }, direction?: 'down' | 'up', maxScrolls?: number, scrollAmount?: number, deviceId?: string }) {
41
+ const { interact, resolved } = await ToolsInteract.getInteractionService(platform, deviceId)
42
+ return await interact.scrollToElement(selector, direction, maxScrolls, scrollAmount, resolved.id)
43
+ }
44
+
44
45
  }
45
46
 
@@ -0,0 +1,110 @@
1
+ import { UIElement, GetUITreeResponse, SwipeResponse } from '../types.js'
2
+
3
+ export interface ScrollSelector { text?: string; resourceId?: string; contentDesc?: string; className?: string }
4
+
5
+ export async function scrollToElementShared(opts: {
6
+ selector: ScrollSelector,
7
+ direction?: 'down' | 'up',
8
+ maxScrolls?: number,
9
+ scrollAmount?: number,
10
+ deviceId?: string,
11
+ fetchTree: () => Promise<GetUITreeResponse>,
12
+ swipe: (x1: number, y1: number, x2: number, y2: number, duration: number, deviceId?: string) => Promise<SwipeResponse>,
13
+ stabilizationDelayMs?: number
14
+ }): Promise<{ success: boolean; reason?: string; element?: Partial<UIElement>; scrollsPerformed: number }> {
15
+ const { selector, direction = 'down', maxScrolls = 10, scrollAmount = 0.7, deviceId, fetchTree, swipe, stabilizationDelayMs = 350 } = opts
16
+
17
+ const matchElement = (el?: UIElement) => {
18
+ if (!el) return false
19
+ if (selector.text !== undefined && selector.text !== el.text) return false
20
+ if (selector.resourceId !== undefined && selector.resourceId !== el.resourceId) return false
21
+ if (selector.contentDesc !== undefined && selector.contentDesc !== el.contentDescription) return false
22
+ if (selector.className !== undefined && selector.className !== el.type) return false
23
+ return true
24
+ }
25
+
26
+ const isVisible = (el?: UIElement, resolution?: GetUITreeResponse['resolution']) => {
27
+ if (!el) return false
28
+ if (el.visible === false) return false
29
+ if (!el.bounds || !resolution || !resolution.width || !resolution.height) return (el.visible === undefined ? true : !!el.visible)
30
+ const [left, top, right, bottom] = el.bounds
31
+ const withinY = bottom > 0 && top < resolution.height
32
+ const withinX = right > 0 && left < resolution.width
33
+ return withinX && withinY
34
+ }
35
+
36
+ const findVisibleMatch = (elements?: UIElement[], resolution?: GetUITreeResponse['resolution']) => {
37
+ if (!Array.isArray(elements)) return null
38
+ for (const e of elements) {
39
+ if (matchElement(e) && isVisible(e, resolution)) return e
40
+ }
41
+ return null
42
+ }
43
+
44
+ // Initial check
45
+ let tree = await fetchTree()
46
+ if (tree.error) return { success: false, reason: tree.error, scrollsPerformed: 0 }
47
+
48
+ let found = findVisibleMatch(tree.elements, tree.resolution)
49
+ if (found) {
50
+ return { success: true, element: { text: found.text, resourceId: found.resourceId, bounds: found.bounds }, scrollsPerformed: 0 }
51
+ }
52
+
53
+ const fingerprintOf = (t: GetUITreeResponse) => {
54
+ try {
55
+ return JSON.stringify((t.elements || []).map((e: UIElement) => ({ text: e.text, resourceId: e.resourceId, bounds: e.bounds })))
56
+ } catch {
57
+ return ''
58
+ }
59
+ }
60
+
61
+ let prevFingerprint = fingerprintOf(tree)
62
+
63
+ const width = (tree.resolution && tree.resolution.width) ? tree.resolution.width : 0
64
+ const height = (tree.resolution && tree.resolution.height) ? tree.resolution.height : 0
65
+ const centerX = Math.round(width / 2) || 50
66
+
67
+ const clampPct = (v: number) => Math.max(0.05, Math.min(0.95, v))
68
+ const computeCoords = () => {
69
+ const defaultStart = direction === 'down' ? 0.8 : 0.2
70
+ const startPct = clampPct(defaultStart)
71
+ const endPct = clampPct(defaultStart + (direction === 'down' ? -scrollAmount : scrollAmount))
72
+ const x1 = centerX
73
+ const x2 = centerX
74
+ const y1 = Math.round((height || 100) * startPct)
75
+ const y2 = Math.round((height || 100) * endPct)
76
+ return { x1, y1, x2, y2 }
77
+ }
78
+
79
+ const duration = 300
80
+ let scrollsPerformed = 0
81
+
82
+ for (let i = 0; i < maxScrolls; i++) {
83
+ const { x1, y1, x2, y2 } = computeCoords()
84
+ try {
85
+ await swipe(x1, y1, x2, y2, duration, deviceId)
86
+ } catch (e) {
87
+ // Log swipe failures to aid debugging but don't fail the overall flow
88
+ try { console.warn(`scrollToElement swipe failed: ${e instanceof Error ? e.message : String(e)}`) } catch {}
89
+ }
90
+
91
+ scrollsPerformed++
92
+ await new Promise(resolve => setTimeout(resolve, stabilizationDelayMs))
93
+
94
+ tree = await fetchTree()
95
+ if (tree.error) return { success: false, reason: tree.error, scrollsPerformed: scrollsPerformed }
96
+
97
+ found = findVisibleMatch(tree.elements, tree.resolution)
98
+ if (found) {
99
+ return { success: true, element: { text: found.text, resourceId: found.resourceId, bounds: found.bounds }, scrollsPerformed }
100
+ }
101
+
102
+ const fp = fingerprintOf(tree)
103
+ if (fp === prevFingerprint) {
104
+ return { success: false, reason: 'UI unchanged after scroll; likely end of list', scrollsPerformed: scrollsPerformed }
105
+ }
106
+ prevFingerprint = fp
107
+ }
108
+
109
+ return { success: false, reason: 'Element not found after scrolling', scrollsPerformed: scrollsPerformed }
110
+ }
@@ -0,0 +1,24 @@
1
+ #!/usr/bin/env node
2
+ import { AndroidInteract } from '../../../dist/android/interact.js'
3
+
4
+
5
// Usage: tsx test/device/observe/run-scroll-test-android.ts <deviceId> <appId> <selectorText>
// NOTE: <appId> is kept only so existing invocations keep their argument positions;
// this script never reads it and does not launch any app.
const args = process.argv.slice(2)
// `||` (not `??`) is deliberate: an empty-string argument falls through to the next source.
const DEVICE_ID = args[0] || process.env.DEVICE_ID || 'emulator-5554'
const SELECTOR = args[2] || process.env.SELECTOR || 'Generate Session'

/**
 * Manual device check for the scroll_to_element tool on Android.
 *
 * Sends one best-effort tap to wake the device, waits a second, then calls
 * `ToolsInteract.scrollToElementHandler` from the built `dist` output with a
 * text selector and prints the JSON result.
 */
async function main() {
  console.log('Waking device with a best-effort tap...')
  try {
    // A failed tap is not fatal: the scroll call below reports its own errors.
    await new AndroidInteract().tap(10, 10, DEVICE_ID)
  } catch {
    // Intentionally ignored (device may already be awake or tap may be unsupported).
  }
  await new Promise(resolve => setTimeout(resolve, 1000))

  console.log('Running scroll_to_element for selector:', SELECTOR)
  // Load the handler from dist lazily so the script exercises the built artifact.
  const { ToolsInteract } = await import('../../../dist/tools/interact.js')

  const res = await (ToolsInteract as any).scrollToElementHandler({
    platform: 'android',
    selector: { text: SELECTOR },
    direction: 'down',
    maxScrolls: 10,
    scrollAmount: 0.7,
    deviceId: DEVICE_ID
  })
  console.log('Result:', JSON.stringify(res, null, 2))
}

// Log unexpected failures and make them visible to callers through the exit code.
main().catch((err: unknown) => {
  console.error(err)
  process.exitCode = 1
})
@@ -0,0 +1,129 @@
1
+ import { ToolsInteract } from '../../../src/tools/interact.js'
2
+ import { ToolsObserve } from '../../../src/tools/observe.js'
3
+
4
// Original static handlers, captured once so runTests can put them back after stubbing.
const origGet = (ToolsObserve as any).getUITreeHandler
const origSwipe = (ToolsInteract as any).swipeHandler

/** Builds a fresh mocked UI-tree payload (Android, 1080x1920) containing the given elements. */
function mockTree(elements: unknown[]) {
  return {
    device: { platform: 'android', id: 'mock', osVersion: '12', model: 'Pixel', simulator: true },
    screen: '',
    resolution: { width: 1080, height: 1920 },
    elements
  }
}

/** A visible, clickable 'Target' button with bounds [0, 0, 100, 100]. */
function targetButton() {
  return {
    text: 'Target',
    type: 'Button',
    contentDescription: null,
    clickable: true,
    enabled: true,
    visible: true,
    bounds: [0, 0, 100, 100],
    resourceId: null
  }
}

/**
 * Runs the scroll_to_element checks against stubbed tree/swipe handlers and
 * prints PASS/FAIL for each one.
 *
 * Statements that begin with `(` are prefixed with `;`. Without it, the
 * semicolon-less previous line and the parenthesis are parsed as one call
 * expression (e.g. `console.log(...)(ToolsObserve as any)...`), which throws
 * at runtime before the stub is ever installed.
 *
 * All stubs and the console.log override are undone in `finally`, so a
 * failing test cannot leak mocks.
 */
async function runTests() {
  const previousLog = console.log
  // Use a stable logger to avoid test harness replacing console.log between calls
  console.log = (...args: unknown[]) => { try { process.stdout.write(args.map(a => (typeof a === 'string' ? a : JSON.stringify(a))).join(' ') + '\n') } catch {} }

  try {
    console.log('Starting tests for scroll_to_element...')

    // Test 1: Element found immediately
    console.log('\nTest 1: Element found immediately')
    ;(ToolsObserve as any).getUITreeHandler = async () => mockTree([targetButton()])

    const res1 = await ToolsInteract.scrollToElementHandler({ platform: 'android', selector: { text: 'Target' }, direction: 'down', maxScrolls: 5, scrollAmount: 0.7 })
    console.log('Result:', res1.success === true ? 'PASS' : 'FAIL')
    console.log('scrollsPerformed:', (res1 as any).scrollsPerformed)

    // Test 2: Element found after scrolling
    console.log('\nTest 2: Element found after scrolling')
    let calls = 0
    ;(ToolsObserve as any).getUITreeHandler = async () => {
      calls++
      // The first two fetches see an empty screen; the target appears on the third.
      return calls < 3 ? mockTree([]) : mockTree([targetButton()])
    }

    // Stub swipe so it doesn't try to call adb/idb
    ;(ToolsInteract as any).swipeHandler = async () => ({ success: true })

    const res2 = await ToolsInteract.scrollToElementHandler({ platform: 'android', selector: { text: 'Target' }, direction: 'down', maxScrolls: 5, scrollAmount: 0.7 })
    console.log('Result:', res2.success === true ? 'PASS' : 'FAIL')
    console.log('calls:', calls, calls >= 3 ? 'PASS' : 'FAIL')

    // Test 3: UI unchanged stops early
    console.log('\nTest 3: UI unchanged stops early')
    ;(ToolsObserve as any).getUITreeHandler = async () => mockTree([])
    ;(ToolsInteract as any).swipeHandler = async () => ({ success: true })

    const res3 = await ToolsInteract.scrollToElementHandler({ platform: 'android', selector: { text: 'Missing' }, direction: 'down', maxScrolls: 5, scrollAmount: 0.7 })
    // The scroll routine reports its count as `scrollsPerformed`; there is no `attempts` field.
    console.log('Result:', res3.success === false && (res3 as any).scrollsPerformed === 1 ? 'PASS' : 'FAIL')
    console.log('Reason:', (res3 as any).reason || JSON.stringify(res3))

    // Test 4: Offscreen element scrolls into view
    console.log('\nTest 4: Offscreen element scrolls into view')
    const ai = new (await import('../../../src/android/interact.js')).AndroidInteract()
    const origObserveGet = ai['observe'].getUITree
    const origAiSwipe = ai.swipe
    let swiped = false
    let swipeCalled = 0
    try {
      ;(ai['observe'] as any).getUITree = async () => {
        if (!swiped) {
          // Before any swipe only an unrelated, text-less view is on screen.
          return mockTree([{ text: null, type: 'android.view.View', resourceId: null, contentDescription: null, bounds: [0, 0, 1080, 200], visible: true }])
        }
        return mockTree([{ text: 'OffscreenTarget', type: 'android.widget.Button', contentDescription: null, clickable: true, enabled: true, visible: true, bounds: [100, 400, 300, 460], resourceId: null }])
      }
      ;(ai as any).swipe = async () => { swipeCalled++; swiped = true; return { success: true } }

      const r4 = await ai.scrollToElement({ text: 'OffscreenTarget' }, 'down', 3, 0.7, 'mock')
      const ok4 = r4 && (r4 as any).success === true && (r4 as any).scrollsPerformed === 1 && swipeCalled === 1
      console.log('Result:', ok4 ? 'PASS' : 'FAIL')
      console.log(' success:', (r4 as any).success, 'scrollsPerformed:', (r4 as any).scrollsPerformed, 'swipeCalled:', swipeCalled)
    } finally {
      ;(ai['observe'] as any).getUITree = origObserveGet
      ;(ai as any).swipe = origAiSwipe
    }
  } finally {
    // Restore
    ;(ToolsObserve as any).getUITreeHandler = origGet
    ;(ToolsInteract as any).swipeHandler = origSwipe
    console.log = previousLog
  }
}
123
+
124
// Defensive guard: if console.log has been replaced with a non-function,
// install a minimal stdout-backed logger so the tests can still report.
if (typeof console.log !== 'function') {
  console.log = (...args: unknown[]) => { try { process.stdout.write(args.map(a => (typeof a === 'string' ? a : JSON.stringify(a))).join(' ') + '\n') } catch { /* swallow */ } }
}

// Run the suite. An unexpected rejection is logged and also reflected in the
// exit code, so a crashed run is not mistaken for a clean one.
runTests().catch((err: unknown) => {
  console.error(err)
  process.exitCode = 1
})
@@ -95,7 +95,7 @@ async function runTests() {
95
95
  const elapsed4 = Date.now() - start4;
96
96
  console.log("Result:", result4.found === false && result4.error === "ADB Connection Failed" ? "PASS" : "FAIL");
97
97
  console.log("Error Message:", result4.error);
98
- console.log("Elapsed time (should be < 500ms):", elapsed4, elapsed4 < 500 ? "PASS" : "FAIL");
98
+ console.log("Elapsed time (should be < 1000ms):", elapsed4, elapsed4 < 1000 ? "PASS" : "FAIL");
99
99
 
100
100
  // Restore
101
101
  (AndroidObserve as any).prototype.getUITree = originalGetUITree;