mobile-debug-mcp 0.12.8 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mobile-debug-mcp",
3
- "version": "0.12.8",
3
+ "version": "0.14.0",
4
4
  "description": "MCP server for mobile app debugging (Android + iOS), with focus on security and reliability",
5
5
  "type": "module",
6
6
  "bin": {
@@ -1,6 +1,7 @@
1
1
  import { WaitForElementResponse, TapResponse, SwipeResponse, TypeTextResponse, PressBackResponse } from "../types.js"
2
2
  import { execAdb, getAndroidDeviceMetadata, getDeviceInfo } from "./utils.js"
3
3
  import { AndroidObserve } from "./observe.js"
4
+ import { scrollToElementShared } from "../tools/scroll_to_element.js"
4
5
 
5
6
 
6
7
  export class AndroidInteract {
@@ -88,4 +89,16 @@ export class AndroidInteract {
88
89
  }
89
90
  }
90
91
 
92
/**
 * Scrolls the Android screen until an element matching `selector` is visible.
 *
 * Delegates the search loop to `scrollToElementShared`, supplying this class's
 * UI-tree fetch (`this.observe.getUITree`) and swipe gesture as callbacks.
 *
 * @param selector     Fields identifying the target element.
 * @param direction    Scroll direction; defaults to 'down'.
 * @param maxScrolls   Upper bound on swipe attempts; defaults to 10.
 * @param scrollAmount Swipe length passed through to the shared helper; defaults to 0.7.
 * @param deviceId     Optional device identifier forwarded to the tree fetch and swipe.
 * @returns Whatever `scrollToElementShared` resolves with.
 */
async scrollToElement(selector: { text?: string, resourceId?: string, contentDesc?: string, className?: string }, direction: 'down' | 'up' = 'down', maxScrolls = 10, scrollAmount = 0.7, deviceId?: string) {
  // Callback that snapshots the current UI tree for the target device.
  const fetchTree = async () => {
    return await this.observe.getUITree(deviceId)
  }

  // Callback that performs one swipe; the shared helper decides the coordinates.
  const swipe = async (fromX: number, fromY: number, toX: number, toY: number, durationMs: number, targetDevice?: string) => {
    return await this.swipe(fromX, fromY, toX, toY, durationMs, targetDevice)
  }

  const outcome = await scrollToElementShared({ selector, direction, maxScrolls, scrollAmount, deviceId, fetchTree, swipe })
  return outcome
}
103
+
91
104
  }
@@ -1,7 +1,8 @@
1
1
  import { spawn } from "child_process"
2
- import { WaitForElementResponse, TapResponse } from "../types.js"
2
+ import { WaitForElementResponse, TapResponse, SwipeResponse } from "../types.js"
3
3
  import { getIOSDeviceMetadata, getIdbCmd, isIDBInstalled } from "./utils.js"
4
4
  import { iOSObserve } from "./observe.js"
5
+ import { scrollToElementShared } from "../tools/scroll_to_element.js"
5
6
 
6
7
  export class iOSInteract {
7
8
  private observe = new iOSObserve();
@@ -75,4 +76,58 @@ export class iOSInteract {
75
76
  return { device, success: false, x, y, error: e instanceof Error ? e.message : String(e) };
76
77
  }
77
78
  }
79
+
80
/**
 * Performs a swipe on an iOS target by spawning `idb ui swipe`.
 *
 * Never rejects for an idb failure: a missing idb install, a non-zero exit code
 * or a spawn error is reported through `success: false` plus `error`.
 *
 * @param x1       Start X coordinate.
 * @param y1       Start Y coordinate.
 * @param x2       End X coordinate.
 * @param y2       End Y coordinate.
 * @param duration Echoed back in the response only; it is not passed to idb.
 *                 NOTE(review): the original author states `idb ui swipe` takes no
 *                 duration argument — confirm against the installed idb version.
 * @param deviceId Target device; defaults to "booted".
 * @returns A SwipeResponse describing the attempted gesture.
 */
async swipe(x1: number, y1: number, x2: number, y2: number, duration: number, deviceId: string = "booted"): Promise<SwipeResponse> {
  const device = await getIOSDeviceMetadata(deviceId);

  // Without idb there is no way to inject the gesture, so report and bail out.
  const hasIdb = await isIDBInstalled();
  if (!hasIdb) {
    return { device, success: false, start: [x1, y1], end: [x2, y2], duration, error: "iOS swipe requires 'idb' (iOS Device Bridge)." };
  }

  try {
    // Only pass --udid for a concrete device id; 'booted' lets idb pick its default target.
    const udidArgs: string[] = (device.id && device.id !== 'booted') ? ['--udid', device.id] : [];
    const idbArgs: string[] = ['ui', 'swipe', ...[x1, y1, x2, y2].map(coord => coord.toString()), ...udidArgs];

    await new Promise<void>((resolve, reject) => {
      const child = spawn(getIdbCmd(), idbArgs);
      let errText = '';
      child.stderr.on('data', chunk => { errText += chunk.toString(); });
      child.on('close', exitCode => {
        if (exitCode !== 0) {
          reject(new Error(`idb ui swipe failed: ${errText}`));
          return;
        }
        resolve();
      });
      child.on('error', spawnErr => reject(spawnErr));
    });

    return { device, success: true, start: [x1, y1], end: [x2, y2], duration };
  } catch (e) {
    const message = e instanceof Error ? e.message : String(e);
    return { device, success: false, start: [x1, y1], end: [x2, y2], duration, error: message };
  }
}
120
+
121
/**
 * Scrolls the iOS screen until an element matching `selector` is visible.
 *
 * Delegates the search loop to `scrollToElementShared`, supplying this class's
 * UI-tree fetch (`this.observe.getUITree`) and `swipe` as callbacks.
 *
 * @param selector     Fields identifying the target element.
 * @param direction    Scroll direction; defaults to 'down'.
 * @param maxScrolls   Upper bound on swipe attempts; defaults to 10.
 * @param scrollAmount Swipe length passed through to the shared helper; defaults to 0.7.
 * @param deviceId     Target device; defaults to 'booted'.
 * @returns Whatever `scrollToElementShared` resolves with.
 */
async scrollToElement(selector: { text?: string, resourceId?: string, contentDesc?: string, className?: string }, direction: 'down' | 'up' = 'down', maxScrolls = 10, scrollAmount = 0.7, deviceId: string = 'booted') {
  // Callback that snapshots the current UI tree for the target device.
  const fetchTree = async () => {
    return await this.observe.getUITree(deviceId)
  }

  // Callback that performs one swipe; the shared helper decides the coordinates.
  const swipe = async (fromX: number, fromY: number, toX: number, toY: number, durationMs: number, targetDevice?: string) => {
    return await this.swipe(fromX, fromY, toX, toY, durationMs, targetDevice)
  }

  const outcome = await scrollToElementShared({ selector, direction, maxScrolls, scrollAmount, deviceId, fetchTree, swipe })
  return outcome
}
78
132
  }
133
+
package/src/ios/manage.ts CHANGED
@@ -298,102 +298,15 @@ export class iOSManage {
298
298
 
299
299
  async startApp(bundleId: string, deviceId: string = "booted"): Promise<StartAppResponse> {
300
300
  validateBundleId(bundleId)
301
- // Prepare instrumentation object upfront so it can be returned to callers
302
- const instrumentation = { ts: new Date().toISOString(), action: 'startApp', cmd: 'xcrun', args: ['simctl','launch', deviceId, bundleId], cwd: process.cwd(), env: { PATH: process.env.PATH, XCRUN_PATH: process.env.XCRUN_PATH } }
303
-
304
- try {
305
- // Instrumentation: persist and emit to stderr for server logs
306
- try { await fs.appendFile('/tmp/mcp_startapp_instrument.log', JSON.stringify(instrumentation) + '\n') } catch (e) {}
307
- try { console.error('MCP-STARTAPP-EXEC', JSON.stringify(instrumentation)) } catch (e) {}
308
- } catch {}
309
-
310
- // Attempt to launch
311
- let launchResult: any = null
312
301
  try {
313
- launchResult = await execCommand(['simctl', 'launch', deviceId, bundleId], deviceId)
314
- } catch (launchErr:any) {
315
- // Collect diagnostics when simctl launch fails
316
- const launchDiag = execCommandWithDiagnostics(['simctl', 'launch', deviceId, bundleId], deviceId)
302
+ const result = await execCommand(['simctl', 'launch', deviceId, bundleId], deviceId)
317
303
  const device = await getIOSDeviceMetadata(deviceId)
318
- const post = await this.collectPostLaunchDiagnostics(bundleId, deviceId)
319
- return { device, appStarted: false, launchTimeMs: 0, error: launchErr instanceof Error ? launchErr.message : String(launchErr), diagnostics: { launchDiag, post }, instrumentation } as any
320
- }
321
-
322
- // Basic success but verify RunningBoard/installcoordination didn't mark it as placeholder
323
- const device = await getIOSDeviceMetadata(deviceId)
324
- // short wait to let system settle
325
- await new Promise(r => setTimeout(r, 1000))
326
-
327
- let appinfo = ''
328
- try {
329
- const ai = await execCommand(['simctl', 'appinfo', deviceId, bundleId], deviceId)
330
- appinfo = ai.output || ''
331
- } catch {}
332
-
333
- // capture recent runningboard/installcoordination logs
334
- const logDiag = execCommandWithDiagnostics(['simctl','spawn',deviceId,'log','show','--style','syslog','--predicate',`(process == "${bundleId}" ) OR eventMessage CONTAINS "installcoordinationd" OR eventMessage CONTAINS "runningboard"`, '--last', '1m'], deviceId)
335
-
336
- const placeholderDetected = (appinfo && /isPlaceholder[:=]?\s*Y/i.test(appinfo)) || (logDiag && ((logDiag.runResult && ((logDiag.runResult.stdout || '').includes('isPlaceholder')) || (logDiag.runResult.stderr || '').includes('isPlaceholder'))))
337
-
338
- if (placeholderDetected) {
339
- const post = await this.collectPostLaunchDiagnostics(bundleId, deviceId, appinfo)
340
- return { device, appStarted: false, launchTimeMs: 0, diagnostics: { appinfo, logDiag, post }, instrumentation } as any
341
- }
342
-
343
- return { device, appStarted: !!(launchResult && launchResult.output), launchTimeMs: 1000, instrumentation }
344
- }
345
-
346
- appExecutableName(bundleId: string) {
347
- // Best-effort executable name: prefer last component of bundleId
348
- try { const candidate = bundleId.split('.').pop(); return candidate || bundleId }
349
- catch { return bundleId }
350
- }
351
-
352
- // Collect bundle- and system-level diagnostics after a failed or placeholder launch
353
- async collectPostLaunchDiagnostics(bundleId: string, deviceId: string = "booted", appinfo?: string) {
354
- const diagnostics: any = { ts: new Date().toISOString(), bundleId, deviceId }
355
-
356
- // gather simctl appinfo (if not provided)
357
- try { diagnostics.appinfo = appinfo || ((await execCommand(['simctl','appinfo', deviceId, bundleId], deviceId)).output || '') } catch (e) { diagnostics.appinfoError = String(e) }
358
-
359
- // attempt to discover bundle path from appinfo
360
- let bundlePath: string | null = null
361
- if (diagnostics.appinfo) {
362
- const m = diagnostics.appinfo.match(/Path\s*=\s*"?([\S]+)"?/) || diagnostics.appinfo.match(/Container: (\/\S+)/)
363
- if (m) bundlePath = m[1]
364
- }
365
-
366
- // lipo / file / otool / codesign / xattr
367
- if (bundlePath) {
368
- diagnostics.bundlePath = bundlePath
369
- const execs = [
370
- { name: 'file', cmd: ['file', bundlePath + '/' + this.appExecutableName(bundleId)] },
371
- { name: 'lipo', cmd: ['lipo', '-info', bundlePath + '/' + this.appExecutableName(bundleId)] },
372
- { name: 'otool-L', cmd: ['otool', '-L', bundlePath + '/' + this.appExecutableName(bundleId)] },
373
- { name: 'otool-load', cmd: ['otool', '-l', bundlePath + '/' + this.appExecutableName(bundleId)] },
374
- { name: 'plutil', cmd: ['plutil', '-p', bundlePath + '/Info.plist'] },
375
- { name: 'codesign', cmd: ['codesign', '-dvvv', bundlePath] },
376
- { name: 'xattr', cmd: ['xattr', '-l', bundlePath] },
377
- { name: 'ls', cmd: ['ls', '-la', bundlePath] },
378
- ]
379
-
380
- diagnostics.bundle = {}
381
- for (const e of execs) {
382
- try {
383
- const r = execCommandWithDiagnostics(e.cmd, deviceId)
384
- diagnostics.bundle[e.name] = r && r.runResult ? { stdout: r.runResult.stdout, stderr: r.runResult.stderr, code: r.runResult.exitCode } : { error: 'no-result' }
385
- } catch (err) { diagnostics.bundle[e.name] = { error: String(err) } }
386
- }
304
+ return { device, appStarted: !!result.output, launchTimeMs: 1000 }
305
+ } catch (e:any) {
306
+ const diag = execCommandWithDiagnostics(['simctl', 'launch', deviceId, bundleId], deviceId)
307
+ const device = await getIOSDeviceMetadata(deviceId)
308
+ return { device, appStarted: false, launchTimeMs: 0, error: e instanceof Error ? e.message : String(e), diagnostics: diag } as any
387
309
  }
388
-
389
- // collect recent system logs and a screenshot
390
- try { diagnostics.recentLogs = execCommandWithDiagnostics(['simctl','spawn',deviceId,'log','show','--style','syslog','--predicate',`eventMessage CONTAINS "installcoordinationd" OR eventMessage CONTAINS "runningboard"`, '--last', '5m'], deviceId) } catch (e) { diagnostics.recentLogsError = String(e) }
391
- try {
392
- const shot = await execCommandWithDiagnostics(['simctl','io', deviceId, 'screenshot', '--type', 'png', '/tmp/mcp_post_launch_screenshot.png'], deviceId)
393
- diagnostics.screenshot = { created: true, path: '/tmp/mcp_post_launch_screenshot.png', result: shot && shot.runResult }
394
- } catch (e) { diagnostics.screenshotError = String(e) }
395
-
396
- return diagnostics
397
310
  }
398
311
 
399
312
  async terminateApp(bundleId: string, deviceId: string = "booted"): Promise<TerminateAppResponse> {
package/src/ios/utils.ts CHANGED
@@ -88,41 +88,7 @@ export function validateBundleId(bundleId: string) {
88
88
 
89
89
  export function execCommand(args: string[], deviceId: string = "booted"): Promise<IOSResult> {
90
90
  return new Promise((resolve, reject) => {
91
- // Instrumentation: append a JSON line with timestamp, command, args, cwd and selected env vars
92
- try {
93
- const mcpEnv: Record<string,string|undefined> = {}
94
- for (const k of Object.keys(process.env || {})) {
95
- if (k.startsWith('MCP_')) mcpEnv[k] = process.env[k]
96
- }
97
-
98
- const instrument = {
99
- timestamp: new Date().toISOString(),
100
- command: getXcrunCmd(),
101
- args,
102
- cwd: process.cwd(),
103
- env: {
104
- PATH: process.env.PATH,
105
- XCRUN_PATH: process.env.XCRUN_PATH,
106
- ...mcpEnv
107
- }
108
- }
109
-
110
- try {
111
- require('fs').appendFileSync('/tmp/mcp_exec_instrument.log', JSON.stringify(instrument) + '\n')
112
- } catch (e) {}
113
-
114
- } catch (e) {
115
- // swallow instrumentation errors to avoid changing behavior
116
- }
117
-
118
91
  // Use spawn for better stream control and consistency with Android implementation
119
- // Instrument: emit a JSON line to stderr so the MCP server stderr/stdout capture can record the exact command and env
120
- try {
121
- const instLine = JSON.stringify({ ts: new Date().toISOString(), cmd: getXcrunCmd(), args, cwd: process.cwd(), PATH: process.env.PATH })
122
- // Use stderr so it appears in server logs reliably
123
- console.error('MCP-INSTRUMENT-EXEC', instLine)
124
- } catch (e) {}
125
-
126
92
  const child = spawn(getXcrunCmd(), args)
127
93
 
128
94
  let stdout = ''
@@ -140,14 +106,6 @@ export function execCommand(args: string[], deviceId: string = "booted"): Promis
140
106
  })
141
107
  }
142
108
 
143
- // Additional instrumentation: write pid and env snapshot when child starts
144
- try {
145
- const pidInfo = { ts: new Date().toISOString(), childPid: (child.pid || null), invoked: getXcrunCmd(), args }
146
- try { require('fs').appendFileSync('/tmp/mcp_exec_instrument.log', JSON.stringify(pidInfo) + '\n') } catch (e) {}
147
- } catch (e) {
148
- // ignore
149
- }
150
-
151
109
  const DEFAULT_XCRUN_LOG_TIMEOUT = parseInt(process.env.MCP_XCRUN_LOG_TIMEOUT || '', 10) || 30000 // env (ms) or default 30s
152
110
  const DEFAULT_XCRUN_CMD_TIMEOUT = parseInt(process.env.MCP_XCRUN_TIMEOUT || '', 10) || 60000 // env (ms) or default 60s
153
111
  const timeoutMs = args.includes('log') ? DEFAULT_XCRUN_LOG_TIMEOUT : DEFAULT_XCRUN_CMD_TIMEOUT // choose appropriate timeout
@@ -173,11 +131,6 @@ export function execCommand(args: string[], deviceId: string = "booted"): Promis
173
131
  }
174
132
 
175
133
  export function execCommandWithDiagnostics(args: string[], deviceId: string = "booted") {
176
- try {
177
- const syncInst = { ts: new Date().toISOString(), cmd: getXcrunCmd(), args, cwd: process.cwd() }
178
- require('fs').appendFileSync('/tmp/mcp_exec_instrument_sync.log', JSON.stringify(syncInst) + '\n')
179
- } catch (e) {}
180
-
181
134
  // Run synchronously to capture stdout/stderr and exitCode reliably for diagnostics
182
135
  const DEFAULT_XCRUN_LOG_TIMEOUT = parseInt(process.env.MCP_XCRUN_LOG_TIMEOUT || '', 10) || 30000
183
136
  const DEFAULT_XCRUN_CMD_TIMEOUT = parseInt(process.env.MCP_XCRUN_TIMEOUT || '', 10) || 60000
package/src/server.ts CHANGED
@@ -346,8 +346,8 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
346
346
  properties: {
347
347
  platform: {
348
348
  type: "string",
349
- enum: ["android"],
350
- description: "Platform to swipe on (currently only android supported)"
349
+ enum: ["android","ios"],
350
+ description: "Platform to swipe on (android or ios)"
351
351
  },
352
352
  x1: { type: "number", description: "Start X coordinate" },
353
353
  y1: { type: "number", description: "Start Y coordinate" },
@@ -362,6 +362,30 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
362
362
  required: ["x1", "y1", "x2", "y2", "duration"]
363
363
  }
364
364
  },
365
+ {
366
+ name: "scroll_to_element",
367
+ description: "Scroll the current screen until a target UI element becomes visible, then return its details.",
368
+ inputSchema: {
369
+ type: "object",
370
+ properties: {
371
+ platform: { type: "string", enum: ["android", "ios"], description: "Platform to operate on (required)" },
372
+ selector: {
373
+ type: "object",
374
+ properties: {
375
+ text: { type: "string" },
376
+ resourceId: { type: "string" },
377
+ contentDesc: { type: "string" },
378
+ className: { type: "string" }
379
+ }
380
+ },
381
+ direction: { type: "string", enum: ["down", "up"], default: "down" },
382
+ maxScrolls: { type: "number", default: 10 },
383
+ scrollAmount: { type: "number", default: 0.7 },
384
+ deviceId: { type: "string", description: "Device UDID (iOS) or Serial (Android). Defaults to booted/connected." }
385
+ },
386
+ required: ["platform", "selector"]
387
+ }
388
+ },
365
389
  {
366
390
  name: "type_text",
367
391
  description: "Type text into the currently focused input field on an Android device.",
@@ -412,31 +436,54 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
412
436
  try {
413
437
  if (name === "start_app") {
414
438
  const { platform, appId, deviceId } = args as any
415
- // Defensive validation: ensure required args are present and log malformed requests
439
+ // Defensive validation: ensure caller provided platform and appId.
416
440
  if (!platform || !appId) {
417
- try { require('fs').appendFileSync('/tmp/mcp_bad_requests.log', JSON.stringify({ ts: new Date().toISOString(), tool: 'start_app', args }) + '\n') } catch (e) {}
418
- const deviceFallback: any = { platform: platform || 'ios', id: deviceId || 'unknown', osVersion: '', model: '', simulator: true }
419
- const response: StartAppResponse = { device: deviceFallback, appStarted: false, launchTimeMs: 0, error: 'Missing required argument: platform and/or appId', diagnostics: { receivedArgs: args } }
420
- return wrapResponse(response)
421
- }
441
+ const msg = 'Both platform and appId parameters are required (platform: ios|android, appId: bundle id or package name).'
442
+ const payload = { ts: new Date().toISOString(), tool: 'start_app', args }
443
+ let logged = false
444
+
445
+ // Prefer the diagnostics module when available
446
+ try {
447
+ const diag = require('./utils/diagnostics.js')
448
+ if (diag && diag.appendDiagnosticFile) {
449
+ diag.appendDiagnosticFile('bad_requests.log', payload)
450
+ logged = true
451
+ }
452
+ } catch (err) {
453
+ console.error('Diagnostics append failed:', String(err))
454
+ }
422
455
 
423
- try {
424
- const res = await (platform === 'android' ? new AndroidManage().startApp(appId, deviceId) : new iOSManage().startApp(appId, deviceId))
425
- // Preserve diagnostics and instrumentation from platform managers so agents receive full context
426
- const response: StartAppResponse = {
427
- device: res.device,
428
- appStarted: res.appStarted,
429
- launchTimeMs: res.launchTimeMs,
430
- error: (res as any).error,
431
- diagnostics: (res as any).diagnostics,
432
- instrumentation: (res as any).instrumentation
456
+ // Fallback to /tmp file (synchronous) and report failures rather than swallowing
457
+ if (!logged) {
458
+ try {
459
+ const fs = require('fs')
460
+ fs.appendFileSync('/tmp/mcp_bad_requests.log', JSON.stringify(payload) + '\n')
461
+ logged = true
462
+ } catch (err) {
463
+ console.error('Failed to write bad request to /tmp/mcp_bad_requests.log:', String(err))
464
+ }
433
465
  }
434
- return wrapResponse(response)
435
- } catch (err:any) {
436
- try { require('fs').appendFileSync('/tmp/mcp_bad_requests.log', JSON.stringify({ ts: new Date().toISOString(), tool: 'start_app', args, error: err && err.message ? err.message : String(err) }) + '\n') } catch (e) {}
437
- const deviceFallback: any = { platform: platform || 'ios', id: deviceId || 'unknown', osVersion: '', model: '', simulator: true }
438
- return wrapResponse({ device: deviceFallback, appStarted: false, launchTimeMs: 0, error: err instanceof Error ? err.message : String(err), diagnostics: { receivedArgs: args } })
466
+
467
+ // Final fallback: emit payload to stderr so it's visible in server logs
468
+ if (!logged) {
469
+ try {
470
+ console.error('Bad request (start_app) payload:', JSON.stringify(payload))
471
+ } catch (err) {
472
+ // Last resort: still log the failure
473
+ console.error('Failed to emit bad request payload to stderr:', String(err))
474
+ }
475
+ }
476
+
477
+ return wrapResponse({ error: msg })
439
478
  }
479
+
480
+ const res = await (platform === 'android' ? new AndroidManage().startApp(appId, deviceId) : new iOSManage().startApp(appId, deviceId))
481
+ const response: StartAppResponse = {
482
+ device: res.device,
483
+ appStarted: res.appStarted,
484
+ launchTimeMs: res.launchTimeMs
485
+ }
486
+ return wrapResponse(response)
440
487
  }
441
488
 
442
489
  if (name === "terminate_app") {
@@ -545,8 +592,14 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
545
592
  }
546
593
 
547
594
  if (name === "swipe") {
548
- const { x1, y1, x2, y2, duration, deviceId } = (args || {}) as any
549
- const res = await ToolsInteract.swipeHandler({ x1, y1, x2, y2, duration, deviceId })
595
+ const { platform = 'android', x1, y1, x2, y2, duration, deviceId } = (args || {}) as any
596
+ const res = await ToolsInteract.swipeHandler({ platform, x1, y1, x2, y2, duration, deviceId })
597
+ return wrapResponse(res)
598
+ }
599
+
600
+ if (name === "scroll_to_element") {
601
+ const { platform, selector, direction, maxScrolls, scrollAmount, deviceId } = (args || {}) as any
602
+ const res = await ToolsInteract.scrollToElementHandler({ platform, selector, direction, maxScrolls, scrollAmount, deviceId })
550
603
  return wrapResponse(res)
551
604
  }
552
605
 
@@ -4,31 +4,27 @@ import { iOSInteract } from '../ios/interact.js'
4
4
 
5
5
  export class ToolsInteract {
6
6
 
7
/**
 * Resolves the target device and builds the matching platform interaction service.
 *
 * @param platform Requested platform; a falsy value falls back to 'android'.
 * @param deviceId Optional device identifier handed to `resolveTargetDevice`.
 * @returns The interaction service (loosely typed), the resolved device and the
 *          platform that was actually used.
 *
 * NOTE(review): any value other than 'android' selects the iOS service; callers
 * are presumably expected to validate `platform` beforehand — confirm.
 */
private static async getInteractionService(platform?: 'android' | 'ios', deviceId?: string) {
  const chosen: 'android' | 'ios' = platform || 'android'

  // Resolve the device first so resolution failures surface before a service is built.
  const resolved = await resolveTargetDevice({ platform: chosen, deviceId })

  let interact: any
  if (chosen === 'android') {
    interact = new AndroidInteract()
  } else {
    interact = new iOSInteract()
  }

  return { interact, resolved, platform: chosen }
}
13
+
7
14
  static async waitForElementHandler({ platform, text, timeout, deviceId }: { platform: 'android' | 'ios', text: string, timeout?: number, deviceId?: string }) {
8
15
  const effectiveTimeout = timeout ?? 10000
9
- if (platform === 'android') {
10
- const resolved = await resolveTargetDevice({ platform: 'android', deviceId })
11
- return await new AndroidInteract().waitForElement(text, effectiveTimeout, resolved.id)
12
- } else {
13
- const resolved = await resolveTargetDevice({ platform: 'ios', deviceId })
14
- return await new iOSInteract().waitForElement(text, effectiveTimeout, resolved.id)
15
- }
16
+ const { interact, resolved } = await ToolsInteract.getInteractionService(platform, deviceId)
17
+ return await interact.waitForElement(text, effectiveTimeout, resolved.id)
16
18
  }
17
19
 
18
20
  static async tapHandler({ platform, x, y, deviceId }: { platform?: 'android' | 'ios', x: number, y: number, deviceId?: string }) {
19
- const effectivePlatform = platform || 'android'
20
- if (effectivePlatform === 'android') {
21
- const resolved = await resolveTargetDevice({ platform: 'android', deviceId })
22
- return await new AndroidInteract().tap(x, y, resolved.id)
23
- } else {
24
- const resolved = await resolveTargetDevice({ platform: 'ios', deviceId })
25
- return await new iOSInteract().tap(x, y, resolved.id)
26
- }
21
+ const { interact, resolved } = await ToolsInteract.getInteractionService(platform, deviceId)
22
+ return await interact.tap(x, y, resolved.id)
27
23
  }
28
24
 
29
- static async swipeHandler({ x1, y1, x2, y2, duration, deviceId }: { x1: number, y1: number, x2: number, y2: number, duration: number, deviceId?: string }) {
30
- const resolved = await resolveTargetDevice({ platform: 'android', deviceId })
31
- return await new AndroidInteract().swipe(x1, y1, x2, y2, duration, resolved.id)
25
+ static async swipeHandler({ platform = 'android', x1, y1, x2, y2, duration, deviceId }: { platform?: 'android' | 'ios', x1: number, y1: number, x2: number, y2: number, duration: number, deviceId?: string }) {
26
+ const { interact, resolved } = await ToolsInteract.getInteractionService(platform, deviceId)
27
+ return await interact.swipe(x1, y1, x2, y2, duration, resolved.id)
32
28
  }
33
29
 
34
30
  static async typeTextHandler({ text, deviceId }: { text: string, deviceId?: string }) {
@@ -41,5 +37,10 @@ export class ToolsInteract {
41
37
  return await new AndroidInteract().pressBack(resolved.id)
42
38
  }
43
39
 
40
/**
 * Tool handler for `scroll_to_element`: resolves the device and platform service,
 * then asks that service to scroll until the selected element is visible.
 *
 * @param params.platform     Platform to operate on.
 * @param params.selector     Fields identifying the target element.
 * @param params.direction    Scroll direction; defaults to 'down'.
 * @param params.maxScrolls   Upper bound on swipe attempts; defaults to 10.
 * @param params.scrollAmount Swipe length forwarded to the service; defaults to 0.7.
 * @param params.deviceId     Optional device identifier to resolve.
 * @returns The result of the platform service's `scrollToElement`.
 */
static async scrollToElementHandler({ platform, selector, direction = 'down', maxScrolls = 10, scrollAmount = 0.7, deviceId }: { platform: 'android' | 'ios', selector: { text?: string, resourceId?: string, contentDesc?: string, className?: string }, direction?: 'down' | 'up', maxScrolls?: number, scrollAmount?: number, deviceId?: string }) {
  const service = await ToolsInteract.getInteractionService(platform, deviceId)
  // Use the resolved device id rather than the raw request value.
  const targetId = service.resolved.id
  return await service.interact.scrollToElement(selector, direction, maxScrolls, scrollAmount, targetId)
}
44
+
44
45
  }
45
46
 
@@ -0,0 +1,110 @@
1
+ import { UIElement, GetUITreeResponse, SwipeResponse } from '../types.js'
2
+
3
/** Exact-match criteria for the target element; every field that is provided must equal the element's value. */
export interface ScrollSelector { text?: string; resourceId?: string; contentDesc?: string; className?: string }

/**
 * Platform-neutral "scroll until visible" loop shared by the Android and iOS services.
 *
 * Fetches the UI tree, returns immediately if a visible match exists, and otherwise
 * swipes and re-fetches up to `maxScrolls` times. The loop stops early when the tree
 * fingerprint (text, resourceId, bounds of every element) does not change after a swipe.
 *
 * @param opts.selector             Match criteria; at least one field must be provided.
 * @param opts.direction            'down' swipes from 80% of the height upwards, 'up' from 20% downwards. Default 'down'.
 * @param opts.maxScrolls           Maximum number of swipe attempts. Default 10.
 * @param opts.scrollAmount         Swipe length as a fraction of screen height (end point is clamped to 5%–95%).
 *                                  Non-finite values fall back to the default 0.7.
 * @param opts.deviceId             Forwarded unchanged to `swipe`.
 * @param opts.fetchTree            Returns the current UI tree (elements, resolution, optional error).
 * @param opts.swipe                Performs a single swipe gesture.
 * @param opts.stabilizationDelayMs Pause after each swipe before re-reading the tree. Default 350.
 * @returns `success` with the matched element's text/resourceId/bounds, or `success: false`
 *          with a `reason`; `scrollsPerformed` counts swipe attempts in both cases.
 */
export async function scrollToElementShared(opts: {
  selector: ScrollSelector,
  direction?: 'down' | 'up',
  maxScrolls?: number,
  scrollAmount?: number,
  deviceId?: string,
  fetchTree: () => Promise<GetUITreeResponse>,
  swipe: (x1: number, y1: number, x2: number, y2: number, duration: number, deviceId?: string) => Promise<SwipeResponse>,
  stabilizationDelayMs?: number
}): Promise<{ success: boolean; reason?: string; element?: Partial<UIElement>; scrollsPerformed: number }> {
  const { selector, direction = 'down', maxScrolls = 10, deviceId, fetchTree, swipe, stabilizationDelayMs = 350 } = opts

  // A NaN/Infinity amount would turn into NaN swipe coordinates, so fall back to the default.
  const requestedAmount = opts.scrollAmount
  const scrollAmount = typeof requestedAmount === 'number' && Number.isFinite(requestedAmount) ? requestedAmount : 0.7

  // A selector without criteria would match the first visible element and report a
  // false success; reject it (and a missing selector) before touching the device.
  const criteria = selector ? [selector.text, selector.resourceId, selector.contentDesc, selector.className] : []
  if (!criteria.some(value => value !== undefined)) {
    return { success: false, reason: 'Selector must specify at least one of text, resourceId, contentDesc or className', scrollsPerformed: 0 }
  }

  const matchElement = (el?: UIElement) => {
    if (!el) return false
    if (selector.text !== undefined && selector.text !== el.text) return false
    if (selector.resourceId !== undefined && selector.resourceId !== el.resourceId) return false
    if (selector.contentDesc !== undefined && selector.contentDesc !== el.contentDescription) return false
    if (selector.className !== undefined && selector.className !== el.type) return false
    return true
  }

  // Visible means: not flagged invisible and, when bounds and resolution are known,
  // overlapping the screen rectangle. Without geometry the `visible` flag decides.
  const isVisible = (el?: UIElement, resolution?: GetUITreeResponse['resolution']) => {
    if (!el) return false
    if (el.visible === false) return false
    if (!el.bounds || !resolution || !resolution.width || !resolution.height) return (el.visible === undefined ? true : !!el.visible)
    const [left, top, right, bottom] = el.bounds
    const withinY = bottom > 0 && top < resolution.height
    const withinX = right > 0 && left < resolution.width
    return withinX && withinY
  }

  const findVisibleMatch = (elements?: UIElement[], resolution?: GetUITreeResponse['resolution']) => {
    if (!Array.isArray(elements)) return null
    for (const e of elements) {
      if (matchElement(e) && isVisible(e, resolution)) return e
    }
    return null
  }

  const describe = (el: UIElement) => ({ text: el.text, resourceId: el.resourceId, bounds: el.bounds })

  // Initial check: the element may already be on screen.
  let tree = await fetchTree()
  if (tree.error) return { success: false, reason: tree.error, scrollsPerformed: 0 }

  let found = findVisibleMatch(tree.elements, tree.resolution)
  if (found) {
    return { success: true, element: describe(found), scrollsPerformed: 0 }
  }

  // Cheap snapshot of the tree used to detect that a swipe did not move the content.
  const fingerprintOf = (t: GetUITreeResponse) => {
    try {
      return JSON.stringify((t.elements || []).map((e: UIElement) => ({ text: e.text, resourceId: e.resourceId, bounds: e.bounds })))
    } catch {
      return ''
    }
  }

  let prevFingerprint = fingerprintOf(tree)

  // Swipe geometry depends only on the initial resolution and the options, so compute it once.
  const width = (tree.resolution && tree.resolution.width) ? tree.resolution.width : 0
  const height = (tree.resolution && tree.resolution.height) ? tree.resolution.height : 0
  const centerX = Math.round(width / 2) || 50
  const clampPct = (v: number) => Math.max(0.05, Math.min(0.95, v))
  const startPct = clampPct(direction === 'down' ? 0.8 : 0.2)
  const endPct = clampPct((direction === 'down' ? 0.8 : 0.2) + (direction === 'down' ? -scrollAmount : scrollAmount))
  const y1 = Math.round((height || 100) * startPct)
  const y2 = Math.round((height || 100) * endPct)

  const duration = 300
  let scrollsPerformed = 0

  for (let i = 0; i < maxScrolls; i++) {
    // Swipe failures (thrown or reported via success:false) do not abort the flow,
    // but they are remembered so the final reason names the real cause.
    let swipeError: string | undefined
    try {
      const swipeResult = await swipe(centerX, y1, centerX, y2, duration, deviceId)
      if (swipeResult && swipeResult.success === false) {
        swipeError = swipeResult.error || 'swipe reported failure'
      }
    } catch (e) {
      swipeError = e instanceof Error ? e.message : String(e)
    }
    if (swipeError !== undefined) {
      try { console.warn(`scrollToElement swipe failed: ${swipeError}`) } catch {}
    }

    scrollsPerformed++
    await new Promise(resolve => setTimeout(resolve, stabilizationDelayMs))

    tree = await fetchTree()
    if (tree.error) return { success: false, reason: tree.error, scrollsPerformed }

    found = findVisibleMatch(tree.elements, tree.resolution)
    if (found) {
      return { success: true, element: describe(found), scrollsPerformed }
    }

    const fp = fingerprintOf(tree)
    if (fp === prevFingerprint) {
      const reason = swipeError !== undefined
        ? `Swipe failed: ${swipeError}`
        : 'UI unchanged after scroll; likely end of list'
      return { success: false, reason, scrollsPerformed }
    }
    prevFingerprint = fp
  }

  return { success: false, reason: 'Element not found after scrolling', scrollsPerformed }
}
package/src/types.ts CHANGED
@@ -12,7 +12,6 @@ export interface StartAppResponse {
12
12
  launchTimeMs: number;
13
13
  error?: string;
14
14
  diagnostics?: any;
15
- instrumentation?: any;
16
15
  }
17
16
 
18
17
  export interface TerminateAppResponse {
@@ -0,0 +1,24 @@
1
+ #!/usr/bin/env node
2
+ import { AndroidInteract } from '../../../dist/android/interact.js'
3
+
4
+
5
+ // Usage: tsx test/device/observe/run-scroll-test-android.ts <deviceId> <appId> <selectorText>
6
+ const args = process.argv.slice(2)
7
+ const DEVICE_ID = args[0] || process.env.DEVICE_ID || 'emulator-5554'
8
+ const SELECTOR = args[2] || process.env.SELECTOR || 'Generate Session'
9
+
10
/**
 * Manual device check for the scroll_to_element tool on Android.
 *
 * Taps the screen once (best effort) to wake the device, waits a second, then runs
 * `ToolsInteract.scrollToElementHandler` from the built `dist` output against
 * DEVICE_ID for an element whose text equals SELECTOR and prints the result.
 *
 * The process exit code is set to 1 when the handler does not report
 * `success: true`, so shells and CI can detect a failed run.
 */
async function main() {
  console.log('Starting app if not running...')
  // Best-effort tap to wake device/emulator; any failure here is deliberately ignored.
  try { await new AndroidInteract().tap(10, 10, DEVICE_ID) } catch {}
  await new Promise(r => setTimeout(r, 1000))

  console.log('Running scroll_to_element for selector:', SELECTOR)
  // Use ToolsInteract from dist to call the handler
  const { ToolsInteract } = await import('../../../dist/tools/interact.js')

  const res = await (ToolsInteract as any).scrollToElementHandler({ platform: 'android', selector: { text: SELECTOR }, direction: 'down', maxScrolls: 10, scrollAmount: 0.7, deviceId: DEVICE_ID })
  console.log('Result:', JSON.stringify(res, null, 2))

  // Reflect the outcome in the exit code instead of always exiting 0.
  if (!res || res.success !== true) process.exitCode = 1
}

main().catch(err => {
  console.error(err)
  // An unexpected exception is also a failed run.
  process.exitCode = 1
})